<a href="https://colab.research.google.com/github/tushant-akar/CS367-Artifical-Intelligence-Lab/blob/main/MENACE_(Lab_Assignment_7).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import math
from tqdm import tqdm

class MENACE:
    """Tabular, epsilon-greedy learner that plays 'X' on a 9-character board.

    A board is a string of nine cells in row-major order, each 'X', 'O' or
    '-' (empty).  For every board it has been asked to move on, the agent
    keeps a "matchbox": a dict mapping each empty cell index to a learned
    value.  Values are updated once per game by ``learn``.
    """

    def __init__(self, learning_rate=0.1, discount_factor=0.9, exploration_rate=0.1, exploration_rate_decay=0.9995):
        """Create an untrained agent.

        Args:
            learning_rate: Weight of the new estimate when blending it with
                the stored value in ``learn``.
            discount_factor: Multiplier applied to the best value of the
                following position in ``learn``.
            exploration_rate: Probability that ``get_move`` picks a random
                legal cell instead of the best-valued one.
            exploration_rate_decay: Factor the exploration rate is multiplied
                by at the end of every ``learn`` call.
        """
        self.matchboxes = {}   # board string -> {empty cell index: value}
        self.moves_made = []   # (board, move) pairs of the game in progress
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_rate_decay = exploration_rate_decay

    def get_move(self, board):
        """Choose a cell for 'X' on ``board`` and record the choice.

        With probability ``exploration_rate`` a uniformly random empty cell
        is chosen; otherwise the empty cell with the highest stored value
        (ties go to the lowest index).

        Args:
            board: 9-character board string.

        Returns:
            Index (0-8) of the chosen empty cell.

        Raises:
            ValueError: If ``board`` has no empty cell.
        """
        if board not in self.matchboxes:
            self.matchboxes[board] = {i: 0 for i in range(9) if board[i] == '-'}

        moves = self.matchboxes[board]
        if not moves:
            # Fail the same way on both branches instead of an IndexError
            # from random.choice or a ValueError from max, depending on luck.
            raise ValueError(f"no empty cell to play on board {board!r}")

        if random.random() < self.exploration_rate:
            move = random.choice(list(moves))
        else:
            move = max(moves, key=moves.get)

        self.moves_made.append((board, move))
        return move

    def learn(self, final_reward):
        """Update the values of every move recorded since the last call.

        Moves are processed from last to first.  The last move's value is
        set to ``final_reward``.  Every earlier move is blended towards
        ``calculate_reward(board, move)`` plus the discounted best value of
        the next recorded position (which has already been updated).
        Afterwards the move history is cleared and the exploration rate is
        decayed.

        Args:
            final_reward: Outcome of the game from this agent's point of
                view (the callers in this file pass 1, 0 or -1).
        """
        last_index = len(self.moves_made) - 1
        for i in range(last_index, -1, -1):
            board, move = self.moves_made[i]

            if i == last_index:
                new_q = final_reward
            else:
                next_board = self.moves_made[i + 1][0]
                next_max_q = max(self.matchboxes[next_board].values(), default=0)
                reward = self.calculate_reward(board, move)
                new_q = (1 - self.learning_rate) * self.matchboxes[board][move] + \
                        self.learning_rate * (reward + self.discount_factor * next_max_q)

            self.matchboxes[board][move] = new_q

        self.moves_made = []
        self.exploration_rate *= self.exploration_rate_decay  # Decay the exploration rate

    def calculate_reward(self, board, move):
        """Return the shaping reward for playing 'X' at ``move`` on ``board``.

        Assumes ``board`` has no completed line yet, which holds for every
        position the game loop hands to ``get_move``.

        Returns:
            1 if the move completes a line for 'X', 0.5 if it takes a cell
            on which 'O' would have completed a line, otherwise 0.1.
        """
        if check_win(board[:move] + 'X' + board[move+1:]):
            return 1  # Immediate win
        # A block means: had 'O' taken this cell instead, 'O' would have won.
        # Because the board had no completed line before, any line found now
        # must run through ``move``.
        if check_win(board[:move] + 'O' + board[move+1:]):
            return 0.5  # Blocked opponent's win
        return 0.1  # Neither win nor block

def check_win(board):
    """Report whether any row, column or diagonal holds three equal marks.

    Args:
        board: 9-character board string in row-major order; '-' is empty.

    Returns:
        True if some line consists of three identical non-empty cells,
        otherwise False.
    """
    rows = [(start, start + 1, start + 2) for start in (0, 3, 6)]
    columns = [(start, start + 3, start + 6) for start in (0, 1, 2)]
    diagonals = [(0, 4, 8), (2, 4, 6)]
    return any(
        board[first] == board[second] == board[third] != '-'
        for first, second, third in rows + columns + diagonals
    )

class MinimaxPlayer:
    """Minimax opponent that plays 'O' on a 9-character board string.

    Scores are from 'O''s point of view: 1 when 'O' has a line, -1 when 'X'
    has a line, 0 for a full board without a line or for a position at the
    search horizon.
    """

    def __init__(self, max_depth=5):
        """Create a player.

        Args:
            max_depth: Value of ``depth`` at which ``minimax`` stops
                expanding a position that is not already decided and scores
                it 0.  ``None`` searches to the end of the game.
        """
        self.max_depth = max_depth
        # (board, is_maximizing, plies left) -> score.  The same positions
        # recur across moves and games, so each is searched only once.
        self._score_cache = {}

    def get_move(self, board):
        """Return the empty cell with the best minimax score for 'O'.

        Ties go to the lowest cell index.  Returns ``None`` when ``board``
        has no empty cell.
        """
        best_move = None
        best_score = -math.inf

        for move in range(9):
            if board[move] == '-':
                new_board = board[:move] + 'O' + board[move+1:]
                score = self.minimax(new_board, 0, False)
                if score > best_score:
                    best_score = score
                    best_move = move

        return best_move

    def minimax(self, board, depth, is_maximizing):
        """Score ``board`` for 'O'.

        Args:
            board: Position to evaluate.
            depth: Number of plies already searched below the root move.
            is_maximizing: True when 'O' is to move, False when 'X' is.

        Returns:
            1, -1 or 0 as described on the class.
        """
        if check_win(board):
            # The side that just moved completed the line, i.e. the side
            # that is *not* to move now.
            return -1 if is_maximizing else 1
        if '-' not in board:
            return 0

        plies_left = None if self.max_depth is None else self.max_depth - depth
        if plies_left is not None and plies_left <= 0:
            return 0  # Search horizon reached: treat as undecided.

        key = (board, is_maximizing, plies_left)
        cached = self._score_cache.get(key)
        if cached is not None:
            return cached

        mark = 'O' if is_maximizing else 'X'
        child_scores = (
            self.minimax(board[:move] + mark + board[move+1:], depth + 1, not is_maximizing)
            for move in range(9)
            if board[move] == '-'
        )
        score = max(child_scores) if is_maximizing else min(child_scores)

        self._score_cache[key] = score
        return score

def play_game(menace, opponent):
    """Play one game from an empty board, with ``menace`` moving first as 'X'.

    Both players are asked for a cell via ``get_move(board)``; ``opponent``
    plays 'O'.

    Returns:
        1 if ``menace`` completes a line, -1 if ``opponent`` does, 0 if the
        board fills up without a line.
    """
    state = '-' * 9
    mover = menace

    while True:
        menace_to_move = mover == menace
        cell = mover.get_move(state)
        mark = 'X' if menace_to_move else 'O'
        state = ''.join((state[:cell], mark, state[cell + 1:]))

        if check_win(state):
            if menace_to_move:
                return 1
            return -1
        if state.count('-') == 0:
            return 0

        # Hand the turn to the other side.
        mover = opponent if menace_to_move else menace

def train_menace(menace, opponent, episodes):
    """Play ``episodes`` games and let ``menace`` learn from each result.

    Progress is shown with a tqdm bar.
    """
    progress = tqdm(range(episodes), desc="Training MENACE")
    for _ in progress:
        outcome = play_game(menace, opponent)
        menace.learn(outcome)

# Training
menace = MENACE()
opponent = MinimaxPlayer(max_depth=5)

print("Training MENACE...")
train_menace(menace, opponent, 20000)

print("\nTraining completed!")


def _read_human_move(board):
    """Prompt on stdin until the user names an empty cell (0-8) of ``board``."""
    prompt = "Your move (0-8): "
    while True:
        try:
            move = int(input(prompt))
        except ValueError:
            move = -1  # Not a number: handled as an invalid move below.
        # The range check also rejects negative numbers, which would index
        # from the end of the string and corrupt the board when slicing.
        if 0 <= move < len(board) and board[move] == '-':
            return move
        prompt = "Invalid move. Try again (0-8): "


# Test game: MENACE ('X') moves first, the human plays 'O'.
# ``opponent`` is only used here as the marker for "human to move".
board = '-' * 9
current_player = menace
result = 0  # Outcome from MENACE's point of view: 1 win, -1 loss, 0 draw.

print("\nLet's play a game against MENACE!")
while True:
    print(f"\nCurrent board:\n{board[:3]}\n{board[3:6]}\n{board[6:]}")

    if current_player == menace:
        move = menace.get_move(board)
        print(f"MENACE plays: {move}")
    else:
        move = _read_human_move(board)

    board = board[:move] + ('X' if current_player == menace else 'O') + board[move+1:]

    if check_win(board):
        print(f"\nFinal board:\n{board[:3]}\n{board[3:6]}\n{board[6:]}")
        print("MENACE wins!" if current_player == menace else "You win!")
        # The player who just moved is the winner; the turn is not switched
        # before leaving the loop.
        result = 1 if current_player == menace else -1
        break
    if '-' not in board:
        print(f"\nFinal board:\n{board[:3]}\n{board[3:6]}\n{board[6:]}")
        print("It's a draw!")
        result = 0
        break

    current_player = opponent if current_player == menace else menace

# Final learning step: feed the actual outcome back to MENACE.
menace.learn(result)

Training MENACE...

Current board:
O--
-X-
---
Exploiting: Chose move 1 with Q-value 1.35

Current board:
OX-
-X-
---

Current board:
OX-
-X-
-O-
Exploiting: Chose move 3 with Q-value 0.95

Current board:
OX-
XX-
-O-

Current board:
OX-
XXO
-O-
Exploring: Chose move 2

Current board:
OXX
XXO
-O-

Current board:
OXX
XXO
OO-
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   0%|          | 1/20000 [00:00<1:22:43,  4.03it/s]

Exploiting: Chose move 0 with Q-value 1.24
Exploiting: Chose move 7 with Q-value 0.90
Exploiting: Chose move 5 with Q-value 0.49
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.49
Updated Q-value for move 7 on board XOX-O----: 0.91
Updated Q-value for move 0 on board --X-O----: 1.24
Updated Q-value for move 2 on board ---------: 1.53
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 2/20000 [00:00<1:28:45,  3.76it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 3/20000 [00:00<1:34:10,  3.54it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 4/20000 [00:01<1:36:19,  3.46it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 5/20000 [00:01<1:39:48,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 7/20000 [00:02<1:35:19,  3.50it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Ex

Training MENACE:   0%|          | 8/20000 [00:02<1:37:04,  3.43it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 9/20000 [00:02<1:33:19,  3.57it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 10/20000 [00:02<1:34:30,  3.53it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 11/20000 [00:03<1:56:35,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 12/20000 [00:03<2:15:53,  2.45it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 13/20000 [00:04<2:27:24,  2.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 14/20000 [00:04<2:32:26,  2.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 15/20000 [00:05<2:36:29,  2.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 16/20000 [00:05<2:35:26,  2.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   0%|          | 17/20000 [00:06<2:17:44,  2.42it/s]

Exploiting: Chose move 0 with Q-value 1.12
Exploiting: Chose move 1 with Q-value 0.84
Exploiting: Chose move 6 with Q-value 0.48
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.48
Updated Q-value for move 1 on board X-OO-X---: 0.85
Updated Q-value for move 0 on board --O--X---: 1.14
Updated Q-value for move 5 on board ---------: 1.36
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 18/20000 [00:06<2:06:51,  2.63it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   0%|          | 19/20000 [00:06<1:57:48,  2.83it/s]

Exploiting: Chose move 0 with Q-value 1.23
Exploiting: Chose move 5 with Q-value 0.90
Exploiting: Chose move 7 with Q-value 0.49
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.49
Updated Q-value for move 5 on board X--OO-X--: 0.90
Updated Q-value for move 0 on board ----O-X--: 1.24
Updated Q-value for move 6 on board ---------: 1.50
Exploring: Chose move 7


Training MENACE:   0%|          | 20/20000 [00:07<1:53:48,  2.93it/s]

Exploiting: Chose move 0 with Q-value 1.19
Exploiting: Chose move 2 with Q-value 0.88
Exploring: Chose move 3
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.48
Updated Q-value for move 2 on board XO----OX-: 0.88
Updated Q-value for move 0 on board -O-----X-: 1.20
Updated Q-value for move 7 on board ---------: 1.44
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 21/20000 [00:07<1:47:16,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 22/20000 [00:07<1:43:15,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   0%|          | 23/20000 [00:07<1:43:54,  3.20it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.71
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 24/20000 [00:08<1:37:40,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploring: Chose move 8
Updated Q-value for move 8 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 25/20000 [00:08<1:40:14,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 26/20000 [00:08<1:41:51,  3.27it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.94
Updated Q-value for move 7 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 27/20000 [00:09<1:42:06,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   0%|          | 28/20000 [00:09<1:40:29,  3.31it/s]

Exploring: Chose move 0
Exploiting: Chose move 2 with Q-value 0.88
Exploiting: Chose move 3 with Q-value 0.48
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.49
Updated Q-value for move 2 on board XO----OX-: 0.89
Updated Q-value for move 0 on board -O-----X-: 1.21
Updated Q-value for move 7 on board ---------: 1.45
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 29/20000 [00:09<1:37:53,  3.40it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 30/20000 [00:10<1:38:38,  3.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1
Exploiting: Chose move 2 with Q-value 0.34
Exploiting: Chose move 8 with Q-value -0.23
Exploiting: Chose move 4 with Q-value -1.00
Updated Q-value for move 4 on board OXXO-O--X: -1.00
Updated Q-value for move 8 on board OXXO-----: -0.24
Updated Q-value for move 2 on board OX-------: 0.33
Updated Q-value for move 1 on board ---------: 0.82
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 31/20000 [00:10<1:42:53,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 32/20000 [00:10<1:39:49,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 33/20000 [00:10<1:42:28,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 34/20000 [00:11<1:42:14,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 35/20000 [00:11<1:42:23,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 36/20000 [00:11<1:41:38,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 37/20000 [00:12<1:41:35,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 38/20000 [00:12<1:38:17,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 39/20000 [00:12<1:39:06,  3.36it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 40/20000 [00:13<1:39:28,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 41/20000 [00:13<1:37:37,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 42/20000 [00:13<1:37:58,  3.40it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 43/20000 [00:13<1:36:03,  3.46it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 44/20000 [00:14<1:43:11,  3.22it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 45/20000 [00:14<1:39:10,  3.35it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 46/20000 [00:14<1:39:14,  3.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 47/20000 [00:15<1:39:50,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 48/20000 [00:15<1:37:02,  3.43it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 49/20000 [00:15<1:39:18,  3.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 50/20000 [00:16<2:02:17,  2.72it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 51/20000 [00:16<2:17:03,  2.43it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   0%|          | 52/20000 [00:17<2:28:23,  2.24it/s]

Exploiting: Chose move 0 with Q-value 1.14
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Exploiting: Chose move 1 with Q-value 0.00
Updated Q-value for move 1 on board X-OOOXXOX: 0.00
Updated Q-value for move 6 on board X-OOOX--X: 0.05
Updated Q-value for move 8 on board X-OO-X---: 0.10
Updated Q-value for move 0 on board --O--X---: 1.15
Updated Q-value for move 5 on board ---------: 1.38
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 53/20000 [00:17<2:30:25,  2.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploring: Chose move 8
Updated Q-value for move 8 on board OX-OXX-O-: -1.00
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 54/20000 [00:18<2:38:12,  2.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   0%|          | 55/20000 [00:18<2:26:02,  2.28it/s]

Exploiting: Chose move 0 with Q-value 1.20
Exploiting: Chose move 7 with Q-value 0.88
Exploiting: Chose move 2 with Q-value 0.49
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.49
Updated Q-value for move 7 on board XO--O---X: 0.89
Updated Q-value for move 0 on board ----O---X: 1.21
Updated Q-value for move 8 on board ---------: 1.47
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 56/20000 [00:18<2:11:17,  2.53it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 57/20000 [00:19<1:58:30,  2.80it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 58/20000 [00:19<1:55:40,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   0%|          | 60/20000 [00:20<1:48:46,  3.06it/s]

Exploiting: Chose move 0 with Q-value 1.24
Exploring: Chose move 1
Updated Q-value for move 1 on board X--OO-X--: -1.00
Updated Q-value for move 0 on board ----O-X--: 1.24
Updated Q-value for move 6 on board ---------: 1.52
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 61/20000 [00:20<1:40:51,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 62/20000 [00:20<1:41:25,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 63/20000 [00:21<1:45:42,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 64/20000 [00:21<1:44:35,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 65/20000 [00:21<1:41:24,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 66/20000 [00:21<1:40:09,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 67/20000 [00:22<1:40:30,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 68/20000 [00:22<1:41:20,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 69/20000 [00:22<1:41:54,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 70/20000 [00:23<1:39:35,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 71/20000 [00:23<1:38:21,  3.38it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.94
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   0%|          | 72/20000 [00:23<1:38:38,  3.37it/s]

Exploiting: Chose move 0 with Q-value 1.15
Exploiting: Chose move 1 with Q-value 0.85
Exploiting: Chose move 6 with Q-value 0.48
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.48
Updated Q-value for move 1 on board X-OO-X---: 0.86
Updated Q-value for move 0 on board --O--X---: 1.16
Updated Q-value for move 5 on board ---------: 1.39
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 73/20000 [00:23<1:34:14,  3.52it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.93
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.93
Updated Q-value for move 5 on board O---X----: 1.30
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72

Training MENACE:   0%|          | 74/20000 [00:24<1:36:55,  3.43it/s]


Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 76/20000 [00:24<1:37:50,  3.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 77/20000 [00:25<1:37:04,  3.42it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 78/20000 [00:25<1:34:00,  3.53it/s]

Exploring: Chose move 1
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 79/20000 [00:25<1:35:57,  3.46it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 80/20000 [00:26<1:35:33,  3.47it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 81/20000 [00:26<1:42:34,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 82/20000 [00:26<1:40:14,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35

Training MENACE:   0%|          | 83/20000 [00:27<1:43:36,  3.20it/s]


Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 84/20000 [00:27<1:38:47,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 85/20000 [00:27<1:37:11,  3.42it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 86/20000 [00:27<1:40:31,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   0%|          | 87/20000 [00:28<1:37:16,  3.41it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.71
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 88/20000 [00:28<1:37:40,  3.40it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.94
Updated Q-value for move 6 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 89/20000 [00:28<2:00:56,  2.74it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 90/20000 [00:29<2:15:20,  2.45it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 91/20000 [00:29<2:24:46,  2.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 92/20000 [00:30<2:25:00,  2.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 93/20000 [00:30<2:33:30,  2.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 94/20000 [00:31<2:33:49,  2.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 95/20000 [00:31<2:12:46,  2.50it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 96/20000 [00:31<2:05:10,  2.65it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 97/20000 [00:32<1:52:00,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 98/20000 [00:32<1:48:53,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 99/20000 [00:32<1:48:20,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   0%|          | 100/20000 [00:33<1:42:48,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   1%|          | 101/20000 [00:33<1:45:56,  3.13it/s]

Exploiting: Chose move 6 with Q-value 0.22
Exploiting: Chose move 2 with Q-value -0.37
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OOXXO-X--: -1.00
Updated Q-value for move 2 on board OO-X--X--: -0.37
Updated Q-value for move 6 on board O--X-----: 0.21
Updated Q-value for move 3 on board ---------: 0.73
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 102/20000 [00:33<1:45:53,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 103/20000 [00:34<1:43:05,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 104/20000 [00:34<1:37:54,  3.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 105/20000 [00:34<1:34:04,  3.52it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 106/20000 [00:34<1:39:31,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 108/20000 [00:35<1:36:21,  3.44it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.92
Exploiting: Chose move 1 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.92
Updated Q-value for move 2 on board O---X----: 1.30
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 109/20000 [00:35<1:38:49,  3.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 110/20000 [00:36<1:38:11,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 111/20000 [00:36<1:35:52,  3.46it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   1%|          | 112/20000 [00:36<1:34:40,  3.50it/s]

Exploiting: Chose move 0 with Q-value 1.21
Exploiting: Chose move 7 with Q-value 0.89
Exploiting: Chose move 2 with Q-value 0.49
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.49
Updated Q-value for move 7 on board XO--O---X: 0.89
Updated Q-value for move 0 on board ----O---X: 1.22
Updated Q-value for move 8 on board ---------: 1.48
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 114/20000 [00:37<1:39:25,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   1%|          | 115/20000 [00:37<1:36:37,  3.43it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.71
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 116/20000 [00:37<1:35:53,  3.46it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 118/20000 [00:38<1:32:35,  3.58it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 119/20000 [00:38<1:35:00,  3.49it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploring: Chose move 8
Updated Q-value for move 8 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   1%|          | 120/20000 [00:39<1:40:43,  3.29it/s]

Exploiting: Chose move 0 with Q-value 1.16
Exploiting: Chose move 1 with Q-value 0.86
Exploring: Chose move 8
Updated Q-value for move 8 on board XXOOOX---: -1.00
Updated Q-value for move 1 on board X-OO-X---: 0.87
Updated Q-value for move 0 on board --O--X---: 1.17
Updated Q-value for move 5 on board ---------: 1.41
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 121/20000 [00:39<1:39:10,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 122/20000 [00:39<1:37:39,  3.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 123/20000 [00:39<1:35:13,  3.48it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 124/20000 [00:40<1:35:26,  3.47it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 126/20000 [00:40<1:40:25,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1
Exploiting: Chose move 2 with Q-value 0.33
Exploiting: Chose move 6 with Q-value -0.24
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OXXOO-X--: -1.00
Updated Q-value for move 6 on board OXXO-----: -0.26
Updated Q-value for move 2 on board OX-------: 0.33
Updated Q-value for move 1 on board ---------: 0.82
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q

Training MENACE:   1%|          | 127/20000 [00:41<1:43:22,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   1%|          | 128/20000 [00:41<1:49:05,  3.04it/s]

Exploiting: Chose move 0 with Q-value 1.21
Exploiting: Chose move 2 with Q-value 0.89
Exploiting: Chose move 3 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.49
Updated Q-value for move 2 on board XO----OX-: 0.89
Updated Q-value for move 0 on board -O-----X-: 1.22
Updated Q-value for move 7 on board ---------: 1.46
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 129/20000 [00:42<2:03:36,  2.68it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 130/20000 [00:42<2:18:46,  2.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 131/20000 [00:43<2:28:25,  2.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 132/20000 [00:43<2:34:17,  2.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 133/20000 [00:44<2:38:30,  2.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 134/20000 [00:44<2:29:07,  2.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 135/20000 [00:44<2:20:13,  2.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 136/20000 [00:45<2:06:13,  2.62it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 137/20000 [00:45<1:58:07,  2.80it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 138/20000 [00:45<1:49:29,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 139/20000 [00:45<1:45:10,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 141/20000 [00:46<1:38:26,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Explori

Training MENACE:   1%|          | 142/20000 [00:46<1:39:13,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 143/20000 [00:47<1:40:26,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 145/20000 [00:47<1:34:37,  3.50it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 146/20000 [00:47<1:33:41,  3.53it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 147/20000 [00:48<1:32:45,  3.57it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 148/20000 [00:48<1:36:50,  3.42it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 149/20000 [00:48<1:37:40,  3.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 150/20000 [00:49<1:31:40,  3.61it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 151/20000 [00:49<1:33:27,  3.54it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 152/20000 [00:49<1:39:48,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 153/20000 [00:50<1:41:06,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 154/20000 [00:50<1:44:31,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 155/20000 [00:50<1:40:44,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 156/20000 [00:50<1:41:07,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 157/20000 [00:51<1:43:24,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 158/20000 [00:51<1:43:40,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 159/20000 [00:51<1:40:20,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 160/20000 [00:52<1:40:39,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 161/20000 [00:52<1:42:09,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 162/20000 [00:52<1:41:40,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   1%|          | 163/20000 [00:53<1:42:50,  3.21it/s]

Exploiting: Chose move 0 with Q-value 1.22
Exploiting: Chose move 7 with Q-value 0.89
Exploiting: Chose move 2 with Q-value 0.49
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.49
Updated Q-value for move 7 on board XO--O---X: 0.90
Updated Q-value for move 0 on board ----O---X: 1.23
Updated Q-value for move 8 on board ---------: 1.50
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 164/20000 [00:53<1:41:30,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 165/20000 [00:53<1:41:07,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 166/20000 [00:54<1:37:30,  3.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 167/20000 [00:54<1:35:34,  3.46it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 168/20000 [00:54<1:59:10,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 169/20000 [00:55<2:19:27,  2.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   1%|          | 170/20000 [00:55<2:29:12,  2.22it/s]

Exploiting: Chose move 0 with Q-value 1.17
Exploiting: Chose move 1 with Q-value 0.87
Exploiting: Chose move 6 with Q-value 0.48
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.48
Updated Q-value for move 1 on board X-OO-X---: 0.87
Updated Q-value for move 0 on board --O--X---: 1.19
Updated Q-value for move 5 on board ---------: 1.43
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 171/20000 [00:56<2:33:02,  2.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 172/20000 [00:56<2:39:45,  2.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 173/20000 [00:57<2:28:44,  2.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 174/20000 [00:57<2:09:43,  2.55it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 175/20000 [00:57<2:00:45,  2.74it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 176/20000 [00:58<1:53:24,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 177/20000 [00:58<1:48:45,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 179/20000 [00:59<1:41:47,  3.25it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.94
Updated Q-value for move 7 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 180/20000 [00:59<1:40:24,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 181/20000 [00:59<1:41:45,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   1%|          | 182/20000 [00:59<1:42:56,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 183/20000 [01:00<1:39:11,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 184/20000 [01:00<1:36:45,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 185/20000 [01:00<1:36:48,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 186/20000 [01:01<1:33:31,  3.53it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 187/20000 [01:01<1:31:28,  3.61it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 188/20000 [01:01<1:33:58,  3.51it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 189/20000 [01:01<1:37:30,  3.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 190/20000 [01:02<1:36:11,  3.43it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.94
Updated Q-value for move 7 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 191/20000 [01:02<1:36:19,  3.43it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   1%|          | 192/20000 [01:02<1:37:29,  3.39it/s]

Exploiting: Chose move 0 with Q-value 1.24
Exploiting: Chose move 5 with Q-value 0.90
Exploiting: Chose move 7 with Q-value 0.49
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.49
Updated Q-value for move 5 on board X--OO-X--: 0.91
Updated Q-value for move 0 on board ----O-X--: 1.25
Updated Q-value for move 6 on board ---------: 1.53
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 193/20000 [01:03<1:39:47,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 194/20000 [01:03<1:37:39,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 195/20000 [01:03<1:34:51,  3.48it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 197/20000 [01:04<1:31:09,  3.62it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 198/20000 [01:04<1:35:03,  3.47it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 199/20000 [01:04<1:38:08,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 200/20000 [01:05<1:36:53,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 201/20000 [01:05<1:38:07,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 202/20000 [01:05<1:36:58,  3.40it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 203/20000 [01:06<1:35:38,  3.45it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 204/20000 [01:06<1:37:38,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 205/20000 [01:06<1:34:40,  3.48it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 206/20000 [01:06<1:35:05,  3.47it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 207/20000 [01:07<1:42:40,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7
Exploiting: Chose move 0 with Q-value 1.22
Exploiting: Chose move 2 with Q-value 0.89
Exploiting: Chose move 3 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.49
Updated Q-value for move 2 on board XO----OX-: 0.90
Updated Q-value for move 0 on board -O-----X-: 1.23
Updated Q-value for move 7 on board ---------: 1.48
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 208/20000 [01:07<2:02:13,  2.70it/s]

Exploring: Chose move 8
Exploring: Chose move 3
Updated Q-value for move 3 on board O-O-X---X: -1.00
Updated Q-value for move 8 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 210/20000 [01:08<2:22:01,  2.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.93
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.93
Updated Q-value for move 5 on board O---X----: 1.31
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 211/20000 [01:09<2:28:50,  2.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 212/20000 [01:09<2:38:30,  2.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 213/20000 [01:10<2:32:14,  2.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 214/20000 [01:10<2:15:53,  2.43it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 216/20000 [01:11<1:50:57,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 217/20000 [01:11<1:48:54,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 218/20000 [01:11<1:45:48,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   1%|          | 219/20000 [01:11<1:43:12,  3.19it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.71
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 220/20000 [01:12<1:41:26,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 221/20000 [01:12<1:38:01,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 222/20000 [01:12<1:35:43,  3.44it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 223/20000 [01:13<1:31:30,  3.60it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 224/20000 [01:13<1:38:39,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 225/20000 [01:13<1:39:26,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 226/20000 [01:13<1:38:36,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 227/20000 [01:14<1:36:46,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   1%|          | 228/20000 [01:14<1:35:09,  3.46it/s]

Exploiting: Chose move 0 with Q-value 1.24
Exploiting: Chose move 7 with Q-value 0.91
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.49
Updated Q-value for move 7 on board XOX-O----: 0.91
Updated Q-value for move 0 on board --X-O----: 1.25
Updated Q-value for move 2 on board ---------: 1.54
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 229/20000 [01:14<1:35:35,  3.45it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 230/20000 [01:15<1:36:01,  3.43it/s]

Exploring: Chose move 3
Exploring: Chose move 6
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board O-OXXOX--: -1.00
Updated Q-value for move 6 on board O--XXO---: 0.10
Updated Q-value for move 3 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 231/20000 [01:15<1:37:37,  3.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploring: Chose move 3
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   1%|          | 233/20000 [01:16<1:36:39,  3.41it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 234/20000 [01:16<1:40:40,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 235/20000 [01:16<1:39:32,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploring: Chose move 8
Updated Q-value for move 8 on board OX-OXX-O-: -1.00
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 236/20000 [01:16<1:37:29,  3.38it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.93
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.93
Updated Q-value for move 5 on board O---X----: 1.31
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 237/20000 [01:17<1:46:17,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 238/20000 [01:17<1:42:05,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 239/20000 [01:17<1:41:25,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 240/20000 [01:18<1:38:40,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 241/20000 [01:18<1:42:55,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   1%|          | 242/20000 [01:18<1:43:01,  3.20it/s]

Exploiting: Chose move 0 with Q-value 1.25
Exploiting: Chose move 5 with Q-value 0.91
Exploiting: Chose move 7 with Q-value 0.49
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.49
Updated Q-value for move 5 on board X--OO-X--: 0.91
Updated Q-value for move 0 on board ----O-X--: 1.26
Updated Q-value for move 6 on board ---------: 1.54
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 244/20000 [01:19<1:39:11,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 245/20000 [01:19<1:38:33,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 246/20000 [01:20<1:36:40,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 247/20000 [01:20<1:53:19,  2.90it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|          | 248/20000 [01:20<2:10:47,  2.52it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   1%|          | 249/20000 [01:21<2:26:30,  2.25it/s]

Exploiting: Chose move 0 with Q-value 1.19
Exploiting: Chose move 1 with Q-value 0.87
Exploiting: Chose move 6 with Q-value 0.48
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.48
Updated Q-value for move 1 on board X-OO-X---: 0.88
Updated Q-value for move 0 on board --O--X---: 1.20
Updated Q-value for move 5 on board ---------: 1.44
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 250/20000 [01:22<2:33:02,  2.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 251/20000 [01:22<2:37:03,  2.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 253/20000 [01:23<2:17:04,  2.40it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.94
Updated Q-value for move 7 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 255/20000 [01:23<1:55:20,  2.85it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 256/20000 [01:24<1:51:51,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 257/20000 [01:24<1:47:41,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 258/20000 [01:24<1:43:54,  3.17it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.93
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.93
Updated Q-value for move 5 on board O---X----: 1.31
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   1%|▏         | 259/20000 [01:25<1:40:39,  3.27it/s]

Exploiting: Chose move 0 with Q-value 1.23
Exploiting: Chose move 2 with Q-value 0.90
Exploiting: Chose move 3 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.49
Updated Q-value for move 2 on board XO----OX-: 0.90
Updated Q-value for move 0 on board -O-----X-: 1.23
Updated Q-value for move 7 on board ---------: 1.49
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 260/20000 [01:25<1:38:40,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 261/20000 [01:25<1:38:28,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   1%|▏         | 262/20000 [01:25<1:35:14,  3.45it/s]

Exploiting: Chose move 0 with Q-value 1.26
Exploiting: Chose move 5 with Q-value 0.91
Exploiting: Chose move 7 with Q-value 0.49
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.49
Updated Q-value for move 5 on board X--OO-X--: 0.91
Updated Q-value for move 0 on board ----O-X--: 1.26
Updated Q-value for move 6 on board ---------: 1.55
Exploring: Chose move 3


Training MENACE:   1%|▏         | 263/20000 [01:26<1:36:27,  3.41it/s]

Exploiting: Chose move 6 with Q-value 0.21
Exploiting: Chose move 2 with Q-value -0.37
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OOXXO-X--: -1.00
Updated Q-value for move 2 on board OO-X--X--: -0.37
Updated Q-value for move 6 on board O--X-----: 0.21
Updated Q-value for move 3 on board ---------: 0.72
Exploring: Chose move 3


Training MENACE:   1%|▏         | 264/20000 [01:26<1:39:40,  3.30it/s]

Exploiting: Chose move 6 with Q-value 0.21
Exploiting: Chose move 2 with Q-value -0.37
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OOXXO-X--: -1.00
Updated Q-value for move 2 on board OO-X--X--: -0.38
Updated Q-value for move 6 on board O--X-----: 0.20
Updated Q-value for move 3 on board ---------: 0.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 265/20000 [01:26<1:41:11,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 266/20000 [01:27<1:44:28,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 268/20000 [01:27<1:43:48,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2
Exploiting: Chose move 0 with Q-value 1.25
Exploiting: Chose move 7 with Q-value 0.91
Exploiting: Chose move 5 with Q-value 0.49
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.49
Updated Q-value for move 7 on board XOX-O----: 0.91
Updated Q-value for move 0 on board --X-O----: 1.26
Updated Q-value for move 2 on board ---------: 1.55
Exploring: Chose move 8


Training MENACE:   1%|▏         | 269/20000 [01:28<1:46:08,  3.10it/s]

Exploiting: Chose move 0 with Q-value 1.23
Exploiting: Chose move 7 with Q-value 0.90
Exploiting: Chose move 2 with Q-value 0.49
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.49
Updated Q-value for move 7 on board XO--O---X: 0.90
Updated Q-value for move 0 on board ----O---X: 1.24
Updated Q-value for move 8 on board ---------: 1.51
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 270/20000 [01:28<1:46:16,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 271/20000 [01:28<1:45:41,  3.11it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 272/20000 [01:29<1:43:14,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 273/20000 [01:29<1:41:08,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 274/20000 [01:29<1:48:35,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 275/20000 [01:30<1:46:11,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 276/20000 [01:30<1:43:38,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 277/20000 [01:30<1:41:08,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 278/20000 [01:30<1:40:00,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   1%|▏         | 279/20000 [01:31<1:40:54,  3.26it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 280/20000 [01:31<1:45:23,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   1%|▏         | 281/20000 [01:31<1:43:09,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 283/20000 [01:32<1:41:18,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 284/20000 [01:32<1:40:53,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 285/20000 [01:33<1:52:03,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 286/20000 [01:33<2:08:31,  2.56it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   1%|▏         | 287/20000 [01:34<2:18:53,  2.37it/s]

Exploiting: Chose move 6 with Q-value 0.20
Exploring: Chose move 2
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OOXXO-X--: -1.00
Updated Q-value for move 2 on board OO-X--X--: -0.38
Updated Q-value for move 6 on board O--X-----: 0.20
Updated Q-value for move 3 on board ---------: 0.71
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 289/20000 [01:35<2:29:29,  2.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 291/20000 [01:36<2:17:25,  2.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 292/20000 [01:36<2:03:39,  2.66it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 293/20000 [01:36<1:55:34,  2.84it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 294/20000 [01:36<1:46:26,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 295/20000 [01:37<1:46:56,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   1%|▏         | 296/20000 [01:37<1:40:54,  3.25it/s]

Exploring: Chose move 4
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 0.18
Updated Q-value for move 3 on board ---------: 0.71
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 297/20000 [01:37<1:40:28,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 298/20000 [01:38<1:39:03,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   1%|▏         | 299/20000 [01:38<1:40:15,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 300/20000 [01:38<1:38:21,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 301/20000 [01:38<1:38:58,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 302/20000 [01:39<1:43:02,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 303/20000 [01:39<1:42:40,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 304/20000 [01:39<1:41:05,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 305/20000 [01:40<1:41:34,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 306/20000 [01:40<1:40:26,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 308/20000 [01:41<1:49:41,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   2%|▏         | 309/20000 [01:41<1:44:53,  3.13it/s]

Exploiting: Chose move 0 with Q-value 1.20
Exploiting: Chose move 1 with Q-value 0.88
Exploiting: Chose move 6 with Q-value 0.48
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.49
Updated Q-value for move 1 on board X-OO-X---: 0.89
Updated Q-value for move 0 on board --O--X---: 1.21
Updated Q-value for move 5 on board ---------: 1.46
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   2%|▏         | 310/20000 [01:41<1:42:23,  3.21it/s]

Exploiting: Chose move 0 with Q-value 1.26
Exploiting: Chose move 5 with Q-value 0.91
Exploiting: Chose move 7 with Q-value 0.49
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.49
Updated Q-value for move 5 on board X--OO-X--: 0.92
Updated Q-value for move 0 on board ----O-X--: 1.27
Updated Q-value for move 6 on board ---------: 1.56
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 311/20000 [01:42<1:42:44,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 312/20000 [01:42<1:42:36,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 313/20000 [01:42<1:39:04,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 314/20000 [01:43<1:34:40,  3.47it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 315/20000 [01:43<1:36:38,  3.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 316/20000 [01:43<1:40:26,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 317/20000 [01:43<1:38:53,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 318/20000 [01:44<1:37:45,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 319/20000 [01:44<1:36:35,  3.40it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploring: Chose move 3
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 320/20000 [01:44<1:35:10,  3.45it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 321/20000 [01:45<1:33:15,  3.52it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 322/20000 [01:45<1:35:16,  3.44it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 323/20000 [01:45<1:37:01,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 324/20000 [01:46<1:50:57,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 325/20000 [01:46<2:09:05,  2.54it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 326/20000 [01:47<2:29:35,  2.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 327/20000 [01:47<2:38:28,  2.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 328/20000 [01:48<2:39:23,  2.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 329/20000 [01:48<2:38:45,  2.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 330/20000 [01:49<2:22:11,  2.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 331/20000 [01:49<2:06:36,  2.59it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 332/20000 [01:49<1:59:02,  2.75it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 333/20000 [01:49<1:52:04,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 334/20000 [01:50<1:46:09,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 336/20000 [01:50<1:42:07,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 337/20000 [01:51<1:43:07,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 338/20000 [01:51<1:43:09,  3.18it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.93
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.94
Updated Q-value for move 5 on board O---X----: 1.31
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   2%|▏         | 339/20000 [01:51<1:41:04,  3.24it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 340/20000 [01:52<1:39:54,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 341/20000 [01:52<1:40:54,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 342/20000 [01:52<1:42:43,  3.19it/s]

Exploring: Chose move 3
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board O-OXXOX--: -1.00
Updated Q-value for move 6 on board O--XXO---: 0.05
Updated Q-value for move 3 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 343/20000 [01:53<1:39:50,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 344/20000 [01:53<1:41:16,  3.23it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 345/20000 [01:53<1:43:16,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 346/20000 [01:53<1:42:56,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 347/20000 [01:54<1:39:44,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 348/20000 [01:54<1:38:58,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 349/20000 [01:54<1:40:14,  3.27it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 350/20000 [01:55<1:37:56,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 352/20000 [01:55<1:46:42,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 w

Training MENACE:   2%|▏         | 353/20000 [01:56<1:48:00,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 354/20000 [01:56<1:46:47,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 355/20000 [01:56<1:45:42,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 356/20000 [01:57<1:49:05,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 357/20000 [01:57<1:49:08,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 358/20000 [01:57<1:49:21,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 359/20000 [01:58<1:41:34,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 360/20000 [01:58<1:42:01,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 361/20000 [01:58<1:45:44,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 362/20000 [01:59<2:01:58,  2.68it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 363/20000 [01:59<2:16:28,  2.40it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 364/20000 [02:00<2:27:49,  2.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 365/20000 [02:00<2:34:34,  2.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 366/20000 [02:01<2:38:09,  2.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 367/20000 [02:01<2:29:08,  2.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 368/20000 [02:02<2:13:26,  2.45it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 369/20000 [02:02<2:01:54,  2.68it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.92
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.93
Updated Q-value for move 2 on board O---X----: 1.30
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 370/20000 [02:02<1:52:25,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 371/20000 [02:02<1:50:11,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 372/20000 [02:03<1:45:21,  3.10it/s]

Exploring: Chose move 7
Exploring: Chose move 5
Updated Q-value for move 5 on board OO--X--X-: -1.00
Updated Q-value for move 7 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 373/20000 [02:03<1:42:50,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 375/20000 [02:04<1:43:01,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 w

Training MENACE:   2%|▏         | 376/20000 [02:04<1:44:22,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 377/20000 [02:04<1:44:31,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 378/20000 [02:05<1:41:07,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 379/20000 [02:05<1:37:48,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 380/20000 [02:05<1:38:41,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 381/20000 [02:05<1:41:01,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   2%|▏         | 382/20000 [02:06<1:43:37,  3.16it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 383/20000 [02:06<1:40:22,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 384/20000 [02:06<1:41:16,  3.23it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 385/20000 [02:07<1:44:39,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 386/20000 [02:07<1:42:55,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 387/20000 [02:07<1:43:37,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 388/20000 [02:08<1:42:03,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 389/20000 [02:08<1:37:32,  3.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 390/20000 [02:08<1:40:44,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 391/20000 [02:09<1:38:16,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6
Exploiting: Chose move 0 with Q-value 1.27
Exploiting: Chose move 5 with Q-value 0.92
Exploiting: Chose move 7 with Q-value 0.49
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.49
Updated Q-value for move 5 on board X--OO-X--: 0.92
Updated Q-value for move 0 on board ----O-X--: 1.28
Updated Q-value for move 6 on board ---------: 1.57
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 392/20000 [02:09<1:40:09,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 393/20000 [02:09<1:42:08,  3.20it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 394/20000 [02:09<1:38:31,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 395/20000 [02:10<1:42:45,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 396/20000 [02:10<1:39:41,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 397/20000 [02:10<1:39:36,  3.28it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 398/20000 [02:11<1:40:42,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 399/20000 [02:11<1:40:23,  3.25it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 400/20000 [02:12<2:02:13,  2.67it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 401/20000 [02:12<2:17:10,  2.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 402/20000 [02:13<2:23:51,  2.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 403/20000 [02:13<2:30:44,  2.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 404/20000 [02:14<2:32:59,  2.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 405/20000 [02:14<2:30:47,  2.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 406/20000 [02:14<2:14:01,  2.44it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 407/20000 [02:15<2:06:08,  2.59it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 408/20000 [02:15<2:00:06,  2.72it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 409/20000 [02:15<1:52:03,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 410/20000 [02:16<1:51:34,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 411/20000 [02:16<1:46:54,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 412/20000 [02:16<1:44:45,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 414/20000 [02:17<1:38:54,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 415/20000 [02:17<1:35:38,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 416/20000 [02:17<1:36:56,  3.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 417/20000 [02:18<1:36:35,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 418/20000 [02:18<1:40:55,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 419/20000 [02:18<1:40:34,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 420/20000 [02:19<1:34:16,  3.46it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 421/20000 [02:19<1:42:30,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 423/20000 [02:20<1:40:09,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 424/20000 [02:20<1:37:16,  3.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 425/20000 [02:20<1:41:24,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 426/20000 [02:20<1:40:56,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 427/20000 [02:21<1:43:09,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 428/20000 [02:21<1:45:22,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 429/20000 [02:21<1:43:54,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 430/20000 [02:22<1:42:55,  3.17it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 432/20000 [02:22<1:39:55,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 433/20000 [02:23<1:43:59,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 434/20000 [02:23<1:41:07,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 436/20000 [02:24<1:44:21,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 437/20000 [02:24<1:43:07,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 438/20000 [02:24<2:02:17,  2.67it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 439/20000 [02:25<2:17:03,  2.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 440/20000 [02:26<2:25:40,  2.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 441/20000 [02:26<2:31:26,  2.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 442/20000 [02:27<2:34:31,  2.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 443/20000 [02:27<2:39:06,  2.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 444/20000 [02:27<2:20:09,  2.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 445/20000 [02:28<2:06:21,  2.58it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   2%|▏         | 446/20000 [02:28<1:57:40,  2.77it/s]

Exploiting: Chose move 0 with Q-value 1.21
Exploiting: Chose move 1 with Q-value 0.89
Exploiting: Chose move 6 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.49
Updated Q-value for move 1 on board X-OO-X---: 0.89
Updated Q-value for move 0 on board --O--X---: 1.22
Updated Q-value for move 5 on board ---------: 1.47
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 447/20000 [02:28<1:51:48,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 448/20000 [02:29<1:47:43,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   2%|▏         | 449/20000 [02:29<1:44:37,  3.11it/s]

Exploiting: Chose move 0 with Q-value 1.23
Exploring: Chose move 4
Exploiting: Chose move 2 with Q-value 0.00
Exploiting: Chose move 5 with Q-value 0.00
Updated Q-value for move 5 on board XOXOX-OXO: 0.00
Updated Q-value for move 2 on board XO--X-OXO: 0.05
Updated Q-value for move 4 on board XO----OX-: 0.05
Updated Q-value for move 0 on board -O-----X-: 1.24
Updated Q-value for move 7 on board ---------: 1.50
Exploring: Chose move 7


Training MENACE:   2%|▏         | 450/20000 [02:29<1:41:21,  3.21it/s]

Exploiting: Chose move 0 with Q-value 1.24
Exploiting: Chose move 2 with Q-value 0.90
Exploiting: Chose move 3 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.49
Updated Q-value for move 2 on board XO----OX-: 0.91
Updated Q-value for move 0 on board -O-----X-: 1.25
Updated Q-value for move 7 on board ---------: 1.52
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 451/20000 [02:29<1:41:42,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   2%|▏         | 452/20000 [02:30<1:41:04,  3.22it/s]

Exploring: Chose move 2
Exploiting: Chose move 0 with Q-value 0.09
Exploiting: Chose move 3 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.49
Updated Q-value for move 0 on board -OX---OX-: 0.18
Updated Q-value for move 2 on board -O-----X-: 0.12
Updated Q-value for move 7 on board ---------: 1.53
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 453/20000 [02:30<1:37:34,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 454/20000 [02:30<1:41:10,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 455/20000 [02:31<1:37:57,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 456/20000 [02:31<1:36:22,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 457/20000 [02:31<1:40:33,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 458/20000 [02:32<1:41:30,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   2%|▏         | 459/20000 [02:32<1:41:33,  3.21it/s]

Exploring: Chose move 8
Exploiting: Chose move 2 with Q-value 0.16
Exploiting: Chose move 3 with Q-value 0.14
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board OXXXOOO-X: 0.00
Updated Q-value for move 3 on board OXX-OO--X: 0.17
Updated Q-value for move 2 on board OX--O---X: 0.21
Updated Q-value for move 8 on board OX-------: 0.22
Updated Q-value for move 1 on board ---------: 0.82
Exploring: Chose move 8


Training MENACE:   2%|▏         | 460/20000 [02:32<1:39:00,  3.29it/s]

Exploiting: Chose move 0 with Q-value 1.24
Exploiting: Chose move 7 with Q-value 0.90
Exploiting: Chose move 2 with Q-value 0.49
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.49
Updated Q-value for move 7 on board XO--O---X: 0.91
Updated Q-value for move 0 on board ----O---X: 1.24
Updated Q-value for move 8 on board ---------: 1.52
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 462/20000 [02:33<1:38:11,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 463/20000 [02:33<1:39:18,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 464/20000 [02:33<1:42:13,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   2%|▏         | 465/20000 [02:34<1:46:36,  3.05it/s]

Exploiting: Chose move 0 with Q-value 1.22
Exploiting: Chose move 1 with Q-value 0.89
Exploiting: Chose move 6 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.49
Updated Q-value for move 1 on board X-OO-X---: 0.90
Updated Q-value for move 0 on board --O--X---: 1.23
Updated Q-value for move 5 on board ---------: 1.48
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 467/20000 [02:34<1:46:36,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 468/20000 [02:35<1:42:12,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 469/20000 [02:35<1:40:00,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 470/20000 [02:35<1:40:23,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   2%|▏         | 472/20000 [02:36<1:42:14,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 473/20000 [02:36<1:40:24,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 474/20000 [02:37<1:41:38,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 475/20000 [02:37<1:40:30,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 476/20000 [02:37<1:51:02,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 477/20000 [02:38<2:08:47,  2.53it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 478/20000 [02:38<2:22:34,  2.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 479/20000 [02:39<2:29:07,  2.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 480/20000 [02:39<2:32:59,  2.13it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 482/20000 [02:40<2:22:15,  2.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 483/20000 [02:41<2:10:38,  2.49it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 484/20000 [02:41<2:06:59,  2.56it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   2%|▏         | 485/20000 [02:41<1:59:15,  2.73it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 486/20000 [02:41<1:52:25,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 488/20000 [02:42<1:44:27,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 489/20000 [02:42<1:40:50,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 490/20000 [02:43<1:40:17,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 491/20000 [02:43<1:37:12,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 492/20000 [02:43<1:36:45,  3.36it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.93
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.93
Updated Q-value for move 2 on board O---X----: 1.30
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 493/20000 [02:44<1:37:52,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 494/20000 [02:44<1:37:07,  3.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 495/20000 [02:44<1:36:27,  3.37it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   2%|▏         | 496/20000 [02:44<1:42:09,  3.18it/s]

Exploiting: Chose move 0 with Q-value 1.26
Exploiting: Chose move 7 with Q-value 0.91
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.49
Updated Q-value for move 7 on board XOX-O----: 0.92
Updated Q-value for move 0 on board --X-O----: 1.26
Updated Q-value for move 2 on board ---------: 1.56
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 497/20000 [02:45<1:42:29,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 498/20000 [02:45<1:41:50,  3.19it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.93
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.93
Updated Q-value for move 2 on board O---X----: 1.31
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   2%|▏         | 499/20000 [02:45<1:42:41,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 501/20000 [02:46<1:40:26,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 502/20000 [02:46<1:40:32,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   3%|▎         | 504/20000 [02:47<1:42:17,  3.18it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 1
Exploiting: Chose move 2 with Q-value 0.33
Exploiting: Chose move 8 with Q-value -0.24
Exploiting: Chose move 4 with Q-value -1.00
Updated Q-value for move 4 on board OXXO-O--X: -1.00
Updated Q-value for move 8 on board OXXO-----: -0.26
Updated Q-value for move 2 on board OX-------: 0.32
Updated Q-value for move 1 on board ---------: 0.81
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 505/20000 [02:47<1:41:05,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 506/20000 [02:48<1:40:41,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 507/20000 [02:48<1:36:00,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 508/20000 [02:48<1:38:56,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 510/20000 [02:49<1:40:29,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   3%|▎         | 511/20000 [02:49<1:41:53,  3.19it/s]

Exploiting: Chose move 6 with Q-value 0.20
Exploiting: Chose move 2 with Q-value -0.38
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OOXXO-X--: -1.00
Updated Q-value for move 2 on board OO-X--X--: -0.38
Updated Q-value for move 6 on board O--X-----: 0.19
Updated Q-value for move 3 on board ---------: 0.71
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 512/20000 [02:49<1:41:06,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 513/20000 [02:50<1:39:46,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOOXX-OX: 0.00
Updated Q-value for move 5 on board OXO-X--OX: 0.17
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 514/20000 [02:50<1:44:47,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 515/20000 [02:51<2:03:57,  2.62it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 516/20000 [02:51<2:18:25,  2.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 517/20000 [02:52<2:26:10,  2.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 518/20000 [02:52<2:29:50,  2.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 519/20000 [02:53<2:35:34,  2.09it/s]

Exploring: Chose move 3
Exploring: Chose move 8
Exploiting: Chose move 2 with Q-value 0.00
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board OOXXXOO-X: 0.00
Updated Q-value for move 2 on board OO-XXO--X: 0.05
Updated Q-value for move 8 on board O--XXO---: 0.05
Updated Q-value for move 3 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 520/20000 [02:53<2:29:57,  2.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 522/20000 [02:54<1:58:41,  2.74it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 523/20000 [02:54<1:52:54,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   3%|▎         | 524/20000 [02:54<1:46:36,  3.04it/s]

Exploiting: Chose move 0 with Q-value 1.23
Exploiting: Chose move 1 with Q-value 0.90
Exploring: Chose move 7
Updated Q-value for move 7 on board XXOOOX---: -1.00
Updated Q-value for move 1 on board X-OO-X---: 0.90
Updated Q-value for move 0 on board --O--X---: 1.23
Updated Q-value for move 5 on board ---------: 1.50
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 525/20000 [02:55<1:50:05,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 526/20000 [02:55<1:45:15,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 527/20000 [02:55<1:42:41,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 528/20000 [02:56<1:41:37,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 529/20000 [02:56<1:38:44,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   3%|▎         | 530/20000 [02:56<1:37:55,  3.31it/s]

Exploiting: Chose move 0 with Q-value 1.26
Exploiting: Chose move 7 with Q-value 0.92
Exploiting: Chose move 5 with Q-value 0.49
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.49
Updated Q-value for move 7 on board XOX-O----: 0.92
Updated Q-value for move 0 on board --X-O----: 1.27
Updated Q-value for move 2 on board ---------: 1.56
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 531/20000 [02:56<1:41:33,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 532/20000 [02:57<1:42:50,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 533/20000 [02:57<1:42:14,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 534/20000 [02:57<1:41:31,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 535/20000 [02:58<1:39:30,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 536/20000 [02:58<1:37:11,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 537/20000 [02:58<1:36:43,  3.35it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.93
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.93
Updated Q-value for move 2 on board O---X----: 1.31
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   3%|▎         | 538/20000 [02:59<1:35:27,  3.40it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 539/20000 [02:59<1:33:18,  3.48it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 540/20000 [02:59<1:34:09,  3.44it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 541/20000 [02:59<1:35:33,  3.39it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 542/20000 [03:00<1:35:29,  3.40it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 543/20000 [03:00<1:35:19,  3.40it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 544/20000 [03:00<1:42:49,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   3%|▎         | 545/20000 [03:01<1:44:23,  3.11it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 546/20000 [03:01<1:37:30,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 547/20000 [03:01<1:39:09,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 548/20000 [03:02<1:37:24,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   3%|▎         | 549/20000 [03:02<1:36:12,  3.37it/s]

Exploiting: Chose move 6 with Q-value 0.19
Exploiting: Chose move 2 with Q-value -0.38
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OOXXO-X--: -1.00
Updated Q-value for move 2 on board OO-X--X--: -0.38
Updated Q-value for move 6 on board O--X-----: 0.19
Updated Q-value for move 3 on board ---------: 0.70
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 551/20000 [03:02<1:35:56,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 553/20000 [03:03<1:30:54,  3.57it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 554/20000 [03:03<1:54:28,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 555/20000 [03:04<2:11:53,  2.46it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 556/20000 [03:05<2:24:50,  2.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 557/20000 [03:05<2:30:20,  2.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 558/20000 [03:06<2:32:41,  2.12it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 559/20000 [03:06<2:30:06,  2.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 560/20000 [03:06<2:14:03,  2.42it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 561/20000 [03:07<2:04:20,  2.61it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 562/20000 [03:07<1:58:25,  2.74it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.94
Updated Q-value for move 5 on board O---X----: 1.32
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 563/20000 [03:07<2:00:04,  2.70it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.93
Exploring: Chose move 7
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 7 on board O-XXXOO--: 0.10
Updated Q-value for move 3 on board O-X-X-O--: 0.93
Updated Q-value for move 2 on board O---X----: 1.31
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 564/20000 [03:08<1:51:40,  2.90it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 565/20000 [03:08<1:48:32,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 566/20000 [03:08<1:47:44,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 567/20000 [03:09<1:42:52,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 568/20000 [03:09<1:41:03,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 570/20000 [03:09<1:39:48,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   3%|▎         | 571/20000 [03:10<1:39:28,  3.26it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 572/20000 [03:10<1:39:26,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 573/20000 [03:10<1:40:05,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 574/20000 [03:11<1:42:27,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 576/20000 [03:11<1:35:50,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 577/20000 [03:12<1:36:11,  3.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 578/20000 [03:12<1:36:21,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 579/20000 [03:12<1:39:30,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 580/20000 [03:12<1:36:39,  3.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 581/20000 [03:13<1:37:18,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 582/20000 [03:13<1:36:24,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   3%|▎         | 583/20000 [03:13<1:36:31,  3.35it/s]

Exploiting: Chose move 0 with Q-value 1.23
Exploiting: Chose move 1 with Q-value 0.90
Exploiting: Chose move 6 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.49
Updated Q-value for move 1 on board X-OO-X---: 0.90
Updated Q-value for move 0 on board --O--X---: 1.24
Updated Q-value for move 5 on board ---------: 1.51
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 584/20000 [03:14<1:37:27,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 585/20000 [03:14<1:35:01,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   3%|▎         | 586/20000 [03:14<1:41:23,  3.19it/s]

Exploiting: Chose move 0 with Q-value 1.25
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.49
Exploring: Chose move 8
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.49
Updated Q-value for move 2 on board XO----OX-: 0.91
Updated Q-value for move 0 on board -O-----X-: 1.26
Updated Q-value for move 7 on board ---------: 1.54
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 587/20000 [03:15<1:36:58,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 588/20000 [03:15<1:36:46,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 589/20000 [03:15<1:38:05,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   3%|▎         | 590/20000 [03:15<1:37:56,  3.30it/s]

Exploiting: Chose move 0 with Q-value 1.24
Exploiting: Chose move 7 with Q-value 0.91
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.49
Updated Q-value for move 7 on board XO--O---X: 0.91
Updated Q-value for move 0 on board ----O---X: 1.25
Updated Q-value for move 8 on board ---------: 1.53
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 591/20000 [03:16<1:35:21,  3.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 592/20000 [03:16<1:45:58,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 593/20000 [03:17<2:07:55,  2.53it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 595/20000 [03:18<2:24:45,  2.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 596/20000 [03:18<2:30:24,  2.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 597/20000 [03:19<2:34:24,  2.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 598/20000 [03:19<2:26:07,  2.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 599/20000 [03:19<2:17:45,  2.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 600/20000 [03:20<2:05:37,  2.57it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   3%|▎         | 601/20000 [03:20<1:56:25,  2.78it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 602/20000 [03:20<1:51:46,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 603/20000 [03:21<1:49:55,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 604/20000 [03:21<1:46:37,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 605/20000 [03:21<1:44:16,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 606/20000 [03:22<1:44:24,  3.10it/s]

Exploring: Chose move 8
Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 607/20000 [03:22<1:43:58,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 608/20000 [03:22<1:47:06,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 609/20000 [03:23<1:45:07,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 610/20000 [03:23<1:44:36,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 611/20000 [03:23<1:42:32,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 612/20000 [03:24<1:43:25,  3.12it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   3%|▎         | 613/20000 [03:24<1:40:57,  3.20it/s]

Exploiting: Chose move 6 with Q-value 0.19
Exploiting: Chose move 2 with Q-value -0.38
Exploring: Chose move 7
Updated Q-value for move 7 on board OOXXO-X--: -1.00
Updated Q-value for move 2 on board OO-X--X--: -0.38
Updated Q-value for move 6 on board O--X-----: 0.19
Updated Q-value for move 3 on board ---------: 0.70
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 614/20000 [03:24<1:40:11,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 615/20000 [03:24<1:39:35,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 616/20000 [03:25<1:39:40,  3.24it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.93
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.93
Updated Q-value for move 2 on board O---X----: 1.32
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   3%|▎         | 617/20000 [03:25<1:38:06,  3.29it/s]

Exploiting: Chose move 0 with Q-value 1.25
Exploiting: Chose move 7 with Q-value 0.91
Exploiting: Chose move 2 with Q-value 0.49
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.49
Updated Q-value for move 7 on board XO--O---X: 0.91
Updated Q-value for move 0 on board ----O---X: 1.26
Updated Q-value for move 8 on board ---------: 1.54
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 618/20000 [03:25<1:41:29,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 619/20000 [03:26<1:37:57,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 620/20000 [03:26<1:35:38,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   3%|▎         | 622/20000 [03:27<1:38:32,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   3%|▎         | 623/20000 [03:27<1:42:18,  3.16it/s]

Exploiting: Chose move 2 with Q-value 0.32
Exploiting: Chose move 7 with Q-value -0.26
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OXXOO--X-: -1.00
Updated Q-value for move 7 on board OXXO-----: -0.27
Updated Q-value for move 2 on board OX-------: 0.32
Updated Q-value for move 1 on board ---------: 0.81
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 624/20000 [03:27<1:43:33,  3.12it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 626/20000 [03:28<1:37:21,  3.32it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.94
Updated Q-value for move 5 on board O---X----: 1.32
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 627/20000 [03:28<1:32:52,  3.48it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   3%|▎         | 628/20000 [03:28<1:33:48,  3.44it/s]

Exploiting: Chose move 0 with Q-value 1.26
Exploiting: Chose move 2 with Q-value 0.91
Exploiting: Chose move 3 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.49
Updated Q-value for move 2 on board XO----OX-: 0.91
Updated Q-value for move 0 on board -O-----X-: 1.26
Updated Q-value for move 7 on board ---------: 1.55
Exploring: Chose move 3


Training MENACE:   3%|▎         | 629/20000 [03:29<1:36:59,  3.33it/s]

Exploiting: Chose move 6 with Q-value 0.19
Exploiting: Chose move 2 with Q-value -0.38
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OOXXO-X--: -1.00
Updated Q-value for move 2 on board OO-X--X--: -0.39
Updated Q-value for move 6 on board O--X-----: 0.18
Updated Q-value for move 3 on board ---------: 0.70
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 630/20000 [03:29<1:40:37,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 631/20000 [03:30<2:06:07,  2.56it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 632/20000 [03:30<2:21:20,  2.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   3%|▎         | 633/20000 [03:31<2:27:34,  2.19it/s]

Exploiting: Chose move 4 with Q-value 0.18
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 0.30
Updated Q-value for move 3 on board ---------: 0.70
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 634/20000 [03:31<2:32:31,  2.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 635/20000 [03:32<2:37:34,  2.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 636/20000 [03:32<2:23:22,  2.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 637/20000 [03:32<2:10:36,  2.47it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 638/20000 [03:33<2:01:24,  2.66it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 639/20000 [03:33<1:53:17,  2.85it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   3%|▎         | 640/20000 [03:33<1:45:54,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 641/20000 [03:34<1:43:17,  3.12it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 642/20000 [03:34<1:46:39,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 643/20000 [03:34<1:43:50,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 645/20000 [03:35<1:43:46,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 646/20000 [03:35<1:40:52,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 647/20000 [03:36<1:43:28,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 649/20000 [03:36<1:35:39,  3.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 650/20000 [03:36<1:38:04,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 651/20000 [03:37<1:38:15,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 652/20000 [03:37<1:39:38,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 653/20000 [03:37<1:39:26,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 654/20000 [03:38<1:35:47,  3.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 655/20000 [03:38<1:40:16,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 656/20000 [03:38<1:39:32,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 657/20000 [03:38<1:34:46,  3.40it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 658/20000 [03:39<1:34:35,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 659/20000 [03:39<1:35:10,  3.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 660/20000 [03:39<1:37:30,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 661/20000 [03:40<1:38:52,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 662/20000 [03:40<1:39:33,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   3%|▎         | 663/20000 [03:40<1:40:21,  3.21it/s]

Exploiting: Chose move 4 with Q-value 0.30
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 0.41
Updated Q-value for move 3 on board ---------: 0.72
Exploring: Chose move 4


Training MENACE:   3%|▎         | 664/20000 [03:41<1:41:24,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 666/20000 [03:41<1:35:24,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 667/20000 [03:42<1:38:50,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 668/20000 [03:42<1:39:59,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 669/20000 [03:42<1:55:33,  2.79it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 670/20000 [03:43<2:13:11,  2.42it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 671/20000 [03:43<2:22:39,  2.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 672/20000 [03:44<2:28:20,  2.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 673/20000 [03:44<2:32:37,  2.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 674/20000 [03:45<2:32:29,  2.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 676/20000 [03:46<2:05:43,  2.56it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 677/20000 [03:46<1:59:08,  2.70it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 679/20000 [03:46<1:46:37,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 680/20000 [03:47<1:43:20,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5
Exploiting: Chose move 0 with Q-value 1.24
Exploiting: Chose move 1 with Q-value 0.90
Exploiting: Chose move 6 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.49
Updated Q-value for move 1 on board X-OO-X---: 0.91
Updated Q-value for move 0 on board --O--X---: 1.25
Updated Q-value for move 5 on board ---------: 1.52
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 682/20000 [03:47<1:48:10,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 683/20000 [03:48<1:45:05,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 685/20000 [03:48<1:44:47,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 686/20000 [03:49<1:42:54,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 687/20000 [03:49<1:43:03,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 689/20000 [03:50<1:41:48,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 690/20000 [03:50<1:38:35,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 692/20000 [03:51<1:36:58,  3.32it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.94
Updated Q-value for move 5 on board O---X----: 1.32
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 693/20000 [03:51<1:35:05,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 694/20000 [03:51<1:35:25,  3.37it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 695/20000 [03:51<1:39:54,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 696/20000 [03:52<1:37:33,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 697/20000 [03:52<1:37:51,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 698/20000 [03:52<1:38:20,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   3%|▎         | 699/20000 [03:53<1:38:50,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 700/20000 [03:53<1:37:02,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 701/20000 [03:53<1:41:39,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.93
Exploring: Chose move 7
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 7 on board O-XXXOO--: 0.14
Updated Q-value for move 3 on board O-X-X-O--: 0.94
Updated Q-value for move 2 on board O---X----: 1.32
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 702/20000 [03:54<1:39:27,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 704/20000 [03:54<1:40:28,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploit

Training MENACE:   4%|▎         | 705/20000 [03:55<1:43:34,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 706/20000 [03:55<1:51:53,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 707/20000 [03:56<2:08:19,  2.51it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 708/20000 [03:56<2:18:52,  2.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 709/20000 [03:57<2:23:57,  2.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 710/20000 [03:57<2:30:22,  2.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 712/20000 [03:58<2:24:39,  2.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 713/20000 [03:58<2:05:47,  2.56it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   4%|▎         | 714/20000 [03:58<1:55:48,  2.78it/s]

Exploiting: Chose move 2 with Q-value 0.32
Exploiting: Chose move 6 with Q-value -0.26
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OXXOO-X--: -1.00
Updated Q-value for move 6 on board OXXO-----: -0.27
Updated Q-value for move 2 on board OX-------: 0.31
Updated Q-value for move 1 on board ---------: 0.81
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 715/20000 [03:59<1:48:57,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 716/20000 [03:59<1:45:48,  3.04it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 717/20000 [03:59<1:44:57,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 719/20000 [04:00<1:41:01,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.94
Updated Q-value for move 5 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 720/20000 [04:00<1:40:43,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 721/20000 [04:01<1:41:48,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 722/20000 [04:01<1:39:09,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 723/20000 [04:01<1:40:54,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 725/20000 [04:02<1:38:23,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 w

Training MENACE:   4%|▎         | 726/20000 [04:02<1:42:28,  3.13it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 727/20000 [04:02<1:39:38,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 728/20000 [04:03<1:38:09,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 729/20000 [04:03<1:38:03,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   4%|▎         | 730/20000 [04:03<1:37:18,  3.30it/s]

Exploiting: Chose move 0 with Q-value 1.26
Exploiting: Chose move 7 with Q-value 0.91
Exploiting: Chose move 2 with Q-value 0.49
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.49
Updated Q-value for move 7 on board XO--O---X: 0.92
Updated Q-value for move 0 on board ----O---X: 1.27
Updated Q-value for move 8 on board ---------: 1.55
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 731/20000 [04:04<1:35:46,  3.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 732/20000 [04:04<1:36:08,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 733/20000 [04:04<1:35:55,  3.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 734/20000 [04:05<1:36:49,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 735/20000 [04:05<1:40:02,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 736/20000 [04:05<1:40:04,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   4%|▎         | 737/20000 [04:06<1:41:05,  3.18it/s]

Exploiting: Chose move 0 with Q-value 1.27
Exploiting: Chose move 7 with Q-value 0.92
Exploiting: Chose move 5 with Q-value 0.49
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.49
Updated Q-value for move 7 on board XOX-O----: 0.92
Updated Q-value for move 0 on board --X-O----: 1.28
Updated Q-value for move 2 on board ---------: 1.57
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 738/20000 [04:06<1:41:20,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 739/20000 [04:06<1:38:42,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   4%|▎         | 740/20000 [04:06<1:38:08,  3.27it/s]

Exploiting: Chose move 2 with Q-value 0.31
Exploiting: Chose move 8 with Q-value -0.26
Exploiting: Chose move 4 with Q-value -1.00
Updated Q-value for move 4 on board OXXO-O--X: -1.00
Updated Q-value for move 8 on board OXXO-----: -0.27
Updated Q-value for move 2 on board OX-------: 0.30
Updated Q-value for move 1 on board ---------: 0.81
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 742/20000 [04:07<1:39:56,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   4%|▎         | 744/20000 [04:08<1:35:54,  3.35it/s]

Exploiting: Chose move 0 with Q-value 1.25
Exploiting: Chose move 1 with Q-value 0.91
Exploiting: Chose move 6 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.49
Updated Q-value for move 1 on board X-OO-X---: 0.91
Updated Q-value for move 0 on board --O--X---: 1.26
Updated Q-value for move 5 on board ---------: 1.53
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploit

Training MENACE:   4%|▎         | 745/20000 [04:08<1:53:58,  2.82it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 746/20000 [04:09<2:11:31,  2.44it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 747/20000 [04:09<2:23:51,  2.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 748/20000 [04:10<2:29:45,  2.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▎         | 749/20000 [04:10<2:32:40,  2.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 750/20000 [04:11<2:34:47,  2.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 751/20000 [04:11<2:19:44,  2.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 753/20000 [04:12<1:58:55,  2.70it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 754/20000 [04:12<1:51:00,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   4%|▍         | 755/20000 [04:12<1:47:28,  2.98it/s]

Exploiting: Chose move 0 with Q-value 1.28
Exploiting: Chose move 7 with Q-value 0.92
Exploiting: Chose move 5 with Q-value 0.49
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.49
Updated Q-value for move 7 on board XOX-O----: 0.92
Updated Q-value for move 0 on board --X-O----: 1.28
Updated Q-value for move 2 on board ---------: 1.58
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 756/20000 [04:13<1:44:08,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 757/20000 [04:13<1:41:41,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 758/20000 [04:13<1:41:27,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 759/20000 [04:14<1:43:01,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 760/20000 [04:14<1:39:57,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 761/20000 [04:14<1:38:30,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 762/20000 [04:14<1:42:11,  3.14it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.94
Updated Q-value for move 5 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 763/20000 [04:15<1:42:49,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 764/20000 [04:15<1:42:05,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 765/20000 [04:15<1:43:18,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 766/20000 [04:16<1:41:15,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 767/20000 [04:16<1:40:50,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 768/20000 [04:16<1:42:49,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 770/20000 [04:17<1:38:03,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 771/20000 [04:17<1:39:58,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 772/20000 [04:18<1:35:26,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 773/20000 [04:18<1:37:22,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 774/20000 [04:18<1:46:53,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 775/20000 [04:19<2:05:38,  2.55it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 776/20000 [04:19<2:19:49,  2.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 777/20000 [04:20<2:30:05,  2.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 778/20000 [04:20<2:31:12,  2.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 779/20000 [04:21<2:43:41,  1.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 780/20000 [04:22<2:48:03,  1.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 781/20000 [04:22<2:48:52,  1.90it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 782/20000 [04:23<2:50:00,  1.88it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 783/20000 [04:23<2:44:51,  1.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 785/20000 [04:24<2:38:39,  2.02it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 786/20000 [04:24<2:19:57,  2.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 787/20000 [04:25<2:10:11,  2.46it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 788/20000 [04:25<1:59:20,  2.68it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   4%|▍         | 789/20000 [04:25<1:56:01,  2.76it/s]

Exploiting: Chose move 2 with Q-value 0.30
Exploiting: Chose move 7 with Q-value -0.27
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OXXOO--X-: -1.00
Updated Q-value for move 7 on board OXXO-----: -0.29
Updated Q-value for move 2 on board OX-------: 0.30
Updated Q-value for move 1 on board ---------: 0.80
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 790/20000 [04:26<1:51:52,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 791/20000 [04:26<1:53:00,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 792/20000 [04:26<1:51:27,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   4%|▍         | 793/20000 [04:27<1:48:21,  2.95it/s]

Exploiting: Chose move 0 with Q-value 1.28
Exploiting: Chose move 7 with Q-value 0.92
Exploiting: Chose move 5 with Q-value 0.49
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.49
Updated Q-value for move 7 on board XOX-O----: 0.93
Updated Q-value for move 0 on board --X-O----: 1.29
Updated Q-value for move 2 on board ---------: 1.59
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 794/20000 [04:27<1:45:43,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 795/20000 [04:27<1:43:19,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 796/20000 [04:28<1:40:30,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 797/20000 [04:28<1:37:27,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 798/20000 [04:28<1:38:02,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 799/20000 [04:29<1:42:33,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 800/20000 [04:29<1:40:37,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 801/20000 [04:29<1:40:01,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 802/20000 [04:29<1:39:05,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 803/20000 [04:30<1:39:07,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   4%|▍         | 805/20000 [04:30<1:35:19,  3.36it/s]

Exploiting: Chose move 0 with Q-value 1.29
Exploiting: Chose move 7 with Q-value 0.93
Exploiting: Chose move 5 with Q-value 0.49
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.93
Updated Q-value for move 0 on board --X-O----: 1.29
Updated Q-value for move 2 on board ---------: 1.60
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 806/20000 [04:31<1:33:26,  3.42it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.94
Updated Q-value for move 2 on board O---X----: 1.32
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 807/20000 [04:31<1:38:45,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 808/20000 [04:31<1:38:04,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 809/20000 [04:32<1:36:07,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 810/20000 [04:32<1:34:41,  3.38it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.94
Updated Q-value for move 2 on board O---X----: 1.32
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 811/20000 [04:32<1:37:11,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 812/20000 [04:32<1:36:13,  3.32it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   4%|▍         | 813/20000 [04:33<1:34:18,  3.39it/s]

Exploring: Chose move 7
Exploiting: Chose move 1 with Q-value 0.08
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board XXOOO--X-: -1.00
Updated Q-value for move 1 on board X--OO--X-: 0.03
Updated Q-value for move 7 on board X---O----: 0.40
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 814/20000 [04:33<1:35:50,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 815/20000 [04:33<1:35:12,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   4%|▍         | 816/20000 [04:34<1:36:38,  3.31it/s]

Exploiting: Chose move 0 with Q-value 1.27
Exploiting: Chose move 7 with Q-value 0.92
Exploiting: Chose move 2 with Q-value 0.49
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.49
Updated Q-value for move 7 on board XO--O---X: 0.92
Updated Q-value for move 0 on board ----O---X: 1.27
Updated Q-value for move 8 on board ---------: 1.56
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 817/20000 [04:34<1:39:53,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 818/20000 [04:34<1:59:51,  2.67it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 819/20000 [04:35<2:17:35,  2.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 820/20000 [04:36<2:25:39,  2.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 821/20000 [04:36<2:31:42,  2.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 822/20000 [04:37<2:36:31,  2.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 823/20000 [04:37<2:30:47,  2.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 824/20000 [04:37<2:16:19,  2.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   4%|▍         | 825/20000 [04:38<2:05:21,  2.55it/s]

Exploiting: Chose move 0 with Q-value 1.28
Exploiting: Chose move 5 with Q-value 0.92
Exploiting: Chose move 7 with Q-value 0.49
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.49
Updated Q-value for move 5 on board X--OO-X--: 0.92
Updated Q-value for move 0 on board ----O-X--: 1.28
Updated Q-value for move 6 on board ---------: 1.58
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 826/20000 [04:38<1:54:56,  2.78it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 827/20000 [04:38<1:54:50,  2.78it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 828/20000 [04:39<1:51:46,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   4%|▍         | 829/20000 [04:39<1:45:18,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 830/20000 [04:39<1:43:18,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 831/20000 [04:40<1:40:32,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 5
Exploring: Chose move 1
Updated Q-value for move 1 on board O--OXX---: -1.00
Updated Q-value for move 5 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 832/20000 [04:40<1:41:39,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 833/20000 [04:40<1:41:26,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 834/20000 [04:40<1:40:38,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 835/20000 [04:41<1:37:52,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 836/20000 [04:41<1:36:50,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 837/20000 [04:41<1:39:36,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 838/20000 [04:42<1:36:16,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 840/20000 [04:42<1:37:46,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 841/20000 [04:43<1:37:09,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 842/20000 [04:43<1:38:14,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 843/20000 [04:43<1:39:03,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 844/20000 [04:44<1:38:47,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 845/20000 [04:44<1:35:12,  3.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 846/20000 [04:44<1:35:14,  3.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 847/20000 [04:44<1:37:16,  3.28it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.94
Updated Q-value for move 2 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 848/20000 [04:45<1:39:15,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35


Training MENACE:   4%|▍         | 849/20000 [04:45<1:38:29,  3.24it/s]

Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 850/20000 [04:45<1:32:10,  3.46it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 851/20000 [04:46<1:32:37,  3.45it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 853/20000 [04:46<1:33:40,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 854/20000 [04:46<1:33:08,  3.43it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 855/20000 [04:47<1:36:26,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 856/20000 [04:47<1:51:11,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 857/20000 [04:48<2:11:30,  2.43it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 858/20000 [04:48<2:26:18,  2.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 859/20000 [04:49<2:32:12,  2.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 860/20000 [04:49<2:37:30,  2.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 861/20000 [04:50<2:38:44,  2.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 862/20000 [04:50<2:20:41,  2.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   4%|▍         | 863/20000 [04:51<2:06:31,  2.52it/s]

Exploiting: Chose move 0 with Q-value 1.29
Exploiting: Chose move 7 with Q-value 0.93
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.93
Updated Q-value for move 0 on board --X-O----: 1.30
Updated Q-value for move 2 on board ---------: 1.60
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 864/20000 [04:51<2:01:09,  2.63it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 865/20000 [04:51<1:56:19,  2.74it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 866/20000 [04:52<1:52:08,  2.84it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 867/20000 [04:52<1:47:59,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 868/20000 [04:52<1:49:24,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 869/20000 [04:53<1:47:04,  2.98it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 870/20000 [04:53<1:45:39,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 871/20000 [04:53<1:42:33,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 872/20000 [04:53<1:40:39,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 873/20000 [04:54<1:42:16,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 874/20000 [04:54<1:37:00,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 875/20000 [04:54<1:37:21,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 876/20000 [04:55<1:37:42,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 877/20000 [04:55<1:38:46,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 878/20000 [04:55<1:36:17,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 879/20000 [04:56<1:37:12,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 880/20000 [04:56<1:38:26,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 881/20000 [04:56<1:36:38,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 882/20000 [04:56<1:37:37,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 883/20000 [04:57<1:39:29,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 884/20000 [04:57<1:40:44,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   4%|▍         | 886/20000 [04:58<1:32:09,  3.46it/s]

Exploiting: Chose move 2 with Q-value 0.30
Exploiting: Chose move 6 with Q-value -0.27
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OXXOO-X--: -1.00
Updated Q-value for move 6 on board OXXO-----: -0.29
Updated Q-value for move 2 on board OX-------: 0.29
Updated Q-value for move 1 on board ---------: 0.80
Exploring: Chose move 3


Training MENACE:   4%|▍         | 887/20000 [04:58<1:35:13,  3.35it/s]

Exploring: Chose move 1
Exploiting: Chose move 5 with Q-value 0.05
Exploiting: Chose move 6 with Q-value -1.00
Updated Q-value for move 6 on board OXOXOX---: -1.00
Updated Q-value for move 5 on board OX-XO----: 0.00
Updated Q-value for move 1 on board O--X-----: 0.10
Updated Q-value for move 3 on board ---------: 0.73
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 888/20000 [04:58<1:37:50,  3.26it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 889/20000 [04:59<1:36:28,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 890/20000 [04:59<1:38:11,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Episode 5000 completed
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 891/20000 [04:59<1:34:46,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 892/20000 [05:00<1:38:15,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   4%|▍         | 893/20000 [05:00<1:34:30,  3.37it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 4
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 894/20000 [05:00<1:49:12,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 895/20000 [05:01<2:05:17,  2.54it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   4%|▍         | 896/20000 [05:01<2:19:10,  2.29it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploiting: Chose move 7 with Q-value 0.93
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.93
Updated Q-value for move 0 on board --X-O----: 1.30
Updated Q-value for move 2 on board ---------: 1.61
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 897/20000 [05:02<2:23:27,  2.22it/s]

Exploring: Chose move 1
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 898/20000 [05:02<2:27:52,  2.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 899/20000 [05:03<2:36:29,  2.03it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.94
Updated Q-value for move 2 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   4%|▍         | 900/20000 [05:03<2:21:16,  2.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 901/20000 [05:03<2:04:04,  2.57it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 903/20000 [05:04<1:47:17,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 904/20000 [05:04<1:48:04,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 905/20000 [05:05<1:42:44,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 906/20000 [05:05<1:39:09,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 907/20000 [05:05<1:41:22,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 908/20000 [05:06<1:40:58,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 909/20000 [05:06<1:37:48,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 910/20000 [05:06<1:38:26,  3.23it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 911/20000 [05:06<1:39:08,  3.21it/s]

Exploring: Chose move 5
Exploring: Chose move 8
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board O-OOXX--X: -1.00
Updated Q-value for move 8 on board O--OXX---: 0.05
Updated Q-value for move 5 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   5%|▍         | 913/20000 [05:07<1:39:33,  3.20it/s]

Exploiting: Chose move 4 with Q-value 0.41
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 0.50
Updated Q-value for move 3 on board ---------: 0.76
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 914/20000 [05:07<1:39:42,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   5%|▍         | 916/20000 [05:08<1:32:09,  3.45it/s]

Exploiting: Chose move 0 with Q-value 1.26
Exploiting: Chose move 2 with Q-value 0.91
Exploiting: Chose move 3 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.49
Updated Q-value for move 2 on board XO----OX-: 0.92
Updated Q-value for move 0 on board -O-----X-: 1.27
Updated Q-value for move 7 on board ---------: 1.56
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 917/20000 [05:08<1:35:17,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 918/20000 [05:09<1:31:34,  3.47it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 919/20000 [05:09<1:33:22,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   5%|▍         | 920/20000 [05:09<1:36:18,  3.30it/s]

Exploiting: Chose move 4 with Q-value 0.50
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 0.59
Updated Q-value for move 3 on board ---------: 0.78
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 921/20000 [05:10<1:37:19,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 922/20000 [05:10<1:40:12,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 923/20000 [05:10<1:40:15,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 924/20000 [05:10<1:39:55,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 925/20000 [05:11<1:44:05,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 926/20000 [05:11<1:41:15,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 927/20000 [05:11<1:43:04,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8
Exploiting: Chose move 0 with Q-value 1.27
Exploiting: Chose move 7 with Q-value 0.92
Exploiting: Chose move 2 with Q-value 0.49
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.92
Updated Q-value for move 0 on board ----O---X: 1.28
Updated Q-value for move 8 on board ---------: 1.57
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 929/20000 [05:12<1:41:35,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 930/20000 [05:12<1:41:54,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 931/20000 [05:13<1:42:36,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 932/20000 [05:13<1:53:28,  2.80it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 933/20000 [05:14<2:08:06,  2.48it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 934/20000 [05:14<2:19:20,  2.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 935/20000 [05:15<2:29:40,  2.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 936/20000 [05:15<2:36:43,  2.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 937/20000 [05:16<2:36:22,  2.03it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.94
Updated Q-value for move 2 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 938/20000 [05:16<2:23:27,  2.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 939/20000 [05:16<2:09:31,  2.45it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 940/20000 [05:17<2:04:37,  2.55it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   5%|▍         | 942/20000 [05:17<1:50:46,  2.87it/s]

Exploring: Chose move 1
Exploiting: Chose move 2 with Q-value 0.00
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OXXOO-X--: -1.00
Updated Q-value for move 2 on board OX--O-X--: -0.04
Updated Q-value for move 1 on board ----O-X--: 0.05
Updated Q-value for move 6 on board ---------: 1.58
Exploring: Chose move 5
Exploiting: Chose move 0 with Q-value 1.26
Exploiting: Chose move 1 with Q-value 0.91
Exploiting: Chose move 6 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.49
Updated Q-value for move 1 on board X-OO-X---: 0.92
Updated Q-value for move 0 on board --O--X---: 1.26
Updated Q-value for move 5 on board ---------: 1.54
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 944/20000 [05:18<1:43:49,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 945/20000 [05:18<1:38:17,  3.23it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 946/20000 [05:19<1:38:28,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 947/20000 [05:19<1:39:42,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 949/20000 [05:20<1:40:27,  3.16it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.94
Updated Q-value for move 2 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 950/20000 [05:20<1:37:55,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 951/20000 [05:20<1:37:09,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 952/20000 [05:21<1:40:03,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 953/20000 [05:21<1:36:39,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 954/20000 [05:21<1:37:10,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 955/20000 [05:21<1:37:53,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 956/20000 [05:22<1:34:58,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 957/20000 [05:22<1:32:19,  3.44it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 958/20000 [05:22<1:35:35,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 960/20000 [05:23<1:31:51,  3.45it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 961/20000 [05:23<1:32:27,  3.43it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 962/20000 [05:23<1:34:04,  3.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 963/20000 [05:24<1:32:27,  3.43it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4
Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose mov

Training MENACE:   5%|▍         | 965/20000 [05:24<1:36:17,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 966/20000 [05:25<1:35:16,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 967/20000 [05:25<1:34:35,  3.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 968/20000 [05:25<1:33:18,  3.40it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 969/20000 [05:26<1:33:53,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 970/20000 [05:26<1:39:10,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 971/20000 [05:26<1:56:42,  2.72it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 972/20000 [05:27<2:10:00,  2.44it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 973/20000 [05:27<2:19:42,  2.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 974/20000 [05:28<2:23:19,  2.21it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 975/20000 [05:28<2:28:57,  2.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 977/20000 [05:29<2:20:23,  2.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 978/20000 [05:30<2:03:52,  2.56it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 979/20000 [05:30<1:56:28,  2.72it/s]

Exploring: Chose move 8
Exploring: Chose move 1
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOOXX-OX: 0.00
Updated Q-value for move 5 on board OXO-X--OX: 0.20
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 980/20000 [05:30<1:50:57,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   5%|▍         | 981/20000 [05:31<1:47:23,  2.95it/s]

Exploiting: Chose move 0 with Q-value 1.26
Exploiting: Chose move 1 with Q-value 0.92
Exploiting: Chose move 6 with Q-value 0.49
Exploring: Chose move 8
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.49
Updated Q-value for move 1 on board X-OO-X---: 0.92
Updated Q-value for move 0 on board --O--X---: 1.27
Updated Q-value for move 5 on board ---------: 1.55
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 982/20000 [05:31<1:44:19,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 983/20000 [05:31<1:40:46,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 984/20000 [05:31<1:39:47,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 985/20000 [05:32<1:40:43,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 986/20000 [05:32<1:42:19,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 987/20000 [05:32<1:45:19,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 989/20000 [05:33<1:34:03,  3.37it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.94
Exploring: Chose move 1
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.94
Updated Q-value for move 5 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 990/20000 [05:33<1:39:00,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 992/20000 [05:34<1:33:21,  3.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 993/20000 [05:34<1:35:44,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 994/20000 [05:35<1:38:27,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 995/20000 [05:35<1:37:38,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 996/20000 [05:35<1:38:55,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▍         | 997/20000 [05:35<1:40:22,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploring: Chose move 8
Updated Q-value for move 8 on board OX-OXX-O-: -1.00
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   5%|▍         | 998/20000 [05:36<1:37:20,  3.25it/s]

Exploiting: Chose move 0 with Q-value 1.27
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.49
Updated Q-value for move 2 on board XO----OX-: 0.92
Updated Q-value for move 0 on board -O-----X-: 1.27
Updated Q-value for move 7 on board ---------: 1.57
Exploring: Chose move 8


Training MENACE:   5%|▍         | 999/20000 [05:36<1:36:21,  3.29it/s]

Exploiting: Chose move 0 with Q-value 1.28
Exploiting: Chose move 7 with Q-value 0.92
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.92
Updated Q-value for move 0 on board ----O---X: 1.28
Updated Q-value for move 8 on board ---------: 1.58
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1000/20000 [05:36<1:41:24,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1001/20000 [05:37<1:39:40,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1002/20000 [05:37<1:38:46,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1003/20000 [05:37<1:36:55,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1004/20000 [05:38<1:39:17,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1005/20000 [05:38<1:38:33,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1006/20000 [05:38<1:37:58,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1007/20000 [05:39<1:35:28,  3.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1008/20000 [05:39<1:36:46,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1009/20000 [05:39<1:46:03,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1010/20000 [05:40<2:06:15,  2.51it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1011/20000 [05:40<2:15:47,  2.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1012/20000 [05:41<2:23:52,  2.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1013/20000 [05:41<2:28:49,  2.13it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1014/20000 [05:42<2:30:27,  2.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1016/20000 [05:43<2:13:01,  2.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1017/20000 [05:43<2:04:28,  2.54it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.94
Updated Q-value for move 5 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1018/20000 [05:43<1:56:58,  2.70it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1019/20000 [05:44<1:49:38,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1020/20000 [05:44<1:45:43,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   5%|▌         | 1021/20000 [05:44<1:48:58,  2.90it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploiting: Chose move 7 with Q-value 0.93
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.93
Updated Q-value for move 0 on board --X-O----: 1.30
Updated Q-value for move 2 on board ---------: 1.62
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1022/20000 [05:45<1:47:36,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1023/20000 [05:45<1:39:30,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1024/20000 [05:45<1:38:39,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1025/20000 [05:45<1:38:48,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1026/20000 [05:46<1:40:39,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1027/20000 [05:46<1:37:32,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1028/20000 [05:46<1:37:37,  3.24it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1029/20000 [05:47<1:40:23,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   5%|▌         | 1030/20000 [05:47<1:40:27,  3.15it/s]

Exploiting: Chose move 0 with Q-value 1.27
Exploiting: Chose move 2 with Q-value 0.92
Exploiting: Chose move 3 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.92
Updated Q-value for move 0 on board -O-----X-: 1.28
Updated Q-value for move 7 on board ---------: 1.57
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1031/20000 [05:47<1:39:07,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1032/20000 [05:48<1:38:10,  3.22it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1033/20000 [05:48<1:38:28,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1034/20000 [05:48<1:40:06,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   5%|▌         | 1035/20000 [05:49<1:39:25,  3.18it/s]

Exploiting: Chose move 0 with Q-value 1.28
Exploiting: Chose move 2 with Q-value 0.92
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.92
Updated Q-value for move 0 on board -O-----X-: 1.29
Updated Q-value for move 7 on board ---------: 1.58
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1036/20000 [05:49<1:39:59,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1037/20000 [05:49<1:40:19,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1038/20000 [05:49<1:41:09,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1039/20000 [05:50<1:41:00,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1040/20000 [05:50<1:40:06,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1042/20000 [05:51<1:35:40,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2
Exploiting: Chose move 0 with Q-value 1.30
Exploiting: Chose move 7 with Q-value 0.93
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.93
Updated Q-value for move 0 on board --X-O----: 1.31
Updated Q-value for move 2 on board ---------: 1.62
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1043/20000 [05:51<1:37:32,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1044/20000 [05:51<1:37:09,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1045/20000 [05:52<1:38:43,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploring: Chose move 8
Exploiting: Chose move 5 with Q-value 0.00
Updated Q-value for move 5 on board OXOOX-XOX: 0.00
Updated Q-value for move 8 on board OXO-X-XO-: 0.20
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1046/20000 [05:52<1:38:00,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1047/20000 [05:52<1:40:16,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1048/20000 [05:53<1:56:49,  2.70it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1049/20000 [05:53<2:15:19,  2.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1050/20000 [05:54<2:25:27,  2.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1051/20000 [05:54<2:32:51,  2.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1052/20000 [05:55<2:37:27,  2.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1053/20000 [05:55<2:23:30,  2.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   5%|▌         | 1054/20000 [05:56<2:10:17,  2.42it/s]

Exploiting: Chose move 0 with Q-value 1.28
Exploiting: Chose move 5 with Q-value 0.92
Exploiting: Chose move 7 with Q-value 0.49
Exploring: Chose move 2
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.92
Updated Q-value for move 0 on board ----O-X--: 1.29
Updated Q-value for move 6 on board ---------: 1.59
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1055/20000 [05:56<1:59:12,  2.65it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1056/20000 [05:56<1:54:01,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1057/20000 [05:56<1:41:54,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   5%|▌         | 1058/20000 [05:57<1:40:39,  3.14it/s]

Exploiting: Chose move 0 with Q-value 1.27
Exploiting: Chose move 1 with Q-value 0.92
Exploiting: Chose move 6 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.49
Updated Q-value for move 1 on board X-OO-X---: 0.92
Updated Q-value for move 0 on board --O--X---: 1.28
Updated Q-value for move 5 on board ---------: 1.56
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1059/20000 [05:57<1:38:19,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1060/20000 [05:57<1:41:29,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1061/20000 [05:58<1:40:26,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1062/20000 [05:58<1:41:52,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1063/20000 [05:58<1:43:53,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1064/20000 [05:59<1:42:19,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   5%|▌         | 1065/20000 [05:59<1:43:27,  3.05it/s]

Exploiting: Chose move 0 with Q-value 1.29
Exploring: Chose move 5
Exploring: Chose move 3
Updated Q-value for move 3 on board XO--OXOX-: -1.00
Updated Q-value for move 5 on board XO----OX-: 0.10
Updated Q-value for move 0 on board -O-----X-: 1.29
Updated Q-value for move 7 on board ---------: 1.59
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1066/20000 [05:59<1:47:09,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1067/20000 [06:00<1:45:47,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1068/20000 [06:00<1:43:12,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1069/20000 [06:00<1:43:27,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1070/20000 [06:01<1:46:17,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   5%|▌         | 1071/20000 [06:01<1:42:38,  3.07it/s]

Exploiting: Chose move 0 with Q-value 1.29
Exploiting: Chose move 5 with Q-value 0.92
Exploiting: Chose move 7 with Q-value 0.50
Exploring: Chose move 2
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.93
Updated Q-value for move 0 on board ----O-X--: 1.29
Updated Q-value for move 6 on board ---------: 1.60
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1073/20000 [06:02<1:39:32,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.94
Updated Q-value for move 2 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 w

Training MENACE:   5%|▌         | 1074/20000 [06:02<1:40:10,  3.15it/s]

Exploring: Chose move 1
Exploiting: Chose move 5 with Q-value 0.00
Exploiting: Chose move 6 with Q-value -1.00
Updated Q-value for move 6 on board OXOXOX---: -1.00
Updated Q-value for move 5 on board OX-XO----: -0.04
Updated Q-value for move 1 on board O--X-----: 0.14
Updated Q-value for move 3 on board ---------: 0.81
Exploring: Chose move 2


Training MENACE:   5%|▌         | 1076/20000 [06:03<1:35:24,  3.31it/s]

Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 7 with Q-value 0.93
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.31
Updated Q-value for move 2 on board ---------: 1.63
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1077/20000 [06:03<1:37:30,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1078/20000 [06:03<1:37:26,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1079/20000 [06:04<1:37:03,  3.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1080/20000 [06:04<1:35:49,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1081/20000 [06:04<1:34:43,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1082/20000 [06:04<1:39:03,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1083/20000 [06:05<1:40:25,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   5%|▌         | 1084/20000 [06:05<1:42:00,  3.09it/s]

Exploiting: Chose move 0 with Q-value 1.28
Exploring: Chose move 1
Exploiting: Chose move 6 with Q-value 0.49
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.49
Updated Q-value for move 1 on board X-OO-X---: 0.92
Updated Q-value for move 0 on board --O--X---: 1.28
Updated Q-value for move 5 on board ---------: 1.57
Exploring: Chose move 0


Training MENACE:   5%|▌         | 1085/20000 [06:06<1:58:47,  2.65it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1086/20000 [06:06<2:14:01,  2.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1087/20000 [06:07<2:22:11,  2.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1088/20000 [06:07<2:30:46,  2.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1090/20000 [06:08<2:28:13,  2.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   5%|▌         | 1091/20000 [06:08<2:13:06,  2.37it/s]

Exploiting: Chose move 2 with Q-value 0.29
Exploring: Chose move 8
Exploiting: Chose move 4 with Q-value -1.00
Updated Q-value for move 4 on board OXXO-O--X: -1.00
Updated Q-value for move 8 on board OXXO-----: -0.29
Updated Q-value for move 2 on board OX-------: 0.29
Updated Q-value for move 1 on board ---------: 0.79
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1092/20000 [06:09<2:03:45,  2.55it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1093/20000 [06:09<1:49:51,  2.87it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1094/20000 [06:09<1:47:44,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploring: Chose move 5
Updated Q-value for move 5 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1095/20000 [06:10<1:43:40,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1096/20000 [06:10<1:42:04,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1097/20000 [06:10<1:42:05,  3.09it/s]

Exploring: Chose move 6
Exploring: Chose move 3
Updated Q-value for move 3 on board O-O-X-X--: -1.00
Updated Q-value for move 6 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   5%|▌         | 1098/20000 [06:11<1:40:10,  3.15it/s]

Exploiting: Chose move 0 with Q-value 1.28
Exploiting: Chose move 1 with Q-value 0.92
Exploring: Chose move 6
Exploring: Chose move 8
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.50
Updated Q-value for move 1 on board X-OO-X---: 0.93
Updated Q-value for move 0 on board --O--X---: 1.29
Updated Q-value for move 5 on board ---------: 1.58
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   5%|▌         | 1099/20000 [06:11<1:39:24,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1101/20000 [06:11<1:33:13,  3.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   6%|▌         | 1102/20000 [06:12<1:39:49,  3.16it/s]

Exploiting: Chose move 0 with Q-value 1.29
Exploiting: Chose move 2 with Q-value 0.92
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.93
Updated Q-value for move 0 on board -O-----X-: 1.29
Updated Q-value for move 7 on board ---------: 1.60
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1103/20000 [06:12<1:41:08,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1105/20000 [06:13<1:38:53,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1106/20000 [06:13<1:39:02,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1107/20000 [06:13<1:41:19,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1108/20000 [06:14<1:43:52,  3.03it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOOXX-OX: 0.00
Updated Q-value for move 5 on board OXO-X--OX: 0.23
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1110/20000 [06:14<1:42:55,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 5
Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value -1.00
Updated Q-value for move 1 on board O-OOXX--X: -1.00
Updated Q-value for move 8 on board O--OXX---: 0.00
Updated Q-value for move 5 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1111/20000 [06:15<1:40:30,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1112/20000 [06:15<1:40:15,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1113/20000 [06:15<1:40:32,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1114/20000 [06:16<1:39:02,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   6%|▌         | 1115/20000 [06:16<1:40:38,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1116/20000 [06:16<1:40:12,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1117/20000 [06:17<1:35:07,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1118/20000 [06:17<1:36:28,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1119/20000 [06:17<1:35:32,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1120/20000 [06:18<1:36:29,  3.26it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1121/20000 [06:18<1:37:29,  3.23it/s]

Exploring: Chose move 6
Exploring: Chose move 5
Updated Q-value for move 5 on board O-O-X-X--: -1.00
Updated Q-value for move 6 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1122/20000 [06:18<1:49:05,  2.88it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1123/20000 [06:19<2:05:23,  2.51it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   6%|▌         | 1124/20000 [06:19<2:17:06,  2.29it/s]

Exploiting: Chose move 0 with Q-value 1.29
Exploiting: Chose move 1 with Q-value 0.93
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.50
Updated Q-value for move 1 on board X-OO-X---: 0.93
Updated Q-value for move 0 on board --O--X---: 1.29
Updated Q-value for move 5 on board ---------: 1.59
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1125/20000 [06:20<2:24:21,  2.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1126/20000 [06:20<2:28:43,  2.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1127/20000 [06:21<2:32:50,  2.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1129/20000 [06:22<2:08:26,  2.45it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1130/20000 [06:22<1:59:14,  2.64it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1131/20000 [06:22<1:55:40,  2.72it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1132/20000 [06:23<1:50:54,  2.84it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1133/20000 [06:23<1:47:55,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1134/20000 [06:23<1:46:27,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   6%|▌         | 1135/20000 [06:23<1:41:08,  3.11it/s]

Exploiting: Chose move 0 with Q-value 1.28
Exploiting: Chose move 7 with Q-value 0.92
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.93
Updated Q-value for move 0 on board ----O---X: 1.29
Updated Q-value for move 8 on board ---------: 1.58
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1137/20000 [06:24<1:37:14,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploit

Training MENACE:   6%|▌         | 1138/20000 [06:24<1:37:55,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1139/20000 [06:25<1:38:32,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   6%|▌         | 1140/20000 [06:25<1:37:45,  3.22it/s]

Exploiting: Chose move 0 with Q-value 1.29
Exploiting: Chose move 5 with Q-value 0.93
Exploiting: Chose move 7 with Q-value 0.50
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.93
Updated Q-value for move 0 on board ----O-X--: 1.30
Updated Q-value for move 6 on board ---------: 1.60
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1142/20000 [06:26<1:30:42,  3.46it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.94
Updated Q-value for move 5 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1143/20000 [06:26<1:32:26,  3.40it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1144/20000 [06:26<1:32:43,  3.39it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.94
Updated Q-value for move 2 on board O---X----: 1.33
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1145/20000 [06:26<1:33:24,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1146/20000 [06:27<1:31:46,  3.42it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1147/20000 [06:27<1:33:14,  3.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1149/20000 [06:28<1:34:22,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   6%|▌         | 1151/20000 [06:28<1:34:16,  3.33it/s]

Exploiting: Chose move 4 with Q-value 0.59
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 0.66
Updated Q-value for move 3 on board ---------: 0.84
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1152/20000 [06:29<1:35:48,  3.28it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.94
Updated Q-value for move 5 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   6%|▌         | 1153/20000 [06:29<1:35:54,  3.27it/s]

Exploiting: Chose move 4 with Q-value 0.66
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 0.73
Updated Q-value for move 3 on board ---------: 0.87
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1154/20000 [06:29<1:39:00,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1155/20000 [06:30<1:40:15,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1157/20000 [06:30<1:35:11,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1158/20000 [06:30<1:37:47,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1159/20000 [06:31<1:38:03,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1160/20000 [06:31<1:37:06,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1161/20000 [06:32<1:56:54,  2.69it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1162/20000 [06:32<2:08:46,  2.44it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.94
Updated Q-value for move 2 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1163/20000 [06:33<2:21:05,  2.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1164/20000 [06:33<2:26:42,  2.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1165/20000 [06:34<2:28:41,  2.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1166/20000 [06:34<2:35:26,  2.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1167/20000 [06:34<2:16:56,  2.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1168/20000 [06:35<2:06:59,  2.47it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1169/20000 [06:35<1:56:49,  2.69it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1170/20000 [06:35<1:50:43,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1171/20000 [06:36<1:49:00,  2.88it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   6%|▌         | 1172/20000 [06:36<1:43:05,  3.04it/s]

Exploiting: Chose move 0 with Q-value 1.29
Exploiting: Chose move 2 with Q-value 0.93
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.93
Updated Q-value for move 0 on board -O-----X-: 1.30
Updated Q-value for move 7 on board ---------: 1.61
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1173/20000 [06:36<1:46:07,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1174/20000 [06:37<1:49:50,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   6%|▌         | 1175/20000 [06:37<1:43:18,  3.04it/s]

Exploiting: Chose move 0 with Q-value 1.29
Exploiting: Chose move 7 with Q-value 0.93
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.93
Updated Q-value for move 0 on board ----O---X: 1.29
Updated Q-value for move 8 on board ---------: 1.59
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1176/20000 [06:37<1:41:07,  3.10it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   6%|▌         | 1177/20000 [06:38<1:40:28,  3.12it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1178/20000 [06:38<1:41:35,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1179/20000 [06:38<1:42:34,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   6%|▌         | 1180/20000 [06:39<1:38:34,  3.18it/s]

Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.31
Updated Q-value for move 2 on board ---------: 1.63
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1181/20000 [06:39<1:39:10,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1183/20000 [06:40<1:36:45,  3.24it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1184/20000 [06:40<1:37:14,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1185/20000 [06:40<1:36:07,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1186/20000 [06:41<1:42:15,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1187/20000 [06:41<1:41:12,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1188/20000 [06:41<1:38:11,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1189/20000 [06:41<1:40:50,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   6%|▌         | 1190/20000 [06:42<1:40:50,  3.11it/s]

Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.32
Updated Q-value for move 2 on board ---------: 1.64
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1191/20000 [06:42<1:39:48,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1192/20000 [06:42<1:41:45,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1193/20000 [06:43<1:35:02,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1194/20000 [06:43<1:38:19,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1195/20000 [06:43<1:38:20,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   6%|▌         | 1196/20000 [06:44<1:35:50,  3.27it/s]

Exploring: Chose move 5
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1197/20000 [06:44<1:39:48,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1198/20000 [06:44<1:43:15,  3.03it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.94
Updated Q-value for move 2 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1199/20000 [06:45<1:59:49,  2.61it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1200/20000 [06:45<2:18:54,  2.26it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.94
Updated Q-value for move 5 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1201/20000 [06:46<2:25:55,  2.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1202/20000 [06:46<2:27:50,  2.12it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.94
Updated Q-value for move 2 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1203/20000 [06:47<2:28:41,  2.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1204/20000 [06:47<2:25:19,  2.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1205/20000 [06:48<2:10:22,  2.40it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1206/20000 [06:48<1:58:29,  2.64it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1207/20000 [06:48<1:52:55,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1208/20000 [06:49<1:48:10,  2.90it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1209/20000 [06:49<1:46:28,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1210/20000 [06:49<1:45:28,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1211/20000 [06:50<1:43:06,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1212/20000 [06:50<1:43:07,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1213/20000 [06:50<1:41:31,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   6%|▌         | 1214/20000 [06:50<1:41:03,  3.10it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.32
Updated Q-value for move 2 on board ---------: 1.64
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1215/20000 [06:51<1:38:33,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   6%|▌         | 1216/20000 [06:51<1:36:15,  3.25it/s]

Exploiting: Chose move 0 with Q-value 1.29
Exploiting: Chose move 7 with Q-value 0.93
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.93
Updated Q-value for move 0 on board ----O---X: 1.30
Updated Q-value for move 8 on board ---------: 1.60
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1217/20000 [06:51<1:40:23,  3.12it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   6%|▌         | 1218/20000 [06:52<1:38:05,  3.19it/s]

Exploiting: Chose move 4 with Q-value 0.73
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 0.79
Updated Q-value for move 3 on board ---------: 0.90
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1219/20000 [06:52<1:38:54,  3.16it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1220/20000 [06:52<1:43:15,  3.03it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1222/20000 [06:53<1:39:25,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploit

Training MENACE:   6%|▌         | 1223/20000 [06:53<1:49:07,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   6%|▌         | 1224/20000 [06:54<1:43:43,  3.02it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1225/20000 [06:54<1:41:16,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1227/20000 [06:55<1:38:27,  3.18it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   6%|▌         | 1228/20000 [06:55<1:38:52,  3.16it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1229/20000 [06:55<1:32:50,  3.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1230/20000 [06:56<1:33:38,  3.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1231/20000 [06:56<1:34:38,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1232/20000 [06:56<1:33:34,  3.34it/s]

Exploring: Chose move 5
Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value -1.00
Updated Q-value for move 1 on board O-OOXX--X: -1.00
Updated Q-value for move 8 on board O--OXX---: -0.04
Updated Q-value for move 5 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1233/20000 [06:56<1:37:59,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1234/20000 [06:57<1:35:54,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1235/20000 [06:57<1:36:56,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   6%|▌         | 1236/20000 [06:58<1:48:14,  2.89it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploiting: Chose move 7 with Q-value 0.93
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.93
Updated Q-value for move 0 on board ----O---X: 1.30
Updated Q-value for move 8 on board ---------: 1.61
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1237/20000 [06:58<2:05:48,  2.49it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1238/20000 [06:59<2:16:30,  2.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1239/20000 [06:59<2:20:13,  2.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1240/20000 [07:00<2:25:49,  2.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploring: Chose move 6
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1242/20000 [07:00<2:24:40,  2.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1243/20000 [07:01<2:11:24,  2.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1244/20000 [07:01<2:04:58,  2.50it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1245/20000 [07:01<1:56:36,  2.68it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1246/20000 [07:02<1:52:58,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1247/20000 [07:02<1:49:08,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▌         | 1248/20000 [07:02<1:46:00,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   6%|▌         | 1249/20000 [07:03<1:43:54,  3.01it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploiting: Chose move 2 with Q-value 0.93
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.93
Updated Q-value for move 0 on board -O-----X-: 1.30
Updated Q-value for move 7 on board ---------: 1.61
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1250/20000 [07:03<1:42:46,  3.04it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1251/20000 [07:03<1:41:35,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1252/20000 [07:04<1:41:37,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1253/20000 [07:04<1:40:59,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   6%|▋         | 1254/20000 [07:04<1:41:00,  3.09it/s]

Exploiting: Chose move 0 with Q-value 1.29
Exploiting: Chose move 1 with Q-value 0.93
Exploiting: Chose move 6 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.50
Updated Q-value for move 1 on board X-OO-X---: 0.93
Updated Q-value for move 0 on board --O--X---: 1.30
Updated Q-value for move 5 on board ---------: 1.60
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1255/20000 [07:05<1:44:09,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1256/20000 [07:05<1:44:20,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1257/20000 [07:05<1:42:58,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1258/20000 [07:06<1:42:43,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1259/20000 [07:06<1:41:09,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1260/20000 [07:06<1:42:50,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1261/20000 [07:07<1:44:24,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1262/20000 [07:07<1:38:08,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1263/20000 [07:07<1:39:01,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1264/20000 [07:08<1:42:07,  3.06it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1265/20000 [07:08<1:41:19,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1266/20000 [07:08<1:42:41,  3.04it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.95
Updated Q-value for move 2 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1267/20000 [07:09<1:41:21,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1268/20000 [07:09<1:39:50,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1269/20000 [07:09<1:38:21,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1270/20000 [07:10<1:42:13,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1271/20000 [07:10<1:40:49,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1272/20000 [07:10<1:41:19,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1273/20000 [07:11<1:57:42,  2.65it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.94
Updated Q-value for move 5 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1274/20000 [07:11<2:12:05,  2.36it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1275/20000 [07:12<2:19:34,  2.24it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.94
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.95
Updated Q-value for move 5 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1276/20000 [07:12<2:23:43,  2.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1277/20000 [07:13<2:28:47,  2.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1278/20000 [07:13<2:37:10,  1.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1279/20000 [07:14<2:23:13,  2.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1280/20000 [07:14<2:10:26,  2.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1281/20000 [07:14<2:03:23,  2.53it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1282/20000 [07:15<1:58:33,  2.63it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1283/20000 [07:15<1:51:55,  2.79it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1284/20000 [07:15<1:55:21,  2.70it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1285/20000 [07:16<1:48:59,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1286/20000 [07:16<1:45:20,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1287/20000 [07:16<1:47:21,  2.91it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.95
Updated Q-value for move 5 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1288/20000 [07:17<1:43:30,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   6%|▋         | 1289/20000 [07:17<1:42:47,  3.03it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1290/20000 [07:17<1:39:41,  3.13it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1291/20000 [07:18<1:38:46,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1292/20000 [07:18<1:39:30,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1293/20000 [07:18<1:40:19,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1294/20000 [07:19<1:39:13,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   6%|▋         | 1295/20000 [07:19<1:36:28,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1296/20000 [07:19<1:37:42,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1297/20000 [07:20<1:38:17,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1298/20000 [07:20<1:37:16,  3.20it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1299/20000 [07:20<1:32:45,  3.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   6%|▋         | 1300/20000 [07:20<1:36:29,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1301/20000 [07:21<1:37:08,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1302/20000 [07:21<1:36:59,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1303/20000 [07:21<1:37:54,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1304/20000 [07:22<1:36:36,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1305/20000 [07:22<1:39:08,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1306/20000 [07:22<1:39:13,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1307/20000 [07:23<1:40:22,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1308/20000 [07:23<1:42:20,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1309/20000 [07:23<1:40:06,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1310/20000 [07:24<1:49:05,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1311/20000 [07:24<2:04:54,  2.49it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1312/20000 [07:25<2:16:09,  2.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   7%|▋         | 1313/20000 [07:25<2:23:31,  2.17it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploring: Chose move 7
Updated Q-value for move 7 on board X--OO-X--: -1.00
Updated Q-value for move 0 on board ----O-X--: 1.30
Updated Q-value for move 6 on board ---------: 1.61
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1314/20000 [07:26<2:31:05,  2.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1316/20000 [07:27<2:24:42,  2.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1317/20000 [07:27<2:11:01,  2.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1318/20000 [07:27<2:01:25,  2.56it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1319/20000 [07:28<1:54:48,  2.71it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1320/20000 [07:28<1:47:14,  2.90it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1321/20000 [07:28<1:45:57,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1322/20000 [07:29<1:46:10,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1323/20000 [07:29<1:42:57,  3.02it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.95
Exploring: Chose move 7
Updated Q-value for move 7 on board O-OOXXX--: -1.00
Updated Q-value for move 6 on board O--OXX---: 0.95
Updated Q-value for move 5 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1324/20000 [07:29<1:39:39,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1325/20000 [07:30<1:39:04,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1326/20000 [07:30<1:39:52,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   7%|▋         | 1327/20000 [07:30<1:41:45,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   7%|▋         | 1328/20000 [07:31<1:43:59,  2.99it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploring: Chose move 3
Exploring: Chose move 8
Updated Q-value for move 8 on board XO-XO-OX-: -1.00
Updated Q-value for move 3 on board XO----OX-: 0.05
Updated Q-value for move 0 on board -O-----X-: 1.31
Updated Q-value for move 7 on board ---------: 1.62
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1329/20000 [07:31<1:46:27,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   7%|▋         | 1331/20000 [07:32<1:39:00,  3.14it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.32
Updated Q-value for move 2 on board ---------: 1.65
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   7%|▋         | 1332/20000 [07:32<1:37:13,  3.20it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploiting: Chose move 7 with Q-value 0.93
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.93
Updated Q-value for move 0 on board ----O---X: 1.30
Updated Q-value for move 8 on board ---------: 1.61
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1333/20000 [07:32<1:35:57,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1334/20000 [07:32<1:37:09,  3.20it/s]

Exploring: Chose move 3
Exploring: Chose move 8
Exploiting: Chose move 2 with Q-value 0.05
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board OOXXXOO-X: 0.00
Updated Q-value for move 2 on board OO-XXO--X: 0.10
Updated Q-value for move 8 on board O--XXO---: 0.11
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1336/20000 [07:33<1:37:20,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.95
Updated Q-value for move 5 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1338/20000 [07:34<1:37:24,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1339/20000 [07:34<1:32:58,  3.34it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1340/20000 [07:34<1:36:27,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1341/20000 [07:35<1:37:14,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploring: Chose move 2
Updated Q-value for move 2 on board OX-OXX-O-: -1.00
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1342/20000 [07:35<1:41:00,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1343/20000 [07:35<1:38:41,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1344/20000 [07:36<1:37:18,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1345/20000 [07:36<1:39:44,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1346/20000 [07:36<1:38:28,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1347/20000 [07:37<1:34:24,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1348/20000 [07:37<1:47:28,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1349/20000 [07:38<2:05:44,  2.47it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1350/20000 [07:38<2:19:51,  2.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1351/20000 [07:39<2:28:19,  2.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   7%|▋         | 1352/20000 [07:39<2:34:07,  2.02it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploiting: Chose move 5 with Q-value 0.93
Exploiting: Chose move 7 with Q-value 0.50
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.93
Updated Q-value for move 0 on board ----O-X--: 1.30
Updated Q-value for move 6 on board ---------: 1.62
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1353/20000 [07:40<2:40:30,  1.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1354/20000 [07:40<2:21:57,  2.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1355/20000 [07:40<2:12:05,  2.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1356/20000 [07:41<2:01:07,  2.57it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1357/20000 [07:41<1:54:44,  2.71it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1358/20000 [07:41<1:50:44,  2.81it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1359/20000 [07:42<1:49:43,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1360/20000 [07:42<1:45:15,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1361/20000 [07:42<1:45:49,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   7%|▋         | 1362/20000 [07:43<1:38:41,  3.15it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 7 with Q-value 0.94
Exploring: Chose move 8
Updated Q-value for move 8 on board XOXOO--X-: -1.00
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.33
Updated Q-value for move 2 on board ---------: 1.65
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1363/20000 [07:43<1:35:01,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1364/20000 [07:43<1:36:10,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1365/20000 [07:44<1:38:32,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1366/20000 [07:44<1:37:47,  3.18it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1367/20000 [07:44<1:39:19,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   7%|▋         | 1368/20000 [07:44<1:39:46,  3.11it/s]

Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 2 with Q-value 0.93
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.93
Updated Q-value for move 0 on board -O-----X-: 1.31
Updated Q-value for move 7 on board ---------: 1.62
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1369/20000 [07:45<1:41:32,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1370/20000 [07:45<1:41:29,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1371/20000 [07:46<1:43:38,  3.00it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.95
Updated Q-value for move 2 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1372/20000 [07:46<1:42:21,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1373/20000 [07:46<1:43:46,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1374/20000 [07:47<1:44:27,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1375/20000 [07:47<1:45:26,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1376/20000 [07:47<1:37:46,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1377/20000 [07:47<1:37:38,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1378/20000 [07:48<1:37:40,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1379/20000 [07:48<1:40:33,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1380/20000 [07:48<1:44:34,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1381/20000 [07:49<1:43:28,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1382/20000 [07:49<1:42:40,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1383/20000 [07:49<1:42:48,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1384/20000 [07:50<1:40:38,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1385/20000 [07:50<2:01:09,  2.56it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1386/20000 [07:51<2:14:18,  2.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   7%|▋         | 1387/20000 [07:51<2:23:14,  2.17it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 7
Exploring: Chose move 3
Updated Q-value for move 3 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   7%|▋         | 1388/20000 [07:52<2:30:41,  2.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1389/20000 [07:52<2:32:04,  2.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1390/20000 [07:53<2:32:58,  2.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1391/20000 [07:53<2:16:11,  2.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1392/20000 [07:54<2:03:42,  2.51it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1393/20000 [07:54<1:58:07,  2.63it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1394/20000 [07:54<1:52:34,  2.75it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1395/20000 [07:54<1:48:48,  2.85it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1396/20000 [07:55<1:45:27,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1397/20000 [07:55<1:40:47,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1398/20000 [07:55<1:42:37,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1399/20000 [07:56<1:44:45,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1400/20000 [07:56<1:40:56,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1401/20000 [07:56<1:38:45,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1402/20000 [07:57<1:38:53,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1403/20000 [07:57<1:37:19,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1404/20000 [07:57<1:38:14,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1405/20000 [07:58<1:34:18,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1406/20000 [07:58<1:36:52,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1407/20000 [07:58<1:38:27,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   7%|▋         | 1408/20000 [07:59<1:42:55,  3.01it/s]

Exploiting: Chose move 2 with Q-value 0.29
Exploiting: Chose move 7 with Q-value -0.29
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OXXOO--X-: -1.00
Updated Q-value for move 7 on board OXXO-----: -0.30
Updated Q-value for move 2 on board OX-------: 0.28
Updated Q-value for move 1 on board ---------: 0.79
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   7%|▋         | 1409/20000 [07:59<1:45:00,  2.95it/s]

Exploiting: Chose move 4 with Q-value 0.79
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 0.85
Updated Q-value for move 3 on board ---------: 0.94
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1410/20000 [07:59<1:43:06,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1411/20000 [08:00<1:43:38,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1412/20000 [08:00<1:40:59,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1413/20000 [08:00<1:36:04,  3.22it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1414/20000 [08:01<1:34:52,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1415/20000 [08:01<1:36:08,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1416/20000 [08:01<1:36:43,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1417/20000 [08:01<1:35:04,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1418/20000 [08:02<1:36:17,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1419/20000 [08:02<1:37:36,  3.17it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1420/20000 [08:02<1:38:08,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1421/20000 [08:03<1:38:20,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1422/20000 [08:03<1:54:30,  2.70it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1423/20000 [08:04<2:10:11,  2.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1424/20000 [08:04<2:20:07,  2.21it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1425/20000 [08:05<2:27:28,  2.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1426/20000 [08:05<2:35:11,  1.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1427/20000 [08:06<2:34:06,  2.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1428/20000 [08:06<2:19:48,  2.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1429/20000 [08:07<2:07:08,  2.43it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1430/20000 [08:07<1:56:18,  2.66it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1431/20000 [08:07<1:53:47,  2.72it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1432/20000 [08:07<1:46:51,  2.90it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1433/20000 [08:08<1:41:29,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1434/20000 [08:08<1:44:52,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   7%|▋         | 1435/20000 [08:08<1:42:15,  3.03it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1437/20000 [08:09<1:39:25,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1438/20000 [08:09<1:41:05,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1439/20000 [08:10<1:40:20,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1440/20000 [08:10<1:38:51,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   7%|▋         | 1441/20000 [08:10<1:43:53,  2.98it/s]

Exploiting: Chose move 4 with Q-value 0.85
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 0.90
Updated Q-value for move 3 on board ---------: 0.98
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1442/20000 [08:11<1:41:08,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1443/20000 [08:11<1:42:50,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1444/20000 [08:11<1:46:58,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1445/20000 [08:12<1:44:30,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1446/20000 [08:12<1:42:15,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1447/20000 [08:12<1:42:58,  3.00it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.95
Updated Q-value for move 2 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1448/20000 [08:13<1:40:09,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1449/20000 [08:13<1:39:04,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1451/20000 [08:14<1:36:43,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1452/20000 [08:14<1:36:42,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1453/20000 [08:14<1:39:06,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1454/20000 [08:15<1:40:08,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   7%|▋         | 1455/20000 [08:15<1:39:17,  3.11it/s]

Exploiting: Chose move 2 with Q-value 0.28
Exploiting: Chose move 6 with Q-value -0.29
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OXXOO-X--: -1.00
Updated Q-value for move 6 on board OXXO-----: -0.30
Updated Q-value for move 2 on board OX-------: 0.28
Updated Q-value for move 1 on board ---------: 0.79
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1456/20000 [08:15<1:40:29,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1457/20000 [08:16<1:40:46,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1458/20000 [08:16<1:50:11,  2.80it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1459/20000 [08:17<2:04:05,  2.49it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1460/20000 [08:17<2:12:01,  2.34it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1461/20000 [08:18<2:18:54,  2.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   7%|▋         | 1462/20000 [08:18<2:24:50,  2.13it/s]

Exploiting: Chose move 4 with Q-value 0.90
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 0.95
Updated Q-value for move 3 on board ---------: 1.01
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1463/20000 [08:19<2:29:41,  2.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1464/20000 [08:19<2:33:37,  2.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1465/20000 [08:19<2:19:02,  2.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1466/20000 [08:20<2:09:04,  2.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1467/20000 [08:20<2:01:09,  2.55it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1468/20000 [08:20<1:54:48,  2.69it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   7%|▋         | 1469/20000 [08:21<1:49:02,  2.83it/s]

Exploring: Chose move 1
Exploiting: Chose move 2 with Q-value 0.21
Exploiting: Chose move 3 with Q-value 0.17
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board OXXXOOO-X: 0.00
Updated Q-value for move 3 on board OXX-OO--X: 0.20
Updated Q-value for move 2 on board OX--O---X: 0.26
Updated Q-value for move 1 on board ----O---X: 0.07
Updated Q-value for move 8 on board ---------: 1.62
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1470/20000 [08:21<1:53:26,  2.72it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   7%|▋         | 1471/20000 [08:21<1:48:25,  2.85it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.33
Updated Q-value for move 2 on board ---------: 1.66
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1472/20000 [08:22<1:48:27,  2.85it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1473/20000 [08:22<1:43:50,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1474/20000 [08:22<1:40:58,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1475/20000 [08:23<1:43:37,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1476/20000 [08:23<1:41:45,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1477/20000 [08:23<1:40:59,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1478/20000 [08:24<1:40:02,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1479/20000 [08:24<1:38:46,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1481/20000 [08:25<1:38:29,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1482/20000 [08:25<1:38:51,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1483/20000 [08:25<1:37:44,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1484/20000 [08:26<1:39:29,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1485/20000 [08:26<1:39:31,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1486/20000 [08:26<1:36:41,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1487/20000 [08:27<1:34:28,  3.27it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1489/20000 [08:27<1:34:14,  3.27it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   7%|▋         | 1490/20000 [08:27<1:33:38,  3.29it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploiting: Chose move 5 with Q-value 0.93
Exploring: Chose move 7
Exploring: Chose move 2
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.93
Updated Q-value for move 0 on board ----O-X--: 1.31
Updated Q-value for move 6 on board ---------: 1.62
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1491/20000 [08:28<1:35:08,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1492/20000 [08:28<1:36:48,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1494/20000 [08:29<1:35:23,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1495/20000 [08:29<1:35:45,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1496/20000 [08:30<1:56:38,  2.64it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1497/20000 [08:30<2:10:33,  2.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1498/20000 [08:31<2:17:50,  2.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   7%|▋         | 1499/20000 [08:31<2:21:54,  2.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1500/20000 [08:32<2:27:35,  2.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1501/20000 [08:32<2:37:47,  1.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1502/20000 [08:33<2:21:27,  2.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1503/20000 [08:33<2:04:11,  2.48it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1504/20000 [08:33<1:59:18,  2.58it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1505/20000 [08:33<1:51:38,  2.76it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1506/20000 [08:34<1:47:59,  2.85it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1507/20000 [08:34<1:45:18,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1508/20000 [08:34<1:42:08,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1509/20000 [08:35<1:37:50,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1510/20000 [08:35<1:40:59,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1512/20000 [08:36<1:35:11,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1513/20000 [08:36<1:35:33,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1514/20000 [08:36<1:40:13,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1515/20000 [08:37<1:39:59,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1516/20000 [08:37<1:38:16,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1517/20000 [08:37<1:40:38,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1518/20000 [08:38<1:38:26,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1519/20000 [08:38<1:38:43,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1520/20000 [08:38<1:35:02,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1521/20000 [08:39<1:38:59,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1522/20000 [08:39<1:33:14,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1523/20000 [08:39<1:28:49,  3.47it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1525/20000 [08:40<1:30:25,  3.41it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1526/20000 [08:40<1:33:43,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1527/20000 [08:40<1:36:12,  3.20it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1528/20000 [08:41<1:36:22,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1529/20000 [08:41<1:39:22,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   8%|▊         | 1530/20000 [08:41<1:39:48,  3.08it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1531/20000 [08:42<1:38:23,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1532/20000 [08:42<1:40:16,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1533/20000 [08:42<1:42:43,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1534/20000 [08:43<2:02:18,  2.52it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1535/20000 [08:43<2:13:50,  2.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1536/20000 [08:44<2:23:42,  2.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1537/20000 [08:44<2:30:04,  2.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1538/20000 [08:45<2:32:28,  2.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1539/20000 [08:45<2:33:31,  2.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1540/20000 [08:46<2:17:11,  2.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1541/20000 [08:46<2:04:23,  2.47it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1542/20000 [08:46<1:54:28,  2.69it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1543/20000 [08:47<1:48:14,  2.84it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1544/20000 [08:47<1:46:43,  2.88it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1545/20000 [08:47<1:42:39,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1546/20000 [08:48<1:42:07,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   8%|▊         | 1547/20000 [08:48<1:41:56,  3.02it/s]

Exploiting: Chose move 4 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 0.99
Updated Q-value for move 3 on board ---------: 1.05
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1548/20000 [08:48<1:38:32,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1549/20000 [08:49<1:37:46,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:   8%|▊         | 1550/20000 [08:49<1:38:19,  3.13it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.33
Updated Q-value for move 2 on board ---------: 1.66
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1551/20000 [08:49<1:41:44,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1552/20000 [08:50<1:43:15,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   8%|▊         | 1553/20000 [08:50<1:40:27,  3.06it/s]

Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 2 with Q-value 0.93
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.93
Updated Q-value for move 0 on board -O-----X-: 1.31
Updated Q-value for move 7 on board ---------: 1.63
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1554/20000 [08:50<1:41:37,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1555/20000 [08:51<1:39:50,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1556/20000 [08:51<1:40:06,  3.07it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.95
Updated Q-value for move 5 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1557/20000 [08:51<1:37:23,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1558/20000 [08:52<1:37:09,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1559/20000 [08:52<1:37:47,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1560/20000 [08:52<1:38:47,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1561/20000 [08:52<1:35:19,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1562/20000 [08:53<1:37:01,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   8%|▊         | 1563/20000 [08:53<1:37:46,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1564/20000 [08:53<1:37:15,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1565/20000 [08:54<1:40:27,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1566/20000 [08:54<1:41:32,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1567/20000 [08:54<1:38:23,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1569/20000 [08:55<1:37:57,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1570/20000 [08:55<1:39:12,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 8
Exploring: Chose move 5
Updated Q-value for move 5 on board O-O-X---X: -1.00
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1571/20000 [08:56<1:57:03,  2.62it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   8%|▊         | 1573/20000 [08:57<2:15:42,  2.26it/s]

Exploiting: Chose move 4 with Q-value 0.99
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.02
Updated Q-value for move 3 on board ---------: 1.09
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1575/20000 [08:58<2:24:28,  2.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1576/20000 [08:58<2:28:47,  2.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1577/20000 [08:59<2:17:33,  2.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1578/20000 [08:59<2:02:04,  2.52it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1579/20000 [08:59<1:53:58,  2.69it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1580/20000 [09:00<1:46:34,  2.88it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1581/20000 [09:00<1:41:18,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1582/20000 [09:00<1:40:28,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1583/20000 [09:01<1:37:46,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1584/20000 [09:01<1:35:18,  3.22it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1585/20000 [09:01<1:36:52,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1586/20000 [09:02<1:38:23,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1587/20000 [09:02<1:37:11,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1588/20000 [09:02<1:37:09,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1589/20000 [09:02<1:37:37,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1590/20000 [09:03<1:36:57,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1591/20000 [09:03<1:37:13,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1593/20000 [09:04<1:36:02,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:   8%|▊         | 1594/20000 [09:04<1:36:43,  3.17it/s]

Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 5 with Q-value 0.93
Exploiting: Chose move 7 with Q-value 0.50
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.93
Updated Q-value for move 0 on board ----O-X--: 1.31
Updated Q-value for move 6 on board ---------: 1.63
Exploring: Chose move 3
Exploiting: Chose move 4 with Q-value 1.02
Exploiting: Chose move 1 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.06
Updated Q-value for move 3 on board ---------: 1.13
Exploring: Chose move 5


Training MENACE:   8%|▊         | 1596/20000 [09:05<1:36:01,  3.19it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploiting: Chose move 1 with Q-value 0.93
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.50
Updated Q-value for move 1 on board X-OO-X---: 0.93
Updated Q-value for move 0 on board --O--X---: 1.30
Updated Q-value for move 5 on board ---------: 1.60
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1597/20000 [09:05<1:38:09,  3.12it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1598/20000 [09:05<1:36:06,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1599/20000 [09:06<1:34:42,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1600/20000 [09:06<1:36:54,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   8%|▊         | 1601/20000 [09:06<1:35:52,  3.20it/s]

Exploiting: Chose move 2 with Q-value 0.28
Exploiting: Chose move 8 with Q-value -0.29
Exploiting: Chose move 4 with Q-value -1.00
Updated Q-value for move 4 on board OXXO-O--X: -1.00
Updated Q-value for move 8 on board OXXO-----: -0.30
Updated Q-value for move 2 on board OX-------: 0.28
Updated Q-value for move 1 on board ---------: 0.78
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1602/20000 [09:07<1:38:19,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1603/20000 [09:07<1:38:59,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1604/20000 [09:07<1:40:03,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1606/20000 [09:08<1:37:36,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1607/20000 [09:08<1:33:58,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1608/20000 [09:09<1:37:03,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1609/20000 [09:09<1:55:30,  2.65it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1610/20000 [09:10<2:09:41,  2.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1611/20000 [09:10<2:18:38,  2.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1612/20000 [09:11<2:27:20,  2.08it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1613/20000 [09:11<2:33:36,  1.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1614/20000 [09:12<2:34:32,  1.98it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1615/20000 [09:12<2:19:13,  2.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1616/20000 [09:12<2:05:23,  2.44it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1617/20000 [09:13<1:56:09,  2.64it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1618/20000 [09:13<1:48:54,  2.81it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1619/20000 [09:13<1:45:29,  2.90it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1620/20000 [09:14<1:41:26,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1621/20000 [09:14<1:41:46,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1622/20000 [09:14<1:42:45,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1623/20000 [09:15<2:00:02,  2.55it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1624/20000 [09:15<2:18:54,  2.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1625/20000 [09:16<2:27:36,  2.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1626/20000 [09:16<2:30:14,  2.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1627/20000 [09:17<2:31:27,  2.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1629/20000 [09:18<2:14:35,  2.27it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1630/20000 [09:18<2:00:53,  2.53it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1631/20000 [09:18<1:55:27,  2.65it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1632/20000 [09:19<1:51:00,  2.76it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1633/20000 [09:19<1:48:00,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1634/20000 [09:19<1:43:03,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1635/20000 [09:20<1:43:20,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1636/20000 [09:20<1:42:37,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1637/20000 [09:20<1:41:32,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:   8%|▊         | 1638/20000 [09:21<1:40:07,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1639/20000 [09:21<1:41:09,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1640/20000 [09:21<1:43:40,  2.95it/s]

Exploring: Chose move 3
Exploring: Chose move 1
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   8%|▊         | 1641/20000 [09:22<1:41:35,  3.01it/s]

Exploiting: Chose move 4 with Q-value 1.06
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.09
Updated Q-value for move 3 on board ---------: 1.16
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1642/20000 [09:22<1:55:50,  2.64it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1643/20000 [09:23<2:12:09,  2.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1644/20000 [09:23<2:20:41,  2.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1645/20000 [09:24<2:23:46,  2.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1646/20000 [09:24<2:27:10,  2.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1647/20000 [09:25<2:36:49,  1.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   8%|▊         | 1648/20000 [09:25<2:21:17,  2.16it/s]

Exploiting: Chose move 4 with Q-value 1.09
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.11
Updated Q-value for move 3 on board ---------: 1.19
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1649/20000 [09:25<2:08:31,  2.38it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1650/20000 [09:26<2:00:32,  2.54it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1651/20000 [09:26<1:51:22,  2.75it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1652/20000 [09:26<1:49:30,  2.79it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   8%|▊         | 1653/20000 [09:27<1:44:25,  2.93it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   8%|▊         | 1654/20000 [09:27<1:41:39,  3.01it/s]

Exploiting: Chose move 2 with Q-value 0.28
Exploiting: Chose move 7 with Q-value -0.30
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OXXOO--X-: -1.00
Updated Q-value for move 7 on board OXXO-----: -0.31
Updated Q-value for move 2 on board OX-------: 0.27
Updated Q-value for move 1 on board ---------: 0.78
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1655/20000 [09:27<1:43:32,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1656/20000 [09:28<1:43:20,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1657/20000 [09:28<1:43:29,  2.95it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board OOX-X-OX-: -1.00
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1658/20000 [09:28<1:40:58,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1659/20000 [09:29<1:39:02,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1660/20000 [09:29<1:37:25,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1661/20000 [09:29<1:37:32,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1662/20000 [09:30<1:39:40,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1663/20000 [09:30<1:39:53,  3.06it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1664/20000 [09:30<1:40:13,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1665/20000 [09:31<1:42:30,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   8%|▊         | 1666/20000 [09:31<1:39:44,  3.06it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   8%|▊         | 1667/20000 [09:31<1:39:23,  3.07it/s]

Exploiting: Chose move 4 with Q-value 1.11
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.14
Updated Q-value for move 3 on board ---------: 1.23
Exploring: Chose move 1


Training MENACE:   8%|▊         | 1668/20000 [09:32<1:39:14,  3.08it/s]

Exploiting: Chose move 2 with Q-value 0.27
Exploiting: Chose move 6 with Q-value -0.30
Exploring: Chose move 5
Updated Q-value for move 5 on board OXXOO-X--: -1.00
Updated Q-value for move 6 on board OXXO-----: -0.31
Updated Q-value for move 2 on board OX-------: 0.27
Updated Q-value for move 1 on board ---------: 0.78
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1669/20000 [09:32<1:39:15,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1670/20000 [09:32<1:39:10,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploring: Chose move 5
Updated Q-value for move 5 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1671/20000 [09:33<1:39:25,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1672/20000 [09:33<1:39:29,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1674/20000 [09:34<1:40:10,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 w

Training MENACE:   8%|▊         | 1675/20000 [09:34<1:37:09,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1676/20000 [09:34<1:40:14,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1677/20000 [09:35<1:39:59,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   8%|▊         | 1678/20000 [09:35<1:39:47,  3.06it/s]

Exploiting: Chose move 2 with Q-value 0.27
Exploiting: Chose move 8 with Q-value -0.30
Exploring: Chose move 6
Updated Q-value for move 6 on board OXXO-O--X: -1.00
Updated Q-value for move 8 on board OXXO-----: -0.31
Updated Q-value for move 2 on board OX-------: 0.26
Updated Q-value for move 1 on board ---------: 0.77
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1679/20000 [09:35<2:00:41,  2.53it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1680/20000 [09:36<2:12:55,  2.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1681/20000 [09:36<2:20:52,  2.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1682/20000 [09:37<2:25:56,  2.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1683/20000 [09:37<2:26:29,  2.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1684/20000 [09:38<2:30:30,  2.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   8%|▊         | 1685/20000 [09:38<2:20:21,  2.17it/s]

Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 2 with Q-value 0.93
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.93
Updated Q-value for move 0 on board -O-----X-: 1.32
Updated Q-value for move 7 on board ---------: 1.64
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1686/20000 [09:39<2:08:09,  2.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1687/20000 [09:39<2:00:09,  2.54it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1688/20000 [09:39<1:58:07,  2.58it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1689/20000 [09:40<1:51:00,  2.75it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1690/20000 [09:40<1:49:11,  2.79it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1691/20000 [09:40<1:43:37,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1692/20000 [09:41<1:40:19,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1693/20000 [09:41<1:40:21,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1694/20000 [09:41<1:44:22,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1695/20000 [09:42<1:40:55,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1696/20000 [09:42<1:42:14,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1697/20000 [09:42<1:44:41,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   8%|▊         | 1698/20000 [09:43<1:46:34,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   8%|▊         | 1699/20000 [09:43<1:45:49,  2.88it/s]

Exploring: Chose move 8
Exploiting: Chose move 2 with Q-value 0.26
Exploiting: Chose move 3 with Q-value 0.20
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board OXXXOOO-X: 0.00
Updated Q-value for move 3 on board OXX-OO--X: 0.23
Updated Q-value for move 2 on board OX--O---X: 0.30
Updated Q-value for move 8 on board OX-------: 0.27
Updated Q-value for move 1 on board ---------: 0.77
Exploring: Chose move 8


Training MENACE:   8%|▊         | 1700/20000 [09:43<1:42:47,  2.97it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploiting: Chose move 7 with Q-value 0.93
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.93
Updated Q-value for move 0 on board ----O---X: 1.31
Updated Q-value for move 8 on board ---------: 1.63
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1701/20000 [09:44<1:38:50,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1702/20000 [09:44<1:38:42,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1703/20000 [09:44<1:38:37,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1704/20000 [09:45<1:39:09,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1705/20000 [09:45<1:32:56,  3.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1706/20000 [09:45<1:37:28,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1707/20000 [09:46<1:37:55,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   9%|▊         | 1708/20000 [09:46<1:38:38,  3.09it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploiting: Chose move 1 with Q-value 0.93
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.50
Updated Q-value for move 1 on board X-OO-X---: 0.93
Updated Q-value for move 0 on board --O--X---: 1.30
Updated Q-value for move 5 on board ---------: 1.61
Exploring: Chose move 4


Training MENACE:   9%|▊         | 1709/20000 [09:46<1:39:03,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1710/20000 [09:47<1:34:23,  3.23it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   9%|▊         | 1711/20000 [09:47<1:37:46,  3.12it/s]

Exploiting: Chose move 0 with Q-value 1.30
Exploiting: Chose move 1 with Q-value 0.93
Exploiting: Chose move 6 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.50
Updated Q-value for move 1 on board X-OO-X---: 0.93
Updated Q-value for move 0 on board --O--X---: 1.31
Updated Q-value for move 5 on board ---------: 1.62
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1712/20000 [09:47<1:38:28,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1713/20000 [09:48<1:35:23,  3.19it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1714/20000 [09:48<1:36:15,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1715/20000 [09:48<1:44:14,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1716/20000 [09:49<2:02:11,  2.49it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1717/20000 [09:49<2:16:46,  2.23it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1718/20000 [09:50<2:26:40,  2.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   9%|▊         | 1719/20000 [09:50<2:31:01,  2.02it/s]

Exploiting: Chose move 4 with Q-value 1.14
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.16
Updated Q-value for move 3 on board ---------: 1.26
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1720/20000 [09:51<2:34:34,  1.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1721/20000 [09:51<2:25:49,  2.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   9%|▊         | 1722/20000 [09:52<2:10:32,  2.33it/s]

Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 7 with Q-value 0.93
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.94
Updated Q-value for move 0 on board ----O---X: 1.31
Updated Q-value for move 8 on board ---------: 1.63
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1723/20000 [09:52<2:00:27,  2.53it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1724/20000 [09:52<1:56:48,  2.61it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1725/20000 [09:53<1:52:13,  2.71it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1726/20000 [09:53<1:47:37,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1727/20000 [09:53<1:45:54,  2.88it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1728/20000 [09:54<1:43:06,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1729/20000 [09:54<1:38:09,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1730/20000 [09:54<1:41:06,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1731/20000 [09:55<1:36:26,  3.16it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1732/20000 [09:55<1:35:13,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1733/20000 [09:55<1:37:49,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1734/20000 [09:56<1:37:58,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1735/20000 [09:56<1:36:45,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1736/20000 [09:56<1:39:12,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploring: Chose move 8
Exploiting: Chose move 5 with Q-value 0.00
Updated Q-value for move 5 on board OXOOX-XOX: 0.00
Updated Q-value for move 8 on board OXO-X-XO-: 0.23
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1737/20000 [09:57<1:39:03,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1738/20000 [09:57<1:41:49,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1739/20000 [09:57<1:39:48,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   9%|▊         | 1740/20000 [09:58<1:40:43,  3.02it/s]

Exploiting: Chose move 4 with Q-value 1.16
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.18
Updated Q-value for move 3 on board ---------: 1.29
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1742/20000 [09:58<1:35:37,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1743/20000 [09:58<1:33:23,  3.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1744/20000 [09:59<1:34:53,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1745/20000 [09:59<1:37:26,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1746/20000 [09:59<1:38:09,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1747/20000 [10:00<1:35:57,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1748/20000 [10:00<1:36:26,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▊         | 1749/20000 [10:00<1:40:41,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1750/20000 [10:01<1:41:29,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1751/20000 [10:01<1:39:58,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1752/20000 [10:01<1:47:07,  2.84it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1753/20000 [10:02<2:02:10,  2.49it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1754/20000 [10:03<2:16:39,  2.23it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1755/20000 [10:03<2:24:46,  2.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1756/20000 [10:04<2:28:39,  2.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   9%|▉         | 1758/20000 [10:04<2:20:47,  2.16it/s]

Exploiting: Chose move 4 with Q-value 1.18
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.20
Updated Q-value for move 3 on board ---------: 1.32
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1759/20000 [10:05<2:08:24,  2.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1760/20000 [10:05<2:00:00,  2.53it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1761/20000 [10:05<1:55:28,  2.63it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1762/20000 [10:06<1:50:43,  2.75it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1763/20000 [10:06<1:48:49,  2.79it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1764/20000 [10:07<1:47:44,  2.82it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1765/20000 [10:07<1:48:06,  2.81it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1766/20000 [10:07<1:42:20,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1767/20000 [10:07<1:40:23,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1768/20000 [10:08<1:39:46,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   9%|▉         | 1769/20000 [10:08<1:39:07,  3.07it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1770/20000 [10:08<1:38:26,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1771/20000 [10:09<1:36:31,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   9%|▉         | 1772/20000 [10:09<1:36:38,  3.14it/s]

Exploiting: Chose move 0 with Q-value 1.31
Exploring: Chose move 6
Updated Q-value for move 6 on board XO--O---X: -1.00
Updated Q-value for move 0 on board ----O---X: 1.32
Updated Q-value for move 8 on board ---------: 1.64
Exploring: Chose move 7


Training MENACE:   9%|▉         | 1773/20000 [10:09<1:35:43,  3.17it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 2 with Q-value 0.93
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.94
Updated Q-value for move 0 on board -O-----X-: 1.32
Updated Q-value for move 7 on board ---------: 1.64
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1774/20000 [10:10<1:36:28,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1775/20000 [10:10<1:41:03,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1776/20000 [10:10<1:37:42,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   9%|▉         | 1777/20000 [10:11<1:37:34,  3.11it/s]

Exploring: Chose move 7
Exploiting: Chose move 1 with Q-value 0.05
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXO--X-: -1.00
Updated Q-value for move 1 on board O-OX---X-: 0.10
Updated Q-value for move 7 on board O--X-----: 0.11
Updated Q-value for move 3 on board ---------: 1.34
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1778/20000 [10:11<1:36:34,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1779/20000 [10:11<1:39:31,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1780/20000 [10:12<1:41:52,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1781/20000 [10:12<1:36:18,  3.15it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1782/20000 [10:12<1:32:11,  3.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1783/20000 [10:13<1:31:35,  3.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1784/20000 [10:13<1:31:13,  3.33it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1785/20000 [10:13<1:34:51,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1786/20000 [10:13<1:34:42,  3.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1788/20000 [10:14<1:29:38,  3.39it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1789/20000 [10:14<1:36:47,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1790/20000 [10:15<1:56:32,  2.60it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1791/20000 [10:16<2:12:53,  2.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:   9%|▉         | 1792/20000 [10:16<2:24:31,  2.10it/s]

Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 1 with Q-value 0.93
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.50
Updated Q-value for move 1 on board X-OO-X---: 0.94
Updated Q-value for move 0 on board --O--X---: 1.31
Updated Q-value for move 5 on board ---------: 1.62
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1793/20000 [10:17<2:29:35,  2.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1794/20000 [10:17<2:32:34,  1.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1795/20000 [10:18<2:28:12,  2.05it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1796/20000 [10:18<2:14:14,  2.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1797/20000 [10:18<2:04:37,  2.43it/s]

Exploring: Chose move 5
Exploring: Chose move 1
Updated Q-value for move 1 on board O--OXX---: -1.00
Updated Q-value for move 5 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1798/20000 [10:19<1:57:54,  2.57it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1799/20000 [10:19<1:49:11,  2.78it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1800/20000 [10:19<1:49:30,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1801/20000 [10:20<1:45:42,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:   9%|▉         | 1802/20000 [10:20<1:41:24,  2.99it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1803/20000 [10:20<1:38:36,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1804/20000 [10:21<1:42:46,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1805/20000 [10:21<1:39:18,  3.05it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1806/20000 [10:21<1:39:01,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1807/20000 [10:22<1:43:41,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1808/20000 [10:22<1:43:05,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1809/20000 [10:22<1:44:58,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1811/20000 [10:23<1:38:42,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1812/20000 [10:23<1:36:15,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1813/20000 [10:24<1:38:14,  3.09it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.95
Updated Q-value for move 5 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1814/20000 [10:24<1:39:52,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1815/20000 [10:24<1:40:53,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1816/20000 [10:25<1:39:28,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Episode 6000 completed
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1817/20000 [10:25<1:39:03,  3.06it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploring: Chose move 5
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 5 on board OXO-X-XO-: 0.23
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1818/20000 [10:25<1:36:16,  3.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1819/20000 [10:25<1:39:11,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1820/20000 [10:26<1:42:01,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6
Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 5 with Q-value 0.93
Exploiting: Chose move 7 with Q-value 0.50
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.93
Updated Q-value for move 0 on board ----O-X--: 1.31
Updated Q-value for move 6 on board ---------: 1.63
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1821/20000 [10:26<1:44:54,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   9%|▉         | 1822/20000 [10:27<1:42:34,  2.95it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.94
Updated Q-value for move 0 on board -O-----X-: 1.32
Updated Q-value for move 7 on board ---------: 1.65
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1823/20000 [10:27<1:40:17,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1824/20000 [10:27<1:42:42,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1825/20000 [10:28<1:51:06,  2.73it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1826/20000 [10:28<2:01:45,  2.49it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1827/20000 [10:29<2:10:55,  2.31it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1828/20000 [10:29<2:19:26,  2.17it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   9%|▉         | 1829/20000 [10:30<2:25:02,  2.09it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.94
Updated Q-value for move 0 on board -O-----X-: 1.32
Updated Q-value for move 7 on board ---------: 1.65
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1830/20000 [10:30<2:31:08,  2.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1831/20000 [10:31<2:34:54,  1.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1832/20000 [10:31<2:21:40,  2.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1833/20000 [10:31<2:07:36,  2.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1834/20000 [10:32<1:56:25,  2.60it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1835/20000 [10:32<1:49:25,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1836/20000 [10:32<1:45:54,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1837/20000 [10:33<1:41:33,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1838/20000 [10:33<1:38:22,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1839/20000 [10:33<1:38:52,  3.06it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1840/20000 [10:34<1:36:23,  3.14it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1841/20000 [10:34<1:38:16,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1842/20000 [10:34<1:40:01,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1843/20000 [10:35<1:36:24,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1844/20000 [10:35<1:34:32,  3.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1845/20000 [10:35<1:33:59,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1846/20000 [10:35<1:31:34,  3.30it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1847/20000 [10:36<1:37:13,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   9%|▉         | 1848/20000 [10:36<1:38:12,  3.08it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.94
Updated Q-value for move 0 on board -O-----X-: 1.33
Updated Q-value for move 7 on board ---------: 1.65
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1849/20000 [10:36<1:38:29,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1850/20000 [10:37<1:39:11,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1851/20000 [10:37<1:37:02,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1852/20000 [10:37<1:37:18,  3.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1853/20000 [10:38<1:35:24,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:   9%|▉         | 1854/20000 [10:38<1:34:24,  3.20it/s]

Exploiting: Chose move 4 with Q-value 1.20
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.21
Updated Q-value for move 3 on board ---------: 1.37
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1855/20000 [10:38<1:37:59,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1856/20000 [10:39<1:40:14,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploring: Chose move 8
Updated Q-value for move 8 on board OX-OXX-O-: -1.00
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1857/20000 [10:39<1:41:16,  2.99it/s]

Exploring: Chose move 7
Exploring: Chose move 6
Updated Q-value for move 6 on board OO--X--X-: -1.00
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1858/20000 [10:39<1:40:51,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:   9%|▉         | 1859/20000 [10:40<1:41:13,  2.99it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.94
Updated Q-value for move 0 on board ----O---X: 1.32
Updated Q-value for move 8 on board ---------: 1.64
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1860/20000 [10:40<1:41:19,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1861/20000 [10:40<1:45:30,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1862/20000 [10:41<1:42:53,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1863/20000 [10:41<1:59:05,  2.54it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1864/20000 [10:42<2:11:36,  2.30it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.95
Updated Q-value for move 2 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1865/20000 [10:42<2:20:15,  2.15it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:   9%|▉         | 1866/20000 [10:43<2:23:05,  2.11it/s]

Exploiting: Chose move 8 with Q-value 0.27
Exploring: Chose move 3
Exploiting: Chose move 5 with Q-value 0.00
Updated Q-value for move 5 on board OXOXO---X: -1.00
Updated Q-value for move 3 on board OX--O---X: 0.05
Updated Q-value for move 8 on board OX-------: 0.32
Updated Q-value for move 1 on board ---------: 0.77
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1867/20000 [10:43<2:22:48,  2.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1868/20000 [10:44<2:28:51,  2.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1869/20000 [10:44<2:21:20,  2.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1870/20000 [10:45<2:07:34,  2.37it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1871/20000 [10:45<1:57:29,  2.57it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1872/20000 [10:45<1:49:13,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1873/20000 [10:46<1:47:06,  2.82it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1874/20000 [10:46<1:42:41,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1875/20000 [10:46<1:43:17,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1876/20000 [10:47<1:39:40,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1877/20000 [10:47<1:39:06,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1878/20000 [10:47<1:39:37,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1879/20000 [10:48<1:42:58,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1880/20000 [10:48<1:43:51,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1881/20000 [10:48<1:42:34,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1882/20000 [10:49<1:45:23,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1883/20000 [10:49<1:45:22,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1884/20000 [10:49<1:44:28,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1885/20000 [10:50<1:44:04,  2.90it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1886/20000 [10:50<1:43:49,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1887/20000 [10:50<1:45:06,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1888/20000 [10:51<1:45:17,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1889/20000 [10:51<1:42:48,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1890/20000 [10:51<1:42:33,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1891/20000 [10:52<1:44:46,  2.88it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1892/20000 [10:52<1:40:29,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1893/20000 [10:52<1:41:44,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1894/20000 [10:53<1:41:31,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1895/20000 [10:53<1:36:17,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:   9%|▉         | 1896/20000 [10:53<1:37:02,  3.11it/s]

Exploring: Chose move 6
Exploiting: Chose move 0 with Q-value 0.05
Exploring: Chose move 5
Exploiting: Chose move 4 with Q-value 0.00
Updated Q-value for move 4 on board XOOO-XXXO: 0.00
Updated Q-value for move 5 on board XO-O--XXO: 0.05
Updated Q-value for move 0 on board -O----XXO: 0.10
Updated Q-value for move 6 on board -O-----X-: 0.11
Updated Q-value for move 7 on board ---------: 1.66
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1897/20000 [10:54<1:36:17,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1898/20000 [10:54<1:38:10,  3.07it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:   9%|▉         | 1899/20000 [10:54<1:45:17,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1900/20000 [10:55<1:58:48,  2.54it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1901/20000 [10:55<2:11:30,  2.29it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1902/20000 [10:56<2:16:56,  2.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1903/20000 [10:56<2:19:24,  2.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1904/20000 [10:57<2:26:53,  2.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:  10%|▉         | 1906/20000 [10:58<2:17:15,  2.20it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.94
Updated Q-value for move 0 on board ----O---X: 1.32
Updated Q-value for move 8 on board ---------: 1.65
Exploring: Chose move 4
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1908/20000 [10:58<1:55:53,  2.60it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1909/20000 [10:59<1:51:09,  2.71it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1910/20000 [10:59<1:49:19,  2.76it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1911/20000 [10:59<1:47:49,  2.80it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1912/20000 [11:00<1:44:15,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1913/20000 [11:00<1:42:40,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1914/20000 [11:00<1:39:52,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1915/20000 [11:01<1:41:55,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1916/20000 [11:01<1:44:22,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1917/20000 [11:01<1:41:59,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1918/20000 [11:02<1:35:25,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1919/20000 [11:02<1:36:08,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1920/20000 [11:02<1:37:01,  3.11it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploring: Chose move 2
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1921/20000 [11:03<1:38:37,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1922/20000 [11:03<1:36:05,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1923/20000 [11:03<1:36:48,  3.11it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1925/20000 [11:04<1:33:35,  3.22it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3
Exploiting: Chose move 4 with Q-value 1.21
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.23
Updated Q-value for move 3 on board ---------: 1.39
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1926/20000 [11:04<1:36:04,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1927/20000 [11:05<1:33:52,  3.21it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.95
Updated Q-value for move 2 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1928/20000 [11:05<1:37:21,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1929/20000 [11:05<1:40:01,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1930/20000 [11:06<1:42:40,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1931/20000 [11:06<1:39:17,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1932/20000 [11:06<1:39:13,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1933/20000 [11:07<1:37:19,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1934/20000 [11:07<1:39:00,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1935/20000 [11:07<1:39:10,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1936/20000 [11:08<1:40:48,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1937/20000 [11:08<1:59:25,  2.52it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1938/20000 [11:09<2:11:50,  2.28it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1939/20000 [11:09<2:20:40,  2.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploring: Chose move 5
Updated Q-value for move 5 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1940/20000 [11:10<2:24:45,  2.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1941/20000 [11:10<2:26:52,  2.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1942/20000 [11:11<2:29:58,  2.01it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1943/20000 [11:11<2:15:56,  2.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1944/20000 [11:11<2:06:39,  2.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1945/20000 [11:12<2:00:44,  2.49it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1946/20000 [11:12<1:55:58,  2.59it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1947/20000 [11:12<1:49:25,  2.75it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1948/20000 [11:13<1:46:25,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1949/20000 [11:13<1:42:14,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1950/20000 [11:13<1:37:47,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1951/20000 [11:14<1:38:33,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1952/20000 [11:14<1:36:06,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1953/20000 [11:14<1:36:11,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.95
Updated Q-value for move 5 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1954/20000 [11:15<1:39:40,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:  10%|▉         | 1956/20000 [11:15<1:35:35,  3.15it/s]

Exploring: Chose move 0
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.94
Updated Q-value for move 0 on board ----O---X: 1.32
Updated Q-value for move 8 on board ---------: 1.65
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1957/20000 [11:16<1:32:52,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1958/20000 [11:16<1:36:53,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1959/20000 [11:16<1:37:28,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1960/20000 [11:17<1:38:27,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1961/20000 [11:17<1:37:36,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1962/20000 [11:17<1:39:21,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:  10%|▉         | 1963/20000 [11:18<1:39:27,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1964/20000 [11:18<1:39:25,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1965/20000 [11:18<1:38:36,  3.05it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.95
Updated Q-value for move 5 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1966/20000 [11:19<1:39:31,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:  10%|▉         | 1968/20000 [11:19<1:29:44,  3.35it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.95
Updated Q-value for move 2 on board O---X----: 1.34
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1969/20000 [11:19<1:31:55,  3.27it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.95
Updated Q-value for move 5 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1970/20000 [11:20<1:35:53,  3.13it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1971/20000 [11:20<1:38:16,  3.06it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1972/20000 [11:20<1:39:15,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1973/20000 [11:21<1:34:38,  3.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1974/20000 [11:21<1:52:02,  2.68it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1975/20000 [11:22<2:01:15,  2.48it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1976/20000 [11:22<2:13:33,  2.25it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1977/20000 [11:23<2:23:37,  2.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1978/20000 [11:23<2:28:05,  2.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1979/20000 [11:24<2:31:31,  1.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:  10%|▉         | 1981/20000 [11:25<2:01:20,  2.47it/s]

Exploiting: Chose move 4 with Q-value 1.23
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.24
Updated Q-value for move 3 on board ---------: 1.41
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1982/20000 [11:25<1:53:46,  2.64it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1983/20000 [11:25<1:48:31,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1984/20000 [11:25<1:43:12,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1985/20000 [11:26<1:38:22,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1986/20000 [11:26<1:38:52,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1987/20000 [11:26<1:42:13,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1988/20000 [11:27<1:45:03,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1989/20000 [11:27<1:42:50,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1990/20000 [11:27<1:39:53,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1991/20000 [11:28<1:37:06,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1992/20000 [11:28<1:35:52,  3.13it/s]

Exploring: Chose move 6
Exploring: Chose move 3
Updated Q-value for move 3 on board O-O-X-X--: -1.00
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1993/20000 [11:28<1:39:23,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1995/20000 [11:29<1:35:56,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1996/20000 [11:29<1:38:51,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 6
Exploring: Chose move 7
Updated Q-value for move 7 on board O-O-X-X--: -1.00
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1997/20000 [11:30<1:37:09,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|▉         | 1998/20000 [11:30<1:36:15,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:  10%|▉         | 1999/20000 [11:30<1:39:03,  3.03it/s]

Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 5 with Q-value 0.93
Exploiting: Chose move 7 with Q-value 0.50
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.94
Updated Q-value for move 0 on board ----O-X--: 1.32
Updated Q-value for move 6 on board ---------: 1.64
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2000/20000 [11:31<1:35:35,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2001/20000 [11:31<1:37:03,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2002/20000 [11:31<1:44:27,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2003/20000 [11:32<1:42:47,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2005/20000 [11:32<1:39:29,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2006/20000 [11:33<1:38:34,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2007/20000 [11:33<1:35:36,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2008/20000 [11:33<1:35:26,  3.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2009/20000 [11:34<1:34:09,  3.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2010/20000 [11:34<1:36:10,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2011/20000 [11:34<1:50:47,  2.71it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2012/20000 [11:35<2:05:47,  2.38it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2013/20000 [11:36<2:15:35,  2.21it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2014/20000 [11:36<2:20:09,  2.14it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2015/20000 [11:37<2:26:39,  2.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2016/20000 [11:37<2:32:05,  1.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2017/20000 [11:37<2:19:26,  2.15it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2018/20000 [11:38<2:07:02,  2.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2019/20000 [11:38<2:00:48,  2.48it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 5 with Q-value 0.00
Updated Q-value for move 5 on board OXOOX-XOX: 0.00
Updated Q-value for move 8 on board OXO-X-XO-: 0.26
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2020/20000 [11:39<1:53:59,  2.63it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2021/20000 [11:39<1:52:20,  2.67it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2022/20000 [11:39<1:48:01,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2023/20000 [11:39<1:42:58,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2024/20000 [11:40<1:39:53,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2025/20000 [11:40<1:37:56,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2026/20000 [11:40<1:39:11,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2027/20000 [11:41<1:40:05,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2028/20000 [11:41<1:37:52,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2029/20000 [11:41<1:38:17,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2030/20000 [11:42<1:41:11,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2031/20000 [11:42<1:39:25,  3.01it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.95
Updated Q-value for move 5 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2032/20000 [11:42<1:37:00,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2033/20000 [11:43<1:32:20,  3.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2034/20000 [11:43<1:34:50,  3.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2035/20000 [11:43<1:35:41,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2036/20000 [11:44<1:37:19,  3.08it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2037/20000 [11:44<1:37:08,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2039/20000 [11:45<1:35:56,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2040/20000 [11:45<1:39:53,  3.00it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2041/20000 [11:45<1:39:11,  3.02it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2042/20000 [11:46<1:40:22,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2043/20000 [11:46<1:42:28,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploring: Chose move 5
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 5 on board OXO-X-XO-: 0.26
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2044/20000 [11:46<1:40:56,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:  10%|█         | 2045/20000 [11:47<1:41:17,  2.95it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.94
Updated Q-value for move 0 on board -O-----X-: 1.33
Updated Q-value for move 7 on board ---------: 1.66
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2046/20000 [11:47<1:42:24,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2047/20000 [11:47<1:46:30,  2.81it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2048/20000 [11:48<2:02:23,  2.44it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2049/20000 [11:49<2:17:24,  2.18it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2050/20000 [11:49<2:22:32,  2.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2051/20000 [11:50<2:33:21,  1.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:  10%|█         | 2053/20000 [11:51<2:24:03,  2.08it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 7 with Q-value 0.94
Exploring: Chose move 8
Updated Q-value for move 8 on board XOXOO--X-: -1.00
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.33
Updated Q-value for move 2 on board ---------: 1.66
Exploring: Chose move 8


Training MENACE:  10%|█         | 2054/20000 [11:51<2:13:13,  2.25it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value -1.00
Updated Q-value for move 2 on board XO-XO-O-X: -1.00
Updated Q-value for move 3 on board XO--O---X: -0.08
Updated Q-value for move 0 on board ----O---X: 1.33
Updated Q-value for move 8 on board ---------: 1.65
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2055/20000 [11:51<2:06:40,  2.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2056/20000 [11:52<1:56:17,  2.57it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2058/20000 [11:52<1:45:46,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2059/20000 [11:53<1:38:43,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2060/20000 [11:53<1:37:01,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2061/20000 [11:53<1:37:41,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:  10%|█         | 2062/20000 [11:54<1:37:03,  3.08it/s]

Exploiting: Chose move 8 with Q-value 0.32
Exploiting: Chose move 2 with Q-value 0.30
Exploiting: Chose move 3 with Q-value 0.23
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board OXXXOOO-X: 0.00
Updated Q-value for move 3 on board OXX-OO--X: 0.26
Updated Q-value for move 2 on board OX--O---X: 0.34
Updated Q-value for move 8 on board OX-------: 0.37
Updated Q-value for move 1 on board ---------: 0.78
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2063/20000 [11:54<1:39:30,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2064/20000 [11:54<1:40:11,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:  10%|█         | 2065/20000 [11:55<1:38:04,  3.05it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploring: Chose move 7
Exploring: Chose move 6
Updated Q-value for move 6 on board XOXOO--X-: -1.00
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.33
Updated Q-value for move 2 on board ---------: 1.67
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2066/20000 [11:55<1:37:09,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2067/20000 [11:55<1:35:31,  3.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2068/20000 [11:56<1:36:19,  3.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2069/20000 [11:56<1:37:42,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2070/20000 [11:56<1:39:16,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2071/20000 [11:57<1:41:13,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2072/20000 [11:57<1:38:01,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2073/20000 [11:57<1:37:23,  3.07it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2074/20000 [11:58<1:39:28,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2075/20000 [11:58<1:40:14,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2076/20000 [11:58<1:40:46,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:  10%|█         | 2078/20000 [11:59<1:38:25,  3.03it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.33
Updated Q-value for move 2 on board ---------: 1.67
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2079/20000 [11:59<1:40:44,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2080/20000 [12:00<1:39:38,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2081/20000 [12:00<1:37:24,  3.07it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2082/20000 [12:00<1:36:54,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2083/20000 [12:01<1:43:06,  2.90it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2084/20000 [12:01<1:57:23,  2.54it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2085/20000 [12:02<2:08:46,  2.32it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2086/20000 [12:02<2:18:07,  2.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2087/20000 [12:03<2:22:14,  2.10it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2088/20000 [12:03<2:27:21,  2.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2089/20000 [12:04<2:30:40,  1.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2090/20000 [12:04<2:18:29,  2.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2091/20000 [12:04<2:06:18,  2.36it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2092/20000 [12:05<1:59:12,  2.50it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2093/20000 [12:05<1:51:59,  2.66it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:  10%|█         | 2094/20000 [12:05<1:45:33,  2.83it/s]

Exploiting: Chose move 4 with Q-value 1.24
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.25
Updated Q-value for move 3 on board ---------: 1.44
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2095/20000 [12:06<1:45:22,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2096/20000 [12:06<1:42:59,  2.90it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2097/20000 [12:06<1:43:10,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2098/20000 [12:07<1:43:20,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2099/20000 [12:07<1:40:07,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  10%|█         | 2100/20000 [12:07<1:40:05,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:  11%|█         | 2101/20000 [12:08<1:38:25,  3.03it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.34
Updated Q-value for move 2 on board ---------: 1.67
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2102/20000 [12:08<1:42:42,  2.90it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2103/20000 [12:08<1:40:19,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2104/20000 [12:09<1:40:26,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2105/20000 [12:09<1:42:31,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2106/20000 [12:09<1:42:01,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2107/20000 [12:10<1:38:11,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2108/20000 [12:10<1:38:35,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2109/20000 [12:10<1:37:33,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:  11%|█         | 2110/20000 [12:11<1:38:46,  3.02it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.94
Updated Q-value for move 0 on board ----O---X: 1.33
Updated Q-value for move 8 on board ---------: 1.66
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2112/20000 [12:11<1:37:29,  3.06it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2114/20000 [12:12<1:40:24,  2.97it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.95
Updated Q-value for move 2 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:  11%|█         | 2115/20000 [12:12<1:40:06,  2.98it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.94
Updated Q-value for move 0 on board -O-----X-: 1.33
Updated Q-value for move 7 on board ---------: 1.67
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2116/20000 [12:13<1:40:50,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2117/20000 [12:13<1:44:21,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2118/20000 [12:13<1:40:31,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:  11%|█         | 2119/20000 [12:14<1:38:29,  3.03it/s]

Exploiting: Chose move 8 with Q-value 0.37
Exploiting: Chose move 2 with Q-value 0.34
Exploiting: Chose move 3 with Q-value 0.26
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board OXXXOOO-X: 0.00
Updated Q-value for move 3 on board OXX-OO--X: 0.28
Updated Q-value for move 2 on board OX--O---X: 0.39
Updated Q-value for move 8 on board OX-------: 0.42
Updated Q-value for move 1 on board ---------: 0.79
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2120/20000 [12:14<1:51:44,  2.67it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2121/20000 [12:15<2:03:39,  2.41it/s]

Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 3 on board O-X-X-O--: 0.95
Updated Q-value for move 2 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2122/20000 [12:15<2:19:31,  2.14it/s]

Exploring: Chose move 8
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 1 on board O-O-X---X: 0.95
Updated Q-value for move 8 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2123/20000 [12:16<2:26:49,  2.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:  11%|█         | 2124/20000 [12:16<2:31:29,  1.97it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 5 with Q-value 0.94
Exploiting: Chose move 7 with Q-value 0.50
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.94
Updated Q-value for move 0 on board ----O-X--: 1.32
Updated Q-value for move 6 on board ---------: 1.64
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2126/20000 [12:17<2:22:38,  2.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2127/20000 [12:18<2:08:51,  2.31it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2128/20000 [12:18<1:57:07,  2.54it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2129/20000 [12:18<1:50:47,  2.69it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2130/20000 [12:19<1:44:38,  2.85it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2131/20000 [12:19<1:43:42,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2132/20000 [12:19<1:47:18,  2.78it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2133/20000 [12:20<1:44:01,  2.86it/s]

Exploring: Chose move 5
Exploring: Chose move 8
Exploring: Chose move 7
Updated Q-value for move 7 on board O-OOXX--X: -1.00
Updated Q-value for move 8 on board O--OXX---: -0.08
Updated Q-value for move 5 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2134/20000 [12:20<1:45:03,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2135/20000 [12:20<1:46:19,  2.80it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2136/20000 [12:21<1:44:21,  2.85it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.95
Updated Q-value for move 5 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2137/20000 [12:21<1:39:17,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2138/20000 [12:21<1:41:58,  2.92it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2139/20000 [12:22<1:41:36,  2.93it/s]

Exploring: Chose move 1
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2140/20000 [12:22<1:35:31,  3.12it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2141/20000 [12:22<1:36:27,  3.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2142/20000 [12:23<1:36:35,  3.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2143/20000 [12:23<1:39:25,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2144/20000 [12:23<1:40:41,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2145/20000 [12:24<1:38:46,  3.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2146/20000 [12:24<1:38:37,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2147/20000 [12:24<1:40:42,  2.95it/s]

Exploring: Chose move 5
Exploring: Chose move 1
Updated Q-value for move 1 on board O--OXX---: -1.00
Updated Q-value for move 5 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:  11%|█         | 2148/20000 [12:25<1:43:30,  2.87it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.94
Updated Q-value for move 0 on board -O-----X-: 1.33
Updated Q-value for move 7 on board ---------: 1.67
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2149/20000 [12:25<1:41:22,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2150/20000 [12:25<1:42:30,  2.90it/s]

Exploring: Chose move 3
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2151/20000 [12:26<1:44:17,  2.85it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2152/20000 [12:26<1:41:39,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2153/20000 [12:26<1:39:16,  3.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2154/20000 [12:27<1:40:49,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2155/20000 [12:27<1:40:43,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2156/20000 [12:28<1:58:57,  2.50it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2157/20000 [12:28<2:15:03,  2.20it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2158/20000 [12:29<2:22:52,  2.08it/s]

Exploring: Chose move 3
Exploring: Chose move 2
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 2 on board O--XXO---: 0.09
Updated Q-value for move 3 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2159/20000 [12:29<2:23:15,  2.08it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2160/20000 [12:30<2:27:45,  2.01it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2161/20000 [12:30<2:32:09,  1.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2162/20000 [12:31<2:17:30,  2.16it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 6


Training MENACE:  11%|█         | 2163/20000 [12:31<2:08:37,  2.31it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploring: Chose move 1
Updated Q-value for move 1 on board X--OO-X--: -1.00
Updated Q-value for move 0 on board ----O-X--: 1.32
Updated Q-value for move 6 on board ---------: 1.65
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2164/20000 [12:31<1:58:43,  2.50it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2165/20000 [12:32<1:51:18,  2.67it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2166/20000 [12:32<1:48:17,  2.74it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2167/20000 [12:32<1:48:16,  2.75it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2168/20000 [12:33<1:44:43,  2.84it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2169/20000 [12:33<1:41:55,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2170/20000 [12:33<1:41:24,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2171/20000 [12:34<1:41:07,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2172/20000 [12:34<1:43:14,  2.88it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2173/20000 [12:34<1:46:00,  2.80it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2174/20000 [12:35<1:46:36,  2.79it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2175/20000 [12:35<1:48:34,  2.74it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2176/20000 [12:35<1:44:32,  2.84it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2177/20000 [12:36<1:37:40,  3.04it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2178/20000 [12:36<1:39:45,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2179/20000 [12:36<1:39:29,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2180/20000 [12:37<1:37:14,  3.05it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2181/20000 [12:37<1:41:40,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2182/20000 [12:37<1:43:06,  2.88it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2183/20000 [12:38<1:38:24,  3.02it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2184/20000 [12:38<1:39:27,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2185/20000 [12:38<1:42:34,  2.89it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2186/20000 [12:39<1:39:33,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2187/20000 [12:39<1:39:17,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2188/20000 [12:39<1:40:35,  2.95it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 6 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2189/20000 [12:40<1:38:02,  3.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2190/20000 [12:40<1:39:31,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2191/20000 [12:41<1:46:53,  2.78it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2192/20000 [12:41<2:01:00,  2.45it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2193/20000 [12:42<2:11:28,  2.26it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2194/20000 [12:42<2:16:27,  2.17it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2195/20000 [12:43<2:21:59,  2.09it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2196/20000 [12:43<2:28:07,  2.00it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2197/20000 [12:44<2:32:42,  1.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2198/20000 [12:44<2:19:12,  2.13it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2199/20000 [12:44<2:12:29,  2.24it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board OXX-X-OO-: -1.00
Updated Q-value for move 2 on board OX--X--O-: -0.40
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2200/20000 [12:45<2:01:23,  2.44it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2201/20000 [12:45<1:53:22,  2.62it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2202/20000 [12:45<1:50:18,  2.69it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploring: Chose move 3
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2203/20000 [12:46<1:48:47,  2.73it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2204/20000 [12:46<1:43:37,  2.86it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2205/20000 [12:46<1:45:06,  2.82it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2206/20000 [12:47<1:49:07,  2.72it/s]

Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2207/20000 [12:47<1:46:27,  2.79it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2208/20000 [12:48<1:47:10,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2210/20000 [12:48<1:40:19,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2211/20000 [12:49<1:38:07,  3.02it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2212/20000 [12:49<1:39:13,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2213/20000 [12:49<1:42:00,  2.91it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2214/20000 [12:50<1:43:17,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:  11%|█         | 2215/20000 [12:50<1:42:44,  2.89it/s]

Exploiting: Chose move 8 with Q-value 0.42
Exploiting: Chose move 2 with Q-value 0.39
Exploiting: Chose move 3 with Q-value 0.28
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board OXXXOOO-X: 0.00
Updated Q-value for move 3 on board OXX-OO--X: 0.31
Updated Q-value for move 2 on board OX--O---X: 0.43
Updated Q-value for move 8 on board OX-------: 0.46
Updated Q-value for move 1 on board ---------: 0.80
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2216/20000 [12:50<1:39:00,  2.99it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2217/20000 [12:51<1:40:17,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2218/20000 [12:51<1:42:46,  2.88it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2219/20000 [12:51<1:41:00,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2220/20000 [12:52<1:40:31,  2.95it/s]

Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.95
Exploiting: Chose move 1 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 1 on board O-OOXXX--: 0.50
Updated Q-value for move 6 on board O--OXX---: 0.95
Updated Q-value for move 5 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2221/20000 [12:52<1:40:04,  2.96it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2222/20000 [12:52<1:40:47,  2.94it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2224/20000 [12:53<1:47:00,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2225/20000 [12:53<1:46:50,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2226/20000 [12:54<1:46:16,  2.79it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2227/20000 [12:54<2:00:17,  2.46it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:  11%|█         | 2228/20000 [12:55<2:15:19,  2.19it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2229/20000 [12:55<2:20:21,  2.11it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2230/20000 [12:56<2:26:13,  2.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:  11%|█         | 2231/20000 [12:57<2:31:12,  1.96it/s]

Exploiting: Chose move 0 with Q-value 1.34
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.94
Updated Q-value for move 0 on board --X-O----: 1.34
Updated Q-value for move 2 on board ---------: 1.68
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2233/20000 [12:57<2:06:13,  2.35it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2234/20000 [12:58<1:58:24,  2.50it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 8
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 8 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2235/20000 [12:58<1:55:44,  2.56it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:  11%|█         | 2236/20000 [12:58<1:51:53,  2.65it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2237/20000 [12:59<1:48:47,  2.72it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2238/20000 [12:59<1:47:21,  2.76it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2239/20000 [12:59<1:46:07,  2.79it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2240/20000 [13:00<1:40:36,  2.94it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2241/20000 [13:00<1:41:03,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OOXXXOOX-: 0.00
Updated Q-value for move 3 on board OOX-X-OX-: 0.50
Updated Q-value for move 2 on board OO--X--X-: 0.95
Updated Q-value for move 7 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2242/20000 [13:00<1:42:42,  2.88it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2243/20000 [13:01<1:39:28,  2.98it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2244/20000 [13:01<1:44:50,  2.82it/s]

Exploring: Chose move 6
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 1 on board O-O-X-X--: 0.95
Updated Q-value for move 6 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2245/20000 [13:01<1:43:51,  2.85it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2246/20000 [13:02<1:45:14,  2.81it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2247/20000 [13:02<1:44:41,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2248/20000 [13:02<1:44:32,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█         | 2249/20000 [13:03<1:44:00,  2.84it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOXXOXO-: 0.00
Updated Q-value for move 3 on board OXO-X-XO-: 0.50
Updated Q-value for move 6 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█▏        | 2250/20000 [13:03<1:46:44,  2.77it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value 0.50
Exploring: Chose move 6
Updated Q-value for move 6 on board OXOXXO-OX: 0.00
Updated Q-value for move 3 on board OXO-X--OX: 0.50
Updated Q-value for move 8 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█▏        | 2251/20000 [13:04<1:45:12,  2.81it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█▏        | 2252/20000 [13:04<1:56:59,  2.53it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█▏        | 2253/20000 [13:04<1:53:33,  2.60it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█▏        | 2254/20000 [13:05<1:47:44,  2.75it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█▏        | 2255/20000 [13:05<1:47:23,  2.75it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█▏        | 2256/20000 [13:05<1:44:21,  2.83it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:  11%|█▏        | 2257/20000 [13:06<1:41:31,  2.91it/s]

Exploiting: Chose move 0 with Q-value 1.34
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.95
Updated Q-value for move 0 on board --X-O----: 1.34
Updated Q-value for move 2 on board ---------: 1.68
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█▏        | 2258/20000 [13:06<1:41:26,  2.92it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72


Training MENACE:  11%|█▏        | 2260/20000 [13:07<1:40:52,  2.93it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploring: Chose move 5
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXOOXXXO-: 0.00
Updated Q-value for move 6 on board OX-OXX-O-: 0.50
Updated Q-value for move 5 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 4 with Q-value 1.72
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploring: Chose move 5
Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 1 with Q-value 0.94
Exploiting: Chose mov

Training MENACE:  11%|█▏        | 2261/20000 [13:07<1:54:04,  2.59it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2262/20000 [13:08<2:03:33,  2.39it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2264/20000 [13:09<2:13:29,  2.21it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2265/20000 [13:09<2:18:37,  2.13it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2266/20000 [13:10<2:23:45,  2.06it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2267/20000 [13:10<2:24:00,  2.05it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2268/20000 [13:11<2:07:57,  2.31it/s]

Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.15
Exploiting: Chose move 7 with Q-value 0.10
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 7 on board XOXXO-O--: 0.14
Updated Q-value for move 2 on board X--XO-O--: 0.20
Updated Q-value for move 3 on board X---O----: 0.24
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2269/20000 [13:11<1:58:22,  2.50it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 3
Updated Q-value for move 3 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2270/20000 [13:11<1:53:24,  2.61it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 2
Updated Q-value for move 2 on board XO--OX---: -1.00
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2271/20000 [13:12<1:50:25,  2.68it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 7
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2272/20000 [13:12<1:48:04,  2.73it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:  11%|█▏        | 2273/20000 [13:12<1:49:04,  2.71it/s]

Exploiting: Chose move 0 with Q-value 1.31
Exploiting: Chose move 1 with Q-value 0.94
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.50
Updated Q-value for move 1 on board X-OO-X---: 0.94
Updated Q-value for move 0 on board --O--X---: 1.32
Updated Q-value for move 5 on board ---------: 1.63
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2274/20000 [13:13<1:46:13,  2.78it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2275/20000 [13:13<1:44:38,  2.82it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2276/20000 [13:13<1:44:53,  2.82it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2277/20000 [13:14<1:46:49,  2.77it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2278/20000 [13:14<1:45:01,  2.81it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2279/20000 [13:14<1:46:31,  2.77it/s]

Exploring: Chose move 8
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.94
Updated Q-value for move 8 on board X---O----: 0.30
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2280/20000 [13:15<1:44:46,  2.82it/s]

Exploring: Chose move 5
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2281/20000 [13:15<1:47:34,  2.75it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2282/20000 [13:16<1:49:41,  2.69it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 7
Exploring: Chose move 3
Updated Q-value for move 3 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2283/20000 [13:16<1:42:24,  2.88it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:  11%|█▏        | 2285/20000 [13:16<1:35:19,  3.10it/s]

Exploiting: Chose move 8 with Q-value 0.46
Exploiting: Chose move 2 with Q-value 0.43
Exploiting: Chose move 3 with Q-value 0.31
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board OXXXOOO-X: 0.00
Updated Q-value for move 3 on board OXX-OO--X: 0.33
Updated Q-value for move 2 on board OX--O---X: 0.46
Updated Q-value for move 8 on board OX-------: 0.51
Updated Q-value for move 1 on board ---------: 0.82
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2286/20000 [13:17<1:35:20,  3.10it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2287/20000 [13:17<1:32:34,  3.19it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2288/20000 [13:17<1:34:38,  3.12it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2289/20000 [13:18<1:36:53,  3.05it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2290/20000 [13:18<1:35:29,  3.09it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2291/20000 [13:18<1:37:45,  3.02it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2292/20000 [13:19<1:38:58,  2.98it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 3
Updated Q-value for move 3 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2293/20000 [13:19<1:36:57,  3.04it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2294/20000 [13:19<1:39:30,  2.97it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2295/20000 [13:20<1:40:14,  2.94it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2296/20000 [13:20<1:42:34,  2.88it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2297/20000 [13:21<2:02:08,  2.42it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:  11%|█▏        | 2298/20000 [13:21<2:14:18,  2.20it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  11%|█▏        | 2299/20000 [13:22<2:21:19,  2.09it/s]

Exploring: Chose move 2
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.95
Updated Q-value for move 2 on board X---O----: 0.27
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2300/20000 [13:22<2:26:12,  2.02it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2301/20000 [13:23<2:26:06,  2.02it/s]

Exploring: Chose move 6
Exploiting: Chose move 5 with Q-value 0.94
Exploiting: Chose move 7 with Q-value 0.50
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.94
Updated Q-value for move 6 on board X---O----: 0.33
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2302/20000 [13:23<2:30:38,  1.96it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2303/20000 [13:24<2:17:36,  2.14it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2304/20000 [13:24<2:06:14,  2.34it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2305/20000 [13:24<1:58:08,  2.50it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2307/20000 [13:25<1:43:37,  2.85it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2308/20000 [13:25<1:42:56,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2309/20000 [13:26<1:40:59,  2.92it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value -1.00
Updated Q-value for move 2 on board XO-XOXO--: -1.00
Updated Q-value for move 3 on board XO--OX---: -0.26
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2310/20000 [13:26<1:40:03,  2.95it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board XOO-OX--X: -1.00
Updated Q-value for move 8 on board XO--OX---: 0.00
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2311/20000 [13:26<1:42:30,  2.88it/s]

Exploring: Chose move 1
Exploiting: Chose move 6 with Q-value 0.16
Exploiting: Chose move 5 with Q-value 0.14
Exploring: Chose move 8
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 5 on board XXOOO-X--: 0.17
Updated Q-value for move 6 on board XXO-O----: 0.21
Updated Q-value for move 1 on board X---O----: 0.28
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2312/20000 [13:27<1:41:54,  2.89it/s]

Exploring: Chose move 6
Exploring: Chose move 1
Updated Q-value for move 1 on board X--OO-X--: -1.00
Updated Q-value for move 6 on board X---O----: 0.43
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2313/20000 [13:27<1:45:46,  2.79it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2314/20000 [13:27<1:45:04,  2.81it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2315/20000 [13:28<1:43:02,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:  12%|█▏        | 2316/20000 [13:28<1:41:52,  2.89it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2317/20000 [13:28<1:42:15,  2.88it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2318/20000 [13:29<1:43:32,  2.85it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2320/20000 [13:29<1:42:30,  2.87it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2321/20000 [13:30<1:41:48,  2.89it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 7
Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.94
Updated Q-value for move 0 on board -O-----X-: 1.33
Updated Q-value for move 7 on board ---------: 1.67
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2322/20000 [13:30<1:45:55,  2.78it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2323/20000 [13:31<1:45:54,  2.78it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:  12%|█▏        | 2324/20000 [13:31<1:40:09,  2.94it/s]

Exploiting: Chose move 0 with Q-value 1.34
Exploring: Chose move 5
Updated Q-value for move 5 on board XOX-O----: -1.00
Updated Q-value for move 0 on board --X-O----: 1.34
Updated Q-value for move 2 on board ---------: 1.68
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2325/20000 [13:31<1:44:35,  2.82it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2326/20000 [13:32<1:41:35,  2.90it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2327/20000 [13:32<1:42:57,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2328/20000 [13:32<1:40:18,  2.94it/s]

Exploring: Chose move 5
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board XOO-OX--X: -1.00
Updated Q-value for move 8 on board XO--OX---: -0.04
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2329/20000 [13:33<1:38:09,  3.00it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2331/20000 [13:33<1:41:42,  2.90it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2332/20000 [13:34<1:57:11,  2.51it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2333/20000 [13:34<2:05:48,  2.34it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2334/20000 [13:35<2:16:26,  2.16it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2335/20000 [13:35<2:21:37,  2.08it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2336/20000 [13:36<2:24:37,  2.04it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2338/20000 [13:37<2:05:41,  2.34it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:  12%|█▏        | 2339/20000 [13:37<1:58:34,  2.48it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 1 with Q-value 0.94
Exploiting: Chose move 6 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.50
Updated Q-value for move 1 on board X-OO-X---: 0.94
Updated Q-value for move 0 on board --O--X---: 1.32
Updated Q-value for move 5 on board ---------: 1.64
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2340/20000 [13:37<1:57:40,  2.50it/s]

Exploring: Chose move 2
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 5 with Q-value 0.50
Exploring: Chose move 6
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.95
Updated Q-value for move 2 on board X---O----: 0.38
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2341/20000 [13:38<1:53:48,  2.59it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2342/20000 [13:38<1:45:52,  2.78it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2343/20000 [13:38<1:45:13,  2.80it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2344/20000 [13:39<1:42:56,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2345/20000 [13:39<1:40:07,  2.94it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2346/20000 [13:39<1:40:17,  2.93it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2347/20000 [13:40<1:40:48,  2.92it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2348/20000 [13:40<1:35:57,  3.07it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2349/20000 [13:40<1:42:39,  2.87it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2350/20000 [13:41<1:40:00,  2.94it/s]

Exploring: Chose move 1
Exploiting: Chose move 6 with Q-value 0.21
Exploiting: Chose move 5 with Q-value 0.17
Exploring: Chose move 8
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 5 on board XXOOO-X--: 0.20
Updated Q-value for move 6 on board XXO-O----: 0.26
Updated Q-value for move 1 on board X---O----: 0.32
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:  12%|█▏        | 2351/20000 [13:41<1:39:36,  2.95it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2352/20000 [13:41<1:38:31,  2.99it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2353/20000 [13:42<1:40:08,  2.94it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value -1.00
Updated Q-value for move 2 on board XO-XOXO--: -1.00
Updated Q-value for move 3 on board XO--OX---: -0.27
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2354/20000 [13:42<1:40:57,  2.91it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 3
Updated Q-value for move 3 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2355/20000 [13:42<1:40:59,  2.91it/s]

Exploring: Chose move 2
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.95
Updated Q-value for move 2 on board X---O----: 0.47
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2356/20000 [13:43<1:42:53,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2357/20000 [13:43<1:41:13,  2.90it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2358/20000 [13:44<1:42:50,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 7


Training MENACE:  12%|█▏        | 2359/20000 [13:44<1:40:29,  2.93it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 2 with Q-value 0.94
Exploiting: Chose move 3 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 3 on board XOX-O-OX-: 0.50
Updated Q-value for move 2 on board XO----OX-: 0.94
Updated Q-value for move 0 on board -O-----X-: 1.33
Updated Q-value for move 7 on board ---------: 1.67
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2360/20000 [13:44<1:42:21,  2.87it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2361/20000 [13:45<1:41:18,  2.90it/s]

Exploring: Chose move 5
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2362/20000 [13:45<1:40:33,  2.92it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2363/20000 [13:45<1:40:55,  2.91it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2364/20000 [13:46<1:45:48,  2.78it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2365/20000 [13:46<1:42:27,  2.87it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value -1.00
Updated Q-value for move 2 on board XO-XOXO--: -1.00
Updated Q-value for move 3 on board XO--OX---: -0.29
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2366/20000 [13:46<1:39:03,  2.97it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2367/20000 [13:47<1:55:21,  2.55it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2368/20000 [13:47<2:05:13,  2.35it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2369/20000 [13:48<2:13:38,  2.20it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2370/20000 [13:48<2:20:42,  2.09it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 3
Updated Q-value for move 3 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2372/20000 [13:49<2:26:41,  2.00it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 6
Updated Q-value for move 6 on board XO--OX---: -1.00
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2373/20000 [13:50<2:13:43,  2.20it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 6
Updated Q-value for move 6 on board XO--OX---: -1.00
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2375/20000 [13:50<1:54:26,  2.57it/s]

Exploring: Chose move 6
Exploiting: Chose move 5 with Q-value 0.94
Exploiting: Chose move 7 with Q-value 0.50
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.94
Updated Q-value for move 6 on board X---O----: 0.52
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2376/20000 [13:51<1:50:08,  2.67it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2377/20000 [13:51<1:48:53,  2.70it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2378/20000 [13:51<1:46:08,  2.77it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2379/20000 [13:52<1:42:16,  2.87it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2380/20000 [13:52<1:46:42,  2.75it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2381/20000 [13:53<1:45:21,  2.79it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2382/20000 [13:53<1:44:20,  2.81it/s]

Exploring: Chose move 6
Exploiting: Chose move 5 with Q-value 0.94
Exploiting: Chose move 7 with Q-value 0.50
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.94
Updated Q-value for move 6 on board X---O----: 0.60
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2383/20000 [13:53<1:42:08,  2.87it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2384/20000 [13:54<1:40:17,  2.93it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2385/20000 [13:54<1:39:24,  2.95it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2386/20000 [13:54<1:42:44,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 6
Updated Q-value for move 6 on board XO--OX---: -1.00
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2387/20000 [13:55<1:40:20,  2.93it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2388/20000 [13:55<1:41:21,  2.90it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2389/20000 [13:55<1:43:22,  2.84it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2390/20000 [13:56<1:41:38,  2.89it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2391/20000 [13:56<1:45:17,  2.79it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2392/20000 [13:56<1:43:23,  2.84it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2394/20000 [13:57<1:38:26,  2.98it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2395/20000 [13:57<1:39:52,  2.94it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2397/20000 [13:58<1:36:10,  3.05it/s]

Exploring: Chose move 5
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2398/20000 [13:58<1:35:33,  3.07it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2399/20000 [13:59<1:32:48,  3.16it/s]

Exploring: Chose move 2
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.95
Updated Q-value for move 2 on board X---O----: 0.56
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2400/20000 [13:59<1:32:02,  3.19it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2401/20000 [13:59<1:37:13,  3.02it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2402/20000 [14:00<1:51:28,  2.63it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2404/20000 [14:01<2:13:05,  2.20it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:  12%|█▏        | 2405/20000 [14:01<2:16:37,  2.15it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.94
Updated Q-value for move 0 on board ----O---X: 1.33
Updated Q-value for move 8 on board ---------: 1.66
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2406/20000 [14:02<2:23:16,  2.05it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2408/20000 [14:03<2:12:23,  2.21it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2409/20000 [14:03<2:04:26,  2.36it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploring: Chose move 7
Exploiting: Chose move 5 with Q-value 0.05
Exploiting: Chose move 6 with Q-value 0.10
Exploiting: Chose move 1 with Q-value 0.00
Updated Q-value for move 1 on board X-OOOXXXO: 0.00
Updated Q-value for move 6 on board X-OOOX-X-: 0.14
Updated Q-value for move 5 on board X--OO--X-: 0.11
Updated Q-value for move 7 on board X---O----: 0.42
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2410/20000 [14:03<1:58:27,  2.47it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2412/20000 [14:04<1:46:37,  2.75it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2413/20000 [14:04<1:39:30,  2.95it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2414/20000 [14:05<1:37:45,  3.00it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2415/20000 [14:05<1:37:57,  2.99it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2416/20000 [14:05<1:37:40,  3.00it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2417/20000 [14:06<1:39:51,  2.93it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 6
Updated Q-value for move 6 on board XO--OX---: -1.00
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2418/20000 [14:06<1:39:57,  2.93it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2419/20000 [14:06<1:40:22,  2.92it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 8
Exploring: Chose move 3
Updated Q-value for move 3 on board XOO-OX--X: -1.00
Updated Q-value for move 8 on board XO--OX---: -0.08
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2420/20000 [14:07<1:39:38,  2.94it/s]

Exploring: Chose move 7
Exploiting: Chose move 5 with Q-value 0.11
Exploiting: Chose move 6 with Q-value 0.14
Exploiting: Chose move 1 with Q-value 0.00
Updated Q-value for move 1 on board X-OOOXXXO: 0.00
Updated Q-value for move 6 on board X-OOOX-X-: 0.17
Updated Q-value for move 5 on board X--OO--X-: 0.16
Updated Q-value for move 7 on board X---O----: 0.44
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2421/20000 [14:07<1:38:57,  2.96it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2422/20000 [14:07<1:41:16,  2.89it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2423/20000 [14:08<1:44:03,  2.82it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:  12%|█▏        | 2424/20000 [14:08<1:47:55,  2.71it/s]

Exploring: Chose move 0
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.95
Updated Q-value for move 0 on board --X-O----: 1.34
Updated Q-value for move 2 on board ---------: 1.69
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2425/20000 [14:09<1:58:48,  2.47it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2426/20000 [14:09<2:08:40,  2.28it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2427/20000 [14:10<2:18:51,  2.11it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2428/20000 [14:10<2:23:07,  2.05it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2429/20000 [14:11<2:31:02,  1.94it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2430/20000 [14:11<2:23:15,  2.04it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2431/20000 [14:12<2:08:40,  2.28it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 7
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2432/20000 [14:12<1:57:07,  2.50it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2433/20000 [14:12<1:52:09,  2.61it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2434/20000 [14:13<2:00:07,  2.44it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2435/20000 [14:13<2:13:11,  2.20it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2436/20000 [14:14<2:21:42,  2.07it/s]

Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.20
Exploiting: Chose move 7 with Q-value 0.14
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 7 on board XOXXO-O--: 0.17
Updated Q-value for move 2 on board X--XO-O--: 0.24
Updated Q-value for move 3 on board X---O----: 0.29
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2437/20000 [14:14<2:21:28,  2.07it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:  12%|█▏        | 2438/20000 [14:15<2:26:01,  2.00it/s]

Exploiting: Chose move 8 with Q-value 0.51
Exploiting: Chose move 2 with Q-value 0.46
Exploiting: Chose move 3 with Q-value 0.33
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board OXXXOOO-X: 0.00
Updated Q-value for move 3 on board OXX-OO--X: 0.34
Updated Q-value for move 2 on board OX--O---X: 0.50
Updated Q-value for move 8 on board OX-------: 0.55
Updated Q-value for move 1 on board ---------: 0.83
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2440/20000 [14:16<2:13:04,  2.20it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2441/20000 [14:16<2:05:52,  2.32it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:  12%|█▏        | 2443/20000 [14:17<1:48:20,  2.70it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 1 with Q-value 0.94
Exploring: Chose move 6
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.50
Updated Q-value for move 1 on board X-OO-X---: 0.94
Updated Q-value for move 0 on board --O--X---: 1.32
Updated Q-value for move 5 on board ---------: 1.64
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2444/20000 [14:17<1:46:51,  2.74it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2445/20000 [14:17<1:44:50,  2.79it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2447/20000 [14:18<1:36:54,  3.02it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2448/20000 [14:18<1:42:25,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2449/20000 [14:19<1:44:18,  2.80it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2450/20000 [14:19<1:43:33,  2.82it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2451/20000 [14:20<1:42:09,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 1


Training MENACE:  12%|█▏        | 2452/20000 [14:20<1:41:50,  2.87it/s]

Exploiting: Chose move 8 with Q-value 0.55
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.34
Exploiting: Chose move 7 with Q-value 0.00
Updated Q-value for move 7 on board OXXXOOO-X: 0.00
Updated Q-value for move 3 on board OXX-OO--X: 0.36
Updated Q-value for move 2 on board OX--O---X: 0.53
Updated Q-value for move 8 on board OX-------: 0.60
Updated Q-value for move 1 on board ---------: 0.85
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2453/20000 [14:20<1:46:04,  2.76it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2454/20000 [14:21<1:45:28,  2.77it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2455/20000 [14:21<1:42:23,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2456/20000 [14:21<1:46:45,  2.74it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2457/20000 [14:22<1:46:48,  2.74it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 0


Training MENACE:  12%|█▏        | 2458/20000 [14:22<1:46:55,  2.73it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2459/20000 [14:22<1:44:39,  2.79it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2460/20000 [14:23<1:41:57,  2.87it/s]

Exploring: Chose move 3
Exploring: Chose move 1
Updated Q-value for move 1 on board X--XO-O--: -1.00
Updated Q-value for move 3 on board X---O----: 0.33
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2461/20000 [14:23<1:39:33,  2.94it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2462/20000 [14:23<1:41:05,  2.89it/s]

Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.24
Exploiting: Chose move 7 with Q-value 0.17
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 7 on board XOXXO-O--: 0.20
Updated Q-value for move 2 on board X--XO-O--: 0.29
Updated Q-value for move 3 on board X---O----: 0.38
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2463/20000 [14:24<1:38:32,  2.97it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2464/20000 [14:24<1:36:18,  3.03it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 2


Training MENACE:  12%|█▏        | 2465/20000 [14:24<1:40:05,  2.92it/s]

Exploring: Chose move 6
Exploiting: Chose move 3 with Q-value 0.00
Exploiting: Chose move 5 with Q-value -1.00
Updated Q-value for move 5 on board OOXXO-X--: -1.00
Updated Q-value for move 3 on board -OX-O-X--: -0.04
Updated Q-value for move 6 on board --X-O----: 0.05
Updated Q-value for move 2 on board ---------: 1.69
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2466/20000 [14:25<1:38:03,  2.98it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2467/20000 [14:25<1:39:55,  2.92it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2468/20000 [14:25<1:43:22,  2.83it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2469/20000 [14:26<1:55:09,  2.54it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2470/20000 [14:26<2:06:14,  2.31it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2471/20000 [14:27<2:14:21,  2.17it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2472/20000 [14:27<2:15:51,  2.15it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2473/20000 [14:28<2:22:03,  2.06it/s]

Exploring: Chose move 5
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2474/20000 [14:29<2:25:53,  2.00it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2475/20000 [14:29<2:12:19,  2.21it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value -1.00
Updated Q-value for move 2 on board XO-XOXO--: -1.00
Updated Q-value for move 3 on board XO--OX---: -0.30
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2476/20000 [14:29<2:03:39,  2.36it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2477/20000 [14:30<1:58:00,  2.47it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2478/20000 [14:30<1:48:41,  2.69it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 3
Updated Q-value for move 3 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:  12%|█▏        | 2479/20000 [14:30<1:44:57,  2.78it/s]

Exploiting: Chose move 4 with Q-value 1.25
Exploring: Chose move 2
Exploring: Chose move 1
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 1 on board O-XXXOO--: 0.50
Updated Q-value for move 2 on board O--XXO---: 0.18
Updated Q-value for move 4 on board O--X-----: 1.26
Updated Q-value for move 3 on board ---------: 1.46
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2480/20000 [14:31<1:42:10,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2481/20000 [14:31<1:42:50,  2.84it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2482/20000 [14:31<1:40:55,  2.89it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2483/20000 [14:32<1:41:35,  2.87it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2484/20000 [14:32<1:41:20,  2.88it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2485/20000 [14:32<1:41:41,  2.87it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2486/20000 [14:33<1:42:30,  2.85it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2487/20000 [14:33<1:41:45,  2.87it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2489/20000 [14:34<1:38:14,  2.97it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2490/20000 [14:34<1:38:36,  2.96it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2491/20000 [14:34<1:37:27,  2.99it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2492/20000 [14:35<1:35:11,  3.07it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2493/20000 [14:35<1:39:42,  2.93it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2494/20000 [14:35<1:39:59,  2.92it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploring: Chose move 6
Exploiting: Chose move 5 with Q-value 0.94
Exploiting: Chose move 7 with Q-value 0.50
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.94
Updated Q-value for move 6 on board X---O----: 0.68
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2495/20000 [14:36<1:44:06,  2.80it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2496/20000 [14:36<1:46:21,  2.74it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2497/20000 [14:36<1:45:17,  2.77it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▏        | 2499/20000 [14:37<1:39:22,  2.94it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploring: Chose move 7
Exploiting: Chose move 5 with Q-value 0.16
Exploiting: Chose move 6 with Q-value 0.17
Exploiting: Chose move 1 with Q-value 0.00
Updated Q-value for move 1 on board X-OOOXXXO: 0.00
Updated Q-value for move 6 on board X-OOOX-X-: 0.20
Updated Q-value for move 5 on board X--OO--X-: 0.21
Updated Q-value for move 7 on board X---O----: 0.47
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  12%|█▎        | 2500/20000 [14:37<1:38:24,  2.96it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2501/20000 [14:38<1:37:10,  3.00it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2502/20000 [14:38<1:37:29,  2.99it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:  13%|█▎        | 2503/20000 [14:38<1:36:29,  3.02it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 1 with Q-value 0.94
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.50
Updated Q-value for move 1 on board X-OO-X---: 0.94
Updated Q-value for move 0 on board --O--X---: 1.32
Updated Q-value for move 5 on board ---------: 1.65
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2504/20000 [14:39<1:55:09,  2.53it/s]

Exploring: Chose move 5
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2505/20000 [14:39<2:05:08,  2.33it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2506/20000 [14:40<2:10:40,  2.23it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2507/20000 [14:41<2:19:14,  2.09it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2508/20000 [14:41<2:22:33,  2.04it/s]

Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.29
Exploiting: Chose move 7 with Q-value 0.20
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 7 on board XOXXO-O--: 0.23
Updated Q-value for move 2 on board X--XO-O--: 0.33
Updated Q-value for move 3 on board X---O----: 0.42
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2509/20000 [14:42<2:25:26,  2.00it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2510/20000 [14:42<2:13:25,  2.18it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2511/20000 [14:42<2:03:37,  2.36it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2512/20000 [14:43<1:56:17,  2.51it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2513/20000 [14:43<1:47:18,  2.72it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2514/20000 [14:43<1:49:51,  2.65it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2515/20000 [14:44<1:48:34,  2.68it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2516/20000 [14:44<1:43:28,  2.82it/s]

Exploring: Chose move 6
Exploiting: Chose move 5 with Q-value 0.94
Exploring: Chose move 2
Updated Q-value for move 2 on board XO-OOXX--: -1.00
Updated Q-value for move 5 on board X--OO-X--: 0.94
Updated Q-value for move 6 on board X---O----: 0.74
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2517/20000 [14:44<1:45:30,  2.76it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 2
Updated Q-value for move 2 on board XO--OX---: -1.00
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2518/20000 [14:45<1:43:53,  2.80it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2519/20000 [14:45<1:50:13,  2.64it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2520/20000 [14:46<1:48:57,  2.67it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2522/20000 [14:46<1:39:34,  2.93it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2523/20000 [14:46<1:38:54,  2.94it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:  13%|█▎        | 2524/20000 [14:47<1:40:55,  2.89it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 8
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2526/20000 [14:48<1:39:53,  2.92it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
Exploiting: Chose move 5 with Q-value 1.36
Exploit

Training MENACE:  13%|█▎        | 2527/20000 [14:48<1:41:45,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2528/20000 [14:48<1:42:08,  2.85it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2529/20000 [14:49<1:42:03,  2.85it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2530/20000 [14:49<1:41:28,  2.87it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2531/20000 [14:49<1:41:21,  2.87it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2532/20000 [14:50<1:42:04,  2.85it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:  13%|█▎        | 2533/20000 [14:50<1:41:25,  2.87it/s]

Exploiting: Chose move 4 with Q-value 1.26
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.27
Updated Q-value for move 3 on board ---------: 1.47
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2534/20000 [14:50<1:40:19,  2.90it/s]

Exploring: Chose move 1
Exploiting: Chose move 6 with Q-value 0.26
Exploiting: Chose move 5 with Q-value 0.20
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 5 on board XXOOO-X--: 0.23
Updated Q-value for move 6 on board XXO-O----: 0.30
Updated Q-value for move 1 on board X---O----: 0.37
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2535/20000 [14:51<1:42:55,  2.83it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2536/20000 [14:51<1:42:53,  2.83it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2537/20000 [14:51<1:39:50,  2.92it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2538/20000 [14:52<1:45:26,  2.76it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2539/20000 [14:52<2:01:17,  2.40it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 2
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2540/20000 [14:53<2:11:23,  2.21it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 2
Updated Q-value for move 2 on board XO--OX---: -1.00
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2541/20000 [14:53<2:19:25,  2.09it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2542/20000 [14:54<2:23:06,  2.03it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2543/20000 [14:54<2:27:24,  1.97it/s]

Exploring: Chose move 5
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:  13%|█▎        | 2544/20000 [14:55<2:23:02,  2.03it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2545/20000 [14:55<2:08:08,  2.27it/s]

Exploring: Chose move 6
Exploiting: Chose move 5 with Q-value 0.94
Exploiting: Chose move 7 with Q-value 0.50
Exploiting: Chose move 2 with Q-value 0.00
Updated Q-value for move 2 on board XO-OOXXXO: 0.00
Updated Q-value for move 7 on board XO-OOXX--: 0.50
Updated Q-value for move 5 on board X--OO-X--: 0.94
Updated Q-value for move 6 on board X---O----: 0.80
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2546/20000 [14:56<2:01:33,  2.39it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2547/20000 [14:56<1:53:49,  2.56it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2548/20000 [14:56<1:48:39,  2.68it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2549/20000 [14:57<1:45:29,  2.76it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 3
Updated Q-value for move 3 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2550/20000 [14:57<1:43:46,  2.80it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2551/20000 [14:57<1:42:18,  2.84it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2552/20000 [14:58<1:41:07,  2.88it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2553/20000 [14:58<1:40:19,  2.90it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 4


Training MENACE:  13%|█▎        | 2554/20000 [14:58<1:41:28,  2.87it/s]

Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2555/20000 [14:59<1:41:39,  2.86it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 8
Exploiting: Chose move 3 with Q-value -1.00
Updated Q-value for move 3 on board XOO-OX--X: -1.00
Updated Q-value for move 8 on board XO--OX---: -0.11
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2556/20000 [14:59<1:38:03,  2.97it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2557/20000 [14:59<1:40:01,  2.91it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2558/20000 [15:00<1:40:52,  2.88it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:  13%|█▎        | 2559/20000 [15:00<1:42:35,  2.83it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.94
Updated Q-value for move 0 on board ----O---X: 1.33
Updated Q-value for move 8 on board ---------: 1.67
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2560/20000 [15:00<1:42:37,  2.83it/s]

Exploring: Chose move 2
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 5 with Q-value 0.50
Exploiting: Chose move 6 with Q-value 0.00
Updated Q-value for move 6 on board XOXOOX-XO: 0.00
Updated Q-value for move 5 on board XOXOO--X-: 0.50
Updated Q-value for move 7 on board XOX-O----: 0.95
Updated Q-value for move 2 on board X---O----: 0.64
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2561/20000 [15:01<1:39:40,  2.92it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploring: Chose move 6
Updated Q-value for move 6 on board XO--OX---: -1.00
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2562/20000 [15:01<1:41:52,  2.85it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2563/20000 [15:01<1:40:59,  2.88it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 3
Updated Q-value for move 3 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2564/20000 [15:02<1:40:28,  2.89it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2565/20000 [15:02<1:40:31,  2.89it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2566/20000 [15:02<1:38:11,  2.96it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 8
Updated Q-value for move 8 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2567/20000 [15:03<1:36:13,  3.02it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2568/20000 [15:03<1:37:21,  2.98it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2569/20000 [15:03<1:36:34,  3.01it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2570/20000 [15:04<1:37:02,  2.99it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2571/20000 [15:04<1:38:10,  2.96it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2572/20000 [15:04<1:38:48,  2.94it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:  13%|█▎        | 2573/20000 [15:05<1:41:32,  2.86it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.94
Updated Q-value for move 0 on board ----O---X: 1.33
Updated Q-value for move 8 on board ---------: 1.67
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2574/20000 [15:05<1:56:46,  2.49it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2575/20000 [15:06<2:10:09,  2.23it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2576/20000 [15:06<2:16:37,  2.13it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 8


Training MENACE:  13%|█▎        | 2577/20000 [15:07<2:23:30,  2.02it/s]

Exploiting: Chose move 0 with Q-value 1.33
Exploiting: Chose move 7 with Q-value 0.94
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OOOXX: 0.00
Updated Q-value for move 2 on board XO--O-OXX: 0.50
Updated Q-value for move 7 on board XO--O---X: 0.94
Updated Q-value for move 0 on board ----O---X: 1.33
Updated Q-value for move 8 on board ---------: 1.67
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2578/20000 [15:07<2:25:23,  2.00it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 3


Training MENACE:  13%|█▎        | 2579/20000 [15:08<2:23:38,  2.02it/s]

Exploiting: Chose move 4 with Q-value 1.27
Exploiting: Chose move 1 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 1 on board O--XXO---: 0.95
Updated Q-value for move 4 on board O--X-----: 1.28
Updated Q-value for move 3 on board ---------: 1.49
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2580/20000 [15:08<2:07:38,  2.27it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2581/20000 [15:09<2:02:55,  2.36it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2582/20000 [15:09<1:58:01,  2.46it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploring: Chose move 3
Updated Q-value for move 3 on board XO--OXOX-: -1.00
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2583/20000 [15:09<1:55:02,  2.52it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2584/20000 [15:10<1:47:43,  2.69it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2585/20000 [15:10<1:46:48,  2.72it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2586/20000 [15:10<1:47:54,  2.69it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2587/20000 [15:11<1:49:44,  2.64it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploring: Chose move 3
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2588/20000 [15:11<1:46:51,  2.72it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2589/20000 [15:12<1:46:45,  2.72it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2590/20000 [15:12<1:44:00,  2.79it/s]

Exploring: Chose move 3
Exploiting: Chose move 2 with Q-value 0.33
Exploiting: Chose move 7 with Q-value 0.23
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XOXXOOOX-: 0.00
Updated Q-value for move 7 on board XOXXO-O--: 0.26
Updated Q-value for move 2 on board X--XO-O--: 0.37
Updated Q-value for move 3 on board X---O----: 0.46
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2591/20000 [15:12<1:42:19,  2.84it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2592/20000 [15:13<1:44:13,  2.78it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2593/20000 [15:13<1:39:39,  2.91it/s]

Exploring: Chose move 1
Exploiting: Chose move 6 with Q-value 0.30
Exploring: Chose move 5
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 5 on board XXOOO-X--: 0.26
Updated Q-value for move 6 on board XXO-O----: 0.34
Updated Q-value for move 1 on board X---O----: 0.41
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2594/20000 [15:13<1:40:56,  2.87it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploring: Chose move 5


Training MENACE:  13%|█▎        | 2595/20000 [15:14<1:43:08,  2.81it/s]

Exploiting: Chose move 0 with Q-value 1.32
Exploiting: Chose move 1 with Q-value 0.94
Exploiting: Chose move 6 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board XXOOOXXO-: 0.00
Updated Q-value for move 6 on board XXOOOX---: 0.50
Updated Q-value for move 1 on board X-OO-X---: 0.94
Updated Q-value for move 0 on board --O--X---: 1.33
Updated Q-value for move 5 on board ---------: 1.65
Exploring: Chose move 4
Exploiting: Chose move 1 with Q-value 1.35
Exploiting: Chose move 3 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 8 with Q-value 0.00
Updated Q-value for move 8 on board OXXXXOOO-: 0.00
Updated Q-value for move 2 on board OX-XXO-O-: 0.50
Updated Q-value for move 3 on board OX--X--O-: 0.95
Updated Q-value for move 1 on board O---X----: 1.35
Updated Q-value for move 4 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2596/20000 [15:14<1:42:37,  2.83it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72


Training MENACE:  13%|█▎        | 2598/20000 [15:15<1:38:45,  2.94it/s]

Exploiting: Chose move 5 with Q-value 1.36
Exploiting: Chose move 7 with Q-value 0.95
Exploiting: Chose move 2 with Q-value 0.50
Exploiting: Chose move 3 with Q-value 0.00
Updated Q-value for move 3 on board XOX-OXOXO: 0.00
Updated Q-value for move 2 on board XO--OXOX-: 0.50
Updated Q-value for move 7 on board XO--OX---: 0.95
Updated Q-value for move 5 on board X---O----: 1.36
Updated Q-value for move 0 on board ---------: 1.72
Exploiting: Chose move 0 with Q-value 1.72
