In [1]:
import numpy as np

class Agent:
    """Tabular Q-learning player for tic-tac-toe.

    A board is a flat sequence of 9 cells, each 'X', 'O' or ' ' (empty).
    Boards are mapped to a row index of the Q-table with ``encode_state``;
    the 9 columns of the table are the 9 cells a mark can be placed in.

    The attributes ``state``, ``next_state`` and ``action`` are created by
    ``set_state``, ``set_next_state`` and ``choose_action`` respectively and
    must be set before ``update_Q`` is called.

    Parameters
    ----------
    alpha : float
        Learning rate used in ``update_Q``.
    gamma : float
        Discount factor used in ``update_Q``.
    epsilon : float
        Probability of picking a random valid move in ``choose_action``.
    mark : str
        The mark this agent plays and the default mark ``check_win`` looks for.
    q_table : array-like or None
        Q-table to use (stored by reference, not copied). When None, a fresh
        zero-filled ``(3**9, 9)`` table is created for this agent.
    """

    N_STATES = 3 ** 9   # each of the 9 cells is empty, 'O' or 'X'
    N_ACTIONS = 9       # one action per cell

    def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.1, mark="X", q_table=None):
        # A default array written in the signature is evaluated only once, so
        # every agent built without an explicit table would share (and
        # overwrite) the same array. Allocate per instance instead.
        if q_table is None:
            q_table = np.zeros([self.N_STATES, self.N_ACTIONS])
        self.q_table = q_table
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.mark = mark

    # Compress a tic tac toe board into a single number
    # We have 3^9 possible board states = 19683
    def encode_state(self, board):
        """Return the base-3 encoding of ``board`` (cell 0 is the most significant digit).

        'X' contributes digit 2, 'O' contributes digit 1, anything else 0.
        """
        state = 0
        for i, mark in enumerate(board):
            if mark == 'X':
                state += 3**(8-i) * 2
            elif mark == 'O':
                state += 3**(8-i)
        return state

    # Function to update variable state
    def set_state(self, board):
        """Store the encoding of ``board`` as the current state."""
        self.state = self.encode_state(board)

    # Function to update variable next_state
    def set_next_state(self, board):
        """Store the encoding of ``board`` as the successor state."""
        self.next_state = self.encode_state(board)

    # Function to choose action based on epsilon-greedy policy
    def choose_action(self, board):
        """Pick a move epsilon-greedily, store it in ``self.action`` and return it.

        With probability ``epsilon`` a uniformly random empty cell is chosen;
        otherwise one of the empty cells with the highest Q-value for
        ``self.state`` (ties broken at random). ``set_state`` must have been
        called for the greedy branch.

        Raises
        ------
        ValueError
            If the board has no empty cell.
        """
        valid_actions = self.get_actions(board)
        if not valid_actions:
            raise ValueError("no empty cell left on the board")

        if np.random.uniform(0, 1) < self.epsilon:
            # Exploration: any empty cell
            self.action = int(np.random.choice(valid_actions))
        else:
            # Exploitation: best-valued empty cell for the current state
            q_values = self.q_table[self.state]
            max_q = np.max(q_values[valid_actions])
            best_actions = [a for a in valid_actions if q_values[a] == max_q]
            self.action = int(np.random.choice(best_actions))
        return self.action

    # Function to check if the agent won
    def check_win(self, board, mark=None):
        """Return True if ``mark`` (default: this agent's mark) fills a row, column or diagonal."""
        if mark is None:
            mark = self.mark
        cells = list(board)
        target = [mark] * 3
        for i in range(3):
            # Row i
            if cells[i*3:(i+1)*3] == target:
                return True
            # Column i (cells i, i+3, i+6)
            if cells[i:(i+7):3] == target:
                return True
        # The two diagonals: cells 0,4,8 and cells 2,4,6
        return cells[0:9:4] == target or cells[2:7:2] == target

    # Define the available actions
    def get_actions(self, board):
        """Return the indices of the empty cells of ``board``."""
        return [i for i in range(len(board)) if board[i] == " "]

    # Update the Q-value for a state-action pair
    def update_Q(self, reward):
        """Apply one Q-learning update to ``(self.state, self.action)``.

        new = (1 - alpha) * old + alpha * (reward + gamma * max_a Q[next_state, a])
        """
        key = (self.state, self.action)
        old_value = self.q_table[key]
        next_max = np.max(self.q_table[self.next_state])
        self.q_table[key] = (1 - self.alpha) * old_value + self.alpha * (reward + self.gamma * next_max)

In [None]:
def train_two_agents(episodes=10000):
    """Train an 'X' agent and an 'O' agent against each other by self-play.

    Each agent gets its own zero-initialised Q-table. A move is only updated
    once its real successor is known: either the game has ended, or the
    opponent has replied and the board is back at a position where the same
    agent moves again. Rewards are +1 for a win, -1 for a loss and 0 otherwise.

    Parameters
    ----------
    episodes : int
        Number of self-play games.

    Returns
    -------
    tuple
        ``(agent_1, agent_2)`` - the trained 'X' and 'O' agents.
    """
    # Explicit tables so the two agents can never share one array
    agent_1 = Agent(mark="X", q_table=np.zeros([3**9, 9]))
    agent_2 = Agent(mark="O", q_table=np.zeros([3**9, 9]))

    for _ in range(episodes):
        board = [' '] * 9
        # agent_2 still holds state/action from the previous game until it
        # moves in this one; don't update it before that.
        o_has_moved = False

        while True:
            # player X's turn
            agent_1.set_state(board)
            agent_1.choose_action(board)
            board[agent_1.action] = 'X'

            x_won = agent_1.check_win(board)
            if x_won or ' ' not in board:
                # reward is expressed from O's point of view
                reward = -1 if x_won else 0
                agent_1.set_next_state(board)
                agent_1.update_Q(-reward)
                if o_has_moved:
                    agent_2.set_next_state(board)
                    agent_2.update_Q(reward)
                break

            # X has replied, so O's previous move now has its successor state
            if o_has_moved:
                agent_2.set_next_state(board)
                agent_2.update_Q(0)

            # player O's turn
            agent_2.set_state(board)
            agent_2.choose_action(board)
            board[agent_2.action] = 'O'
            o_has_moved = True

            o_won = agent_2.check_win(board)
            game_over = o_won or ' ' not in board
            reward = 1 if o_won else 0

            # O has replied: this board is X's successor state (terminal or not)
            agent_1.set_next_state(board)
            agent_1.update_Q(-reward)
            if game_over:
                agent_2.set_next_state(board)
                agent_2.update_Q(reward)
                break

    return agent_1, agent_2

In [39]:
# Test the trained Q-table: the human plays 'X', agent_2 plays 'O'.
# agent_2 is expected to be a trained Agent already present in the kernel.
board = [' ']*9
done = False
agent_2.epsilon = 0  # evaluation: always take the greedy move

# The visible notebook defines no two-argument check_win(board, mark) before
# this cell, so win detection goes through Agent.check_win, which looks for
# the agent's own mark. These two agents are never trained or asked to move.
x_judge = Agent(mark='X')
o_judge = Agent(mark='O')

while not done:
    # Player's turn
    print('Current board:')
    print(board[0:3])
    print(board[3:6])
    print(board[6:9])
    print('')
    # Keep asking until the input is an integer index of an empty cell;
    # a negative index would otherwise wrap around and 9+ would raise.
    prompt = 'Enter your move (0-8): '
    while True:
        try:
            action = int(input(prompt))
        except ValueError:
            action = -1
        if 0 <= action < 9 and board[action] == ' ':
            break
        prompt = 'Invalid move. Enter your move (0-8): '
    board[action] = 'X'

    if ' ' not in board or x_judge.check_win(board) or o_judge.check_win(board):
        done = True
    else:
        # AI's turn
        agent_2.set_state(board)
        agent_2.choose_action(board)
        board[agent_2.action] = 'O'
        if ' ' not in board or x_judge.check_win(board) or o_judge.check_win(board):
            done = True

# Determine winner
if x_judge.check_win(board):
    print('You win!')
elif o_judge.check_win(board):
    print('AI wins!')
else:
    print('Tie!')
print('')
print('Final board:')
print(board[0:3])
print(board[3:6])
print(board[6:9])

Current board:
[' ', ' ', ' ']
[' ', ' ', ' ']
[' ', ' ', ' ']

Enter your move (0-8): 4
Current board:
[' ', ' ', ' ']
[' ', 'X', ' ']
['O', ' ', ' ']



KeyboardInterrupt: Interrupted by user

In [42]:
# Test the trained Q-table: agent_1 plays 'X' and moves first, the human plays 'O'.
# agent_1 is expected to be a trained Agent already present in the kernel.
board = [' ']*9
done = False
agent_1.epsilon = 0  # evaluation: always take the greedy move

# The visible notebook defines no two-argument check_win(board, mark) before
# this cell, so win detection goes through Agent.check_win, which looks for
# the agent's own mark. These two agents are never trained or asked to move.
x_judge = Agent(mark='X')
o_judge = Agent(mark='O')

while not done:
    # AI's turn
    agent_1.set_state(board)
    agent_1.choose_action(board)
    board[agent_1.action] = 'X'
    print('Current board:')
    print(board[0:3])
    print(board[3:6])
    print(board[6:9])
    print('')
    if ' ' not in board or x_judge.check_win(board) or o_judge.check_win(board):
        done = True
    else:
        # Keep asking until the input is an integer index of an empty cell;
        # a negative index would otherwise wrap around and 9+ would raise.
        prompt = 'Enter your move (0-8): '
        while True:
            try:
                action = int(input(prompt))
            except ValueError:
                action = -1
            if 0 <= action < 9 and board[action] == ' ':
                break
            prompt = 'Invalid move. Enter your move (0-8): '
        board[action] = 'O'
        # The human's move may have ended the game; without this check the
        # agent would be asked to move on a finished board.
        if ' ' not in board or x_judge.check_win(board) or o_judge.check_win(board):
            done = True

# Determine winner
if o_judge.check_win(board):
    print('You win!')
elif x_judge.check_win(board):
    print('AI wins!')
else:
    print('Tie!')
print('')
print('Final board:')
print(board[0:3])
print(board[3:6])
print(board[6:9])

Current board:
[' ', ' ', 'X']
[' ', ' ', ' ']
[' ', ' ', ' ']

Enter your move (0-8): 4
Current board:
[' ', ' ', 'X']
[' ', 'O', ' ']
[' ', 'X', ' ']

Enter your move (0-8): 0
Current board:
['O', ' ', 'X']
[' ', 'O', ' ']
[' ', 'X', 'X']

Enter your move (0-8): 5
Current board:
['O', ' ', 'X']
[' ', 'O', 'O']
['X', 'X', 'X']

AI wins!

Final board:
['O', ' ', 'X']
[' ', 'O', 'O']
['X', 'X', 'X']


In [44]:
import pickle
# Save both Q-tables. The context managers close (and flush) each file,
# which a bare open(...) passed straight to pickle.dump never does explicitly.
with open('agent1', "wb") as agent1_file:
    pickle.dump(agent_1.q_table, agent1_file)
with open('agent2', "wb") as agent2_file:
    pickle.dump(agent_2.q_table, agent2_file)

In [None]:
# Play against AI: the human plays 'X', the saved 'O' Q-table answers greedily.
board = [' ']*9
done = False

# Load the Q-table that the save cell wrote for the 'O' agent.
# NOTE: pickle.load can execute arbitrary code - only load files you created.
with open('agent2', 'rb') as table_file:
    q_table = pickle.load(table_file)

# Agents used only for their helpers: encode_state guarantees the same
# board -> row mapping that was used while training ('O' = digit 1,
# 'X' = digit 2), and check_win looks for each agent's own mark.
ai = Agent(mark='O', epsilon=0, q_table=q_table)
human = Agent(mark='X', q_table=q_table)

while not done:
    # Player's turn
    print('Current board:')
    print(board[0:3])
    print(board[3:6])
    print(board[6:9])
    print('')
    # Keep asking until the input is an integer index of an empty cell;
    # a negative index would otherwise wrap around and 9+ would raise.
    prompt = 'Enter your move (0-8): '
    while True:
        try:
            action = int(input(prompt))
        except ValueError:
            action = -1
        if 0 <= action < 9 and board[action] == ' ':
            break
        prompt = 'Invalid move. Enter your move (0-8): '
    board[action] = 'X'

    if ' ' not in board or human.check_win(board) or ai.check_win(board):
        done = True
    else:
        # AI's turn: best-valued empty cell, ties broken at random
        state = ai.encode_state(board)
        q_values = q_table[state]
        valid_actions = [i for i in range(9) if board[i] == ' ']
        max_q = np.max(q_values[valid_actions])
        action = np.random.choice([a for a in valid_actions if q_values[a] == max_q])

        print('AI moves to', action)
        board[action] = 'O'

        if ' ' not in board or human.check_win(board) or ai.check_win(board):
            done = True

# Determine winner
if human.check_win(board):
    print('You win!')
elif ai.check_win(board):
    print('AI wins!')
else:
    print('It is a Tie!')
print('')
print('Final board:')
print(board[0:3])
print(board[3:6])
print(board[6:9])

In [11]:
import pygame

# Start pygame before any display or font object is created
pygame.init()

# Window: 300 px wide, 400 px tall, titled "Tic Tac Toe"
WINDOW_SIZE = (300, 400)
screen = pygame.display.set_mode(WINDOW_SIZE)
pygame.display.set_caption("Tic Tac Toe")

# RGB colour palette
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
GRAY = (128, 128, 128)
RED = (255, 0, 0)
BLUE = (0, 0, 255)

# Fonts: `font` renders the marks and the result line,
# `font_2` renders the label of the restart button
font = pygame.font.Font(None, 50)
font_2 = pygame.font.Font(None, 40)

# Game state: a 3x3 grid whose cells are None, "X" or "O",
# and the mark that will be placed by the next click
board = [[None] * 3 for _ in range(3)]
player = "X"

# Define a function to draw the board
def draw_board():
    """Redraw the whole window: grid, marks, result line and restart button.

    Reads the module-level ``board`` and ``winner``. ``winner`` may be None
    (no result yet), "X" / "O" (that mark won) or "Tie".
    """
    screen.fill(WHITE)
    for row in range(3):
        for column in range(3):
            # Cell outline; the grid starts 50 px down to leave room for the button
            pygame.draw.rect(screen, BLACK, [(10 + 100 * column), (10 + 100 * row + 50), 80, 80], 2)
            mark = board[row][column]
            if mark == 'X' or mark == 'O':
                mark_color = RED if mark == 'X' else BLUE
                text = font.render(mark, True, mark_color)
                screen.blit(text, (35 + 100 * column, 35 + 100 * row + 50))

    # check_win() reports a full board as the string "Tie"; treating that like
    # a winning mark would print "Tie wins!", so ties are handled first.
    board_full = all(all(row) for row in board)
    if winner == "Tie" or (winner is None and board_full):
        text = font.render("Tie!", True, BLACK)
        screen.blit(text, (100, 360))
    elif winner is not None:
        result_color = RED if winner == 'X' else BLUE
        text = font.render(winner + " wins!", True, result_color)
        screen.blit(text, (100, 360))

    # Restart button across the top of the window
    pygame.draw.rect(screen, GRAY, [90, 10, 120, 30])
    restart_text = font_2.render("Restart", True, BLACK)
    screen.blit(restart_text, (100, 15))
    
# Report the outcome of the current module-level board
def check_win():
    """Return the winning mark, "Tie" for a full board without a winner, else None.

    Lines are examined in this order: the three rows, the three columns,
    the main diagonal, then the anti-diagonal.
    """
    lines = [[board[r][c] for c in range(3)] for r in range(3)]
    lines += [[board[r][c] for r in range(3)] for c in range(3)]
    lines.append([board[i][i] for i in range(3)])
    lines.append([board[i][2 - i] for i in range(3)])

    for first, second, third in lines:
        if first == second == third != None:
            return first

    # No line is complete: a board with every cell occupied is a tie
    if all(cell for row in board for cell in row):
        return "Tie"
    return None

# Start a new game
def reset_board():
    """Replace the module-level board with an empty 3x3 grid and give the turn to "X"."""
    global board, player
    player = "X"
    board = [[None] * 3 for _ in range(3)]

# Game loop: handle clicks, then recompute the result and redraw every frame
running = True
winner = None  # nothing is decided before the first frame
while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
        elif event.type == pygame.MOUSEBUTTONDOWN:
            # Position of this click (not wherever the cursor is by now)
            x, y = event.pos
            if y < 50 and 90 <= x <= 210:
                reset_board()
                winner = None
            elif winner is None:
                # The grid starts 50 px down; each cell is 100 px square
                row = (y - 50) // 100
                column = x // 100
                # Clicks above the grid give row -1 (which would silently
                # index the last row) and clicks below it give row 3, so
                # both coordinates are range-checked before touching the board.
                if 0 <= row < 3 and 0 <= column < 3 and board[row][column] is None:
                    board[row][column] = player
                    player = "O" if player == "X" else "X"
                    # Re-evaluate right away so a second click queued in the
                    # same frame cannot place a mark on a finished game
                    winner = check_win()
    winner = check_win()
    draw_board()
    pygame.display.update()

# Quit Pygame
pygame.quit()

