In [2]:
import copy
import numpy as np
import pickle
import sys
import tkinter as tk

class Player(object):
    def __init__(self, mark):
        self.mark = mark

    def opponent_mark(self):
        if self.mark == 'X':
            return 'O'
        else:
            return 'X'
            
class HumanPlayer(Player):
    pass

class ComputerPlayer(Player):
    pass

class RandomPlayer(ComputerPlayer):
    def get_move(self, board):
        moves = board.available_moves()
        if moves:   
            return moves[np.random.choice(len(moves))]

class SafePlayer(ComputerPlayer):
    def get_move(self, board):
        moves = board.available_moves()
        if moves:
            for move in moves:
                next_board = board.get_next_board(move, self.mark)
                if next_board.winner() == self.mark:
                    return move
            for move in moves:
                next_board = board.get_next_board(move, self.opponent_mark())
                if next_board.winner() == self.opponent_mark():
                    return move
            return moves[np.random.choice(len(moves))]

class MCTSPlayer(SafePlayer):
    pass

In [3]:
class Board:
    def __init__(self, grid=np.ones((3,3))*np.nan):
        self.grid = grid

    def over(self):
        return (not np.any(np.isnan(self.grid))) or (self.winner() is not None)

    def place_mark(self, move, mark):
        num = Board.mark2num(mark)
        self.grid[tuple(move)] = num

    @staticmethod
    def mark2num(mark):
        d = {"X": 1, "O": 0}
        return d[mark]

    def available_moves(self):
        return [(i,j) for i in range(3) for j in range(3) if np.isnan(self.grid[i][j])]

    def get_next_board(self, move, mark):
        next_board = copy.deepcopy(self)
        next_board.place_mark(move, mark)
        return next_board

    def winner(self):
        rows = [self.grid[i,:] for i in range(3)]
        cols = [self.grid[:,j] for j in range(3)]
        diag = [np.array([self.grid[i,i] for i in range(3)])]
        cross_diag = [np.array([self.grid[2-i,i] for i in range(3)])]
        lanes = np.concatenate((rows, cols, diag, cross_diag))

        any_lane = lambda x: any([np.array_equal(lane, x) for lane in lanes])
        if any_lane(np.ones(3)):
            return "X"
        elif any_lane(np.zeros(3)):
            return "O"

    def make_key(self, mark):          
        fill_value = 9
        filled_grid = copy.deepcopy(self.grid)
        np.place(filled_grid, np.isnan(filled_grid), fill_value)
        return "".join(map(str, (list(map(int, filled_grid.flatten()))))) + mark

    def give_reward(self):                          
        if self.over():
            if self.winner() is not None:
                if self.winner() == "X":
                    return 1.0                      
                elif self.winner() == "O":
                    return -1.0                     
            else:
                return 0.5                          
        else:
            return 0.0

In [4]:
class Game:
    def __init__(self, master, player1, player2, Q_learn=None, Q={}, alpha=0.3, gamma=0.9):
        frame = tk.Frame()
        frame.grid()
        self.master = master
        master.title("Game")
        self.player1 = player1
        self.player2 = player2
        self.current_player = player1
        self.other_player = player2
        self.empty_text = ""
        self.board = Board()
        self.buttons = [[None, None, None], [None, None, None], [None, None, None]]
        for i in range(3):
            for j in range(3):
                self.buttons[i][j] = tk.Button(frame, height=3, width=3, text=self.empty_text, command=lambda i=i, j=j: self.callback(self.buttons[i][j]))
                self.buttons[i][j].grid(row=i, column=j)
        self.reset_button = tk.Button(text="Play again", command=self.reset)
        self.reset_button.grid(row=3)
        self.winner = None

    def callback(self, button):
        if self.board.over():
            pass                
        else:
            if isinstance(self.current_player, HumanPlayer) and isinstance(self.other_player, HumanPlayer):
                if self.empty(button):
                    move = self.get_move(button)
                    self.handle_move(move)
            elif isinstance(self.current_player, HumanPlayer) and isinstance(self.other_player, ComputerPlayer):
                computer_player = self.other_player
                if self.empty(button):
                    human_move = self.get_move(button)
                    self.handle_move(human_move)
                    if not self.board.over():               
                        computer_move = computer_player.get_move(self.board)
                        self.handle_move(computer_move)

    def empty(self, button):
        return button["text"] == self.empty_text

    def get_move(self, button):
        info = button.grid_info()
        move = (int(info["row"]), int(info["column"]))                
        return move

    def handle_move(self, move):
        i, j = move         
        self.buttons[i][j].configure(text=self.current_player.mark)     
        self.board.place_mark(move, self.current_player.mark)           
        if self.board.over():
            self.declare_outcome()
        else:
            self.switch_players()

    def declare_outcome(self):
        if self.board.winner() is None:
            print("Tie!")
        else:
            print(("The player with mark {mark} won!".format(mark=self.current_player.mark)))
            self.winner = self.current_player.mark

    def reset(self):
        for i in range(3):
            for j in range(3):
                self.buttons[i][j].configure(text=self.empty_text)
        self.board = Board(grid=np.ones((3,3))*np.nan)
        self.current_player = self.player1
        self.other_player = self.player2
        self.act()

    def switch_players(self):
        self.current_player, self.other_player = self.other_player, self.current_player

    def act(self):
        if isinstance(self.player1, HumanPlayer):
            pass
        elif isinstance(self.player1, ComputerPlayer) and isinstance(self.player2, HumanPlayer):
            first_computer_move = player1.get_move(self.board)
            self.handle_move(first_computer_move)
        elif isinstance(self.player1, ComputerPlayer) and isinstance(self.player2, ComputerPlayer):
            while not self.board.over():
                self.play_turn()

    def play_turn(self):
        move = self.current_player.get_move(self.board)
        self.handle_move(move)

In [5]:
# root = tk.Tk()
# playerType = ""
# player1 = HumanPlayer(mark="X")
# if playerType == "SafeAgent":
#     player2 = SafePlayer(mark="O")
# elif playerType == "RandomAgent":
#     player2 = RandomPlayer(mark="O")
# elif playerType == "HumanAgent":
#     player2 = HumanPlayer(mark="O")

# game = Game(root, player1, player2)

# game.act()
# root.mainloop()

In [6]:
games = 1000
win = 0
root = tk.Tk()
player1 = MCTSPlayer(mark="X")
player2 = RandomPlayer(mark="O")
game = Game(root, player1, player2)

while(games):
    game.act()
    if game.winner == "X":
        win = win + 1
    game.reset()
    games = games - 1


ayer with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark O won!
The player with mark X won!
Tie!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
Tie!
Tie!
Tie!
Tie!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark X won!
T

In [7]:
print(f"MCTSPlayer won {win} times against RandomPlayer")

MCTSPlayer won 979 times against RandomPlayer


In [8]:
games = 1000
win = 0
root = tk.Tk()
player1 = MCTSPlayer(mark="X")
player2 = MCTSPlayer(mark="O")
game = Game(root, player1, player2)

while(games):
    game.act()
    if game.winner == "X":
        win = win + 1
    game.reset()
    games = games - 1

The player with mark O won!
The player with mark X won!
Tie!
The player with mark O won!
Tie!
The player with mark X won!
Tie!
Tie!
The player with mark X won!
The player with mark O won!
The player with mark X won!
Tie!
The player with mark X won!
The player with mark X won!
The player with mark X won!
Tie!
Tie!
Tie!
The player with mark O won!
Tie!
The player with mark O won!
Tie!
Tie!
The player with mark X won!
The player with mark X won!
The player with mark X won!
The player with mark O won!
Tie!
The player with mark X won!
The player with mark X won!
Tie!
The player with mark X won!
Tie!
Tie!
Tie!
Tie!
The player with mark X won!
The player with mark X won!
The player with mark X won!
Tie!
The player with mark X won!
The player with mark X won!
The player with mark X won!
Tie!
The player with mark X won!
Tie!
The player with mark O won!
Tie!
Tie!
Tie!
The player with mark O won!
Tie!
Tie!
Tie!
Tie!
The player with mark X won!
The player with mark O won!
The player with mark X wo

In [9]:
print(f"MCTSPlayer won {win} times against MCTSPlayer")

MCTSPlayer won 622 times against MCTSPlayer
