In [1]:
import numpy as np
import copy
import time

In [2]:
class Node:
    def __init__(self, game, value, player, parent, action):
        self.game = game
        self.value = value
        self.player = player
        self.parent = parent
        self.action = action
        self.childs = []
        self.S = 0
        self.N = 0

    def expand(self):
        actions = self.game.getPossibleActions()
        for action in actions:
            game = copy.deepcopy(self.game)
            game.updateBoard(action, self.value)
            child = Node(game, self.value * -1, self.player, self, action)
            self.childs.append(child)
            res = child.rollout()
            res = 1 if self.player == res else 0
            child.backpropogate(res)

    def rollout(self):
        game = copy.deepcopy(self.game)
        while True:
            for turn in [self.value, -self.value]:
                if len(game.getPossibleActions()) > 0:
                    action = np.random.choice(game.getPossibleActions())
                    
                    game.updateBoard(action, turn)
                    gameWon = game.checkVictory(action, turn)
                    if gameWon:
                        return turn
                    turn *= -1
                else:
                    return 0
    
    def backpropogate(self, value):
        node = self
        while node != None:
            node.S += value
            node.N += 1
            node = node.parent

    def select_child(self, C):
        if len(self.childs) > 0:
            UCBs = [child.UCB(C) for child in self.childs]
            max_i = np.argmax(UCBs)
            return self.childs[max_i]
        else:
            return None

    def UCB(self, C):
        if self.N > 0 and self.parent.N > 0:
            return (self.S / self.N) + C * (np.sqrt(np.log(self.parent.N)/self.N))
        else:
            return np.inf

class MCTS:
    def __init__(self, C, thinking_time):
        self.C = C
        self.thinking_time = thinking_time

    def think(self, game, value):
        root = Node(copy.deepcopy(game), value, value, None, None)
        start_time = time.time()
        expanded = 0
        while time.time() - start_time < self.thinking_time:
            node = self.select(root)
            node.expand()
            expanded += 1
        
        print(f"expanded: {expanded}")
        return root.select_child(self.C).action

    def select(self, node):
        selected = node.select_child(self.C)
        while selected != None:
            node = selected
            selected = node.select_child(self.C)
        return node

In [3]:
from game import gamerules

In [4]:
class MCTSPlayer(gamerules.Player):
    def __init__(self, name, mcts):
        super().__init__(name)
        self.mcts = mcts
    
    def getAction(self, board, value):
        action = self.mcts.think(board, value)
        return action

    def newGame(self, new_opponent):
        pass

In [5]:
from utils import play_game, test_games
from classes.player import RNGPlayer

In [6]:
mcts = MCTS(0.8, 1)
p1 = MCTSPlayer("Custom", mcts)
p2 = RNGPlayer()

In [7]:
test_games(p1, p2, 100)

expanded: 15
expanded: 17
expanded: 19
expanded: 20
expanded: 25
expanded: 23
expanded: 20
expanded: 25
expanded: 28
expanded: 39
expanded: 41
expanded: 51
expanded: 69
expanded: 75
expanded: 101
expanded: 115
expanded: 255
expanded: 4957
expanded: 8856
expanded: 16372
expanded: 24674
expanded: 18
expanded: 19
expanded: 21
expanded: 18
expanded: 17
expanded: 19
expanded: 32
expanded: 38
expanded: 34
expanded: 45
game pair 0, result: (1; -1)
expanded: 18
expanded: 18
expanded: 20
expanded: 23
expanded: 25
expanded: 32
expanded: 38
expanded: 40
expanded: 54
expanded: 46
expanded: 38
expanded: 46
expanded: 68
expanded: 75
expanded: 91
expanded: 123
expanded: 191
expanded: 4863
expanded: 10481
expanded: 16053
expanded: 26385
expanded: 18
expanded: 19
expanded: 19
expanded: 23
expanded: 22
expanded: 26
expanded: 27
expanded: 30
expanded: 36
expanded: 39
expanded: 46
expanded: 54
expanded: 70
expanded: 76
expanded: 94
expanded: 119
expanded: 309
expanded: 7991
game pair 1, result: (1; 1)
exp