### Connect Four implementation

In [18]:
class Board:

    WIDTH: int = 7
    HEIGHT: int = 6
    TOKENS: list = [
        [7, 4], # Horizontal Connect-Four
        [1, 2], # Vertical Connect-Four
        [6, 12], # Diagonal Connect-Four
        [8, 16] # Diagonal Connect-Four
    ]

    def __init__(self) -> None:
        '''
        position: bitstring representing the positions of the tokens of a player
        mask: bitstring representing the positions of both players
        '''
        self.position: int = 0
        self.mask: int = 0
        self.moves = 0
    
    def clone(self):
        result: Board = Board()
        result.position = self.position
        result.mask = self.mask
        result.moves = self.moves
        return result
    
    def is_playable(self, col: int) -> bool:
        top_mask: int = 1 << (Board.HEIGHT - 1) << col * (Board.HEIGHT + 1)
        return (self.mask & top_mask) == 0

    def connected_four(self) -> bool:
        line: int

        # Use the position bitboard to determine a win
        for [a, b] in Board.TOKENS:
            line = self.position & (self.position >> a)
            if (line & (line >> b)):
                return True

        return False
    
    def move(self, col: int) -> bool:
        if not (0 <= col < Board.WIDTH):
            raise ValueError
        
        if not self.is_playable(col):
            raise ValueError

        bottom_mask: int = 1 << (col * (Board.HEIGHT + 1))
        self.position ^= self.mask
        self.mask |= self.mask + bottom_mask
        self.moves += 1

        return self.connected_four()
    
    def get_valid_moves(self) -> list:
        return [c for c in range(Board.WIDTH) if self.is_playable(c)]

Code to run a simulation between two players

In [21]:
# players = [player_one, player_two]
def run_simulations(iterations: int, players: list) -> list:
    wins: list = [
        0, # player one's wins
        0  # player two's wins
    ]

    game: Board
    action: int # which column will be chosen by the Player class
    player: int # whether we're player 0 or 1

    for i in range(iterations):
        if i % 100 == 0:
            print(f"{i} games completed.")
        
        game = Board()

        while len(game.get_valid_moves()) > 0:
            player = game.moves % 2
            action = players[player].move(game)
            if game.move(action):
                wins[player] += 1
                break
    
    return wins
        

Abstract Class that different players will inherit

In [11]:
from abc import ABC, abstractmethod

class BasePlayer:
    def __init__(self):
        pass
    
    @abstractmethod
    def move(self, game):
        pass


Random player which randomly chooses a move from the valid moves

In [12]:
import random

class RandomPlayer(BasePlayer):
    def __init__(self):
        pass

    def move(self, game):
        return random.choice(game.get_valid_moves())

Quick test to make sure that the game works correctly - the wins for each player should be roughly half and there should be very little draws

In [25]:
p1 = RandomPlayer()
p2 = RandomPlayer()
wins = run_simulations(1000, [p1, p2])
print("Random Player 1 wins: " + str(wins[0]))
print("Random Player 2 wins: " + str(wins[1]))
print("Draws: " + str(1000 - wins[0] - wins[1]))

0 games completed.
100 games completed.
200 games completed.
300 games completed.
400 games completed.
500 games completed.
600 games completed.
700 games completed.
800 games completed.
900 games completed.
Random Player 1 wins: 467
Random Player 2 wins: 533
Draws: 0


# MCTS Implementation

In [6]:
import math
import time
import copy

class Node:
    def __init__(self, game, game_copy, predecessor):
        self.game = game 
        self.game_copy = game_copy
        self.predecessor = predecessor
        self.actions = game.get_valid_moves()
        self.successors = []
        self.payoff = 0
        self.num_paths = 0
    
class MCTSPlayer(BasePlayer):
    def __init__(self, time_limit):
        self.time_limit = time_limit

    def move(self, game, player):
        game_cpy = game.clone()
        root = Node(game_cpy, None, None)
        time_end = time.time() + self.time_limit
        while time.time() < time_end:
            curr_node = root
            while not curr_node.game.is_terminal:
                if len(curr_node.actions) != 0:
                    break
                vals = []
                for successor in curr_node.successors:
                    if successor.predecessor.game.player_to_move == 1:
                        vals.append(successor.payoff / successor.num_paths - 2 * math.sqrt(2) * math.sqrt(math.log(successor.predecessor.num_paths) / successor.num_paths))
                    else:
                        vals.append(successor.payoff / successor.num_paths + 2 * math.sqrt(2) * math.sqrt(math.log(successor.predecessor.num_paths) / successor.num_paths))
                if(curr_node.game.player_to_move == 1):
                    i = vals.index(min(vals))
                else:
                    i = vals.index(max(vals))
                curr_node = curr_node.successors[i]
            if not curr_node.game.is_terminal:
                action = curr_node.actions.pop()
                game_copy = curr_node.game
                game_copy.move(action)
                next_node = Node(game_copy, action, curr_node)
                curr_node.successors.append(next_node)
                curr_state = next_node
                while not curr_state.game.is_terminal:
                    action = random.choice(curr_state.actions)
                    game_copy = curr_node.game
                    game_copy.move(action)
                    new_state = Node(game_copy, action, curr_state)
                    curr_state = new_state
                if curr_state.game.winner is None:
                    payoff = 0
                elif curr_state.game.winner == player:
                    payoff = 1
                else:
                    payoff = -1
                tmp = next_node
                while(tmp is not None):
                    tmp.num_paths += 1
                    tmp.payoff += payoff
                    tmp = tmp.predecessor
            else:
                if curr_node.game.winner is None:
                    payoff = 0
                elif curr_node.game.winner == player:
                    payoff = 1
                else:
                    payoff = -1
                tmp = curr_node
                while(tmp is not None):
                    tmp.num_paths += 1
                    tmp.payoff += payoff
                    tmp = tmp.predecessor
        exploitations = []
        for successor in root.successors:
            exploitations.append(successor.payoff / successor.num_paths)
        if(root.game.player_to_move == 1):
            i = exploitations.index(min(exploitations))
        else:
            i = exploitations.index(max(exploitations))
        return root.successors[i].game_copy

Results of MCTS vs random player

In [7]:
p1 = MCTSPlayer(0.01)
p2 = RandomPlayer()
wins = run_simulations(1000, [p1, p2])
print("MCTS Player wins: " + str(wins[p1]))
print("Random Player wins: " + str(wins[p2]))
print("Draws: " + str(1000 - wins[p1] - wins[p2]))

0 games done
100 games done
200 games done
300 games done
400 games done
500 games done
600 games done
700 games done
800 games done
900 games done
MCTS Player wins: 785
Random Player wins: 215
Draws: 0


In [None]:
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os

class NeuralNetwork:
    def __init__(self):
        self.model_file = 'model.h5'
        model = Sequential()
        model.add(Conv2D(64, (4,4), input_shape=(6, 7, 1)))
        model.add(Activation('relu'))
        model.add(Conv2D(64, (2, 2)))
        model.add(Activation('relu'))
        model.add(Conv2D(64, (2, 2)))
        model.add(Activation('relu'))
        model.add(Flatten())
        model.add(Dense(64))
        model.add(Activation('relu'))
        model.add(Dense(1))
        model.compile(loss = 'mean_squared_error', optimizer = keras.optimizers.Adagrad(), metrics=['accuracy'])
        self.model = model
        if os.path.isfile(model_file):
            self.model = load_model(model_file)
        