In [21]:
import datetime
import numpy as np
from random import choice
from math import log, sqrt
from itertools import combinations

In [18]:
class Board(object):
    """Card data and (partially implemented) rules for a Splendor-style game.

    Colour indices used throughout:
    0 green, 1 blue, 2 red, 3 white, 4 black, 5 yellow.
    Index 5 (yellow) is the gold pile: `legal_plays` treats a player's
    column 5 as a wildcard when checking whether a card is affordable.
    """

    # Number of ordinary gem colours; they occupy indices 0-4 of every
    # token vector, with gold at index 5.
    GEM_COLORS = 5
    # Smallest pile from which two tokens of one colour may be taken.
    # The Splendor rule is "at least 4 tokens in the pile at the moment of
    # taking"; the previous threshold of 6 made the action impossible in a
    # 2-player game, where each pile starts at 4.
    MIN_PILE_FOR_PAIR = 4

    def __init__(self):
        """Create the full token supply, the three card decks and the nobles."""
        # Initialize tokens, development cards, noble tiles
        self.color = ['green', 'blue', 'red', 'white', 'black', 'yellow']
        # Tokens are ordered as follows:
        # green, blue, red, white, black, yellow
        self.tokens = np.array([7, 7, 7, 7, 7, 5])
        # Development cards
        # level, color, prestige, price(g, b, r, w, k)
        # (eight columns per row: three header columns + five price columns)
        self.devs1 = np.array([
            # level 1 black
            [1, 4, 0, 1, 1, 1, 1, 0],
            [1, 4, 0, 1, 2, 1, 1, 0],
            [1, 4, 0, 0, 2, 1, 2, 0],
            [1, 4, 0, 1, 0, 3, 0, 1],
            [1, 4, 0, 2, 0, 1, 0, 0],
            [1, 4, 0, 2, 0, 0, 2, 0],
            [1, 4, 0, 3, 0, 0, 0, 0],
            [1, 4, 1, 0, 4, 0, 0, 0],
            # level 1 blue
            [1, 1, 0, 1, 0, 1, 1, 1],
            [1, 1, 0, 1, 0, 2, 1, 1],
            [1, 1, 0, 2, 0, 2, 1, 0],
            [1, 1, 0, 3, 1, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 1, 2],
            [1, 1, 0, 2, 0, 0, 0, 2],
            [1, 1, 0, 0, 0, 0, 0, 3],
            [1, 1, 1, 0, 0, 4, 0, 0],
            # level 1 white
            [1, 3, 0, 1, 1, 1, 0, 1],
            [1, 3, 0, 2, 1, 1, 0, 1],
            [1, 3, 0, 2, 2, 0, 0, 1],
            [1, 3, 0, 0, 1, 0, 3, 1],
            [1, 3, 0, 0, 0, 2, 0, 1],
            [1, 3, 0, 0, 2, 0, 0, 2],
            [1, 3, 0, 0, 3, 0, 0, 0],
            [1, 3, 1, 4, 0, 0, 0, 0],
            # level 1 green
            [1, 0, 0, 0, 1, 1, 1, 1],
            [1, 0, 0, 0, 1, 1, 1, 2],
            [1, 0, 0, 0, 1, 2, 0, 2],
            [1, 0, 0, 1, 3, 0, 1, 0],
            [1, 0, 0, 0, 1, 0, 2, 0],
            [1, 0, 0, 0, 2, 2, 0, 0],
            [1, 0, 0, 0, 0, 3, 0, 0],
            [1, 0, 1, 0, 0, 0, 0, 4],
            # level 1 red
            [1, 2, 0, 1, 1, 0, 1, 1],
            [1, 2, 0, 1, 1, 0, 2, 1],
            [1, 2, 0, 1, 0, 0, 2, 2],
            [1, 2, 0, 0, 0, 1, 1, 3],
            [1, 2, 0, 1, 2, 0, 0, 0],
            [1, 2, 0, 0, 0, 2, 2, 0],
            [1, 2, 0, 0, 0, 0, 3, 0],
            [1, 2, 1, 0, 0, 0, 4, 0]
        ])
        self.devs2 = np.array([
            # level 2 black
            [2, 4, 1, 2, 2, 0, 3, 0],
            [2, 4, 1, 3, 0, 0, 3, 2],
            [2, 4, 2, 4, 1, 2, 0, 0],
            [2, 4, 2, 5, 0, 3, 0, 0],
            [2, 4, 2, 0, 0, 0, 5, 0],
            [2, 4, 3, 0, 0, 0, 0, 6],
            # level 2 blue
            [2, 1, 1, 2, 2, 3, 0, 0],
            [2, 1, 1, 3, 2, 0, 0, 3],
            [2, 1, 2, 0, 3, 0, 5, 0],
            [2, 1, 2, 0, 0, 1, 2, 4],
            [2, 1, 2, 0, 5, 0, 0, 0],
            [2, 1, 3, 0, 6, 0, 0, 0],
            # level 2 white
            [2, 3, 1, 3, 0, 2, 0, 2],
            [2, 3, 1, 0, 3, 3, 2, 0],
            [2, 3, 2, 1, 0, 4, 0, 2],
            [2, 3, 2, 0, 0, 5, 0, 3],
            [2, 3, 2, 0, 0, 5, 0, 0],
            [2, 3, 3, 0, 0, 0, 6, 0],
            # level 2 green
            [2, 0, 1, 2, 0, 3, 3, 0],
            [2, 0, 1, 0, 3, 0, 2, 2],
            [2, 0, 2, 0, 2, 0, 4, 1],
            [2, 0, 2, 3, 5, 0, 0, 0],
            [2, 0, 2, 5, 0, 0, 0, 0],
            [2, 0, 3, 6, 0, 0, 0, 0],
            # level 2 red
            [2, 2, 1, 0, 0, 2, 2, 3],
            [2, 2, 1, 0, 3, 2, 0, 3],
            [2, 2, 2, 2, 4, 0, 1, 0],
            [2, 2, 2, 0, 0, 0, 3, 5],
            [2, 2, 2, 0, 0, 0, 0, 5],
            [2, 2, 3, 0, 0, 6, 0, 0]
        ])
        self.devs3 = np.array([
            # level 3 black
            [3, 4, 3, 5, 3, 3, 3, 0],
            [3, 4, 4, 0, 0, 7, 0, 0],
            [3, 4, 4, 3, 0, 6, 0, 3],
            [3, 4, 5, 0, 0, 7, 0, 3],
            # level 3 blue
            [3, 1, 3, 3, 0, 3, 3, 5],
            [3, 1, 4, 0, 0, 0, 7, 0],
            [3, 1, 4, 0, 3, 0, 6, 3],
            [3, 1, 5, 0, 3, 0, 7, 0],
            # level 3 white
            [3, 3, 3, 3, 3, 5, 0, 3],
            [3, 3, 4, 0, 0, 0, 0, 7],
            [3, 3, 4, 0, 0, 3, 3, 6],
            [3, 3, 5, 0, 0, 0, 3, 7],
            # level 3 green
            [3, 0, 3, 0, 3, 3, 5, 3],
            [3, 0, 4, 0, 7, 0, 0, 0],
            [3, 0, 4, 3, 6, 0, 3, 0],
            [3, 0, 5, 3, 7, 0, 0, 0],
            # level 3 red
            [3, 2, 3, 3, 5, 0, 3, 3],
            [3, 2, 4, 7, 0, 0, 0, 0],
            [3, 2, 4, 6, 3, 3, 0, 0],
            [3, 2, 5, 7, 0, 3, 0, 0]
        ])
        # Noble tiles, six columns per row.
        # presumably: prestige, then required cards (g, b, r, w, k) -- the
        # layout is not documented in this file; verify before relying on it.
        # NOTE(review): every row except the last is either three 3s or two
        # 4s; the last row has two 3s and looks like a typo for
        # [3, 4, 0, 4, 0, 0] -- confirm against the physical tiles.
        self.nobles = np.array([
            [3, 3, 3, 3, 0, 0],
            [3, 0, 3, 0, 3, 3],
            [3, 0, 0, 0, 4, 4],
            [3, 0, 4, 0, 4, 0],
            [3, 4, 4, 0, 0, 0],
            [3, 3, 3, 0, 3, 0],
            [3, 0, 0, 3, 3, 3],
            [3, 3, 0, 3, 0, 3],
            [3, 0, 0, 4, 0, 4],
            [3, 3, 0, 3, 0, 0]
        ])

    def start(self, player_count, main_player):
        """Return a representation of the starting state of the game.

        The state is the tuple
        (current_player, main_player, players, tokens, devs1, devs2, devs3,
        nobles), where

        * players is a (player_count, 24) array of zeros; one row per player,
          laid out as 6 tokens + 5 devs + 3 reserves + 10 nobles,
        * tokens is the bank, reduced for small games (see below),
        * devs1/devs2/devs3/nobles are shuffled arrays of row indices into
          the matching `self.*` tables (not the card rows themselves).

        Shuffling uses numpy's global random state, so seed `np.random`
        beforehand for reproducible games.
        """
        # Player = 6 tokens + 5 devs + 3 reserves + 10 nobles
        players = np.zeros((player_count, 24))
        tokens = np.copy(self.tokens)
        # 2 players: remove 3 of each gem colour; 3 players: remove 2.
        # Gold is never reduced.  The subtraction has to be stored: calling
        # np.subtract() and ignoring its return value changes nothing.
        removed_per_color = {2: 3, 3: 2}.get(player_count, 0)
        tokens[:self.GEM_COLORS] -= removed_per_color
        devs1 = np.arange(self.devs1.shape[0])
        devs2 = np.arange(self.devs2.shape[0])
        devs3 = np.arange(self.devs3.shape[0])
        nobles = np.arange(self.nobles.shape[0])
        np.random.shuffle(devs1)
        np.random.shuffle(devs2)
        np.random.shuffle(devs3)
        np.random.shuffle(nobles)
        current_player = 0
        return (current_player, main_player, players, tokens, devs1, devs2, devs3, nobles)

    def current_player(self, state):
        """Take the game state and return the current player's number."""
        return state[0]

    def next_state(self, state, play):
        """Take the game state and the move to apply; return the new state.

        Not implemented yet: currently returns None.
        """
        pass

    def legal_plays(self, state_history):
        """Return the list of legal plays for the current player.

        Takes a sequence of game states representing the full game history;
        only the last state is inspected.

        Plays generated so far:

        * ('buy1', (a, b, c)) -- take one token of each of three different
          gem colours whose bank pile is non-empty,
        * ('buy2', a)         -- take two tokens of gem colour `a`, offered
          only when that pile holds at least MIN_PILE_FOR_PAIR tokens.

        Gold (index 5) is never offered by either action.  Purchasing and
        reserving development cards are not generated yet.
        """
        state = state_history[-1]
        current_player = state[0]
        players = state[2]
        tokens = state[3]
        devs1 = state[4]
        legal = []
        # Only the gem piles can be taken directly; look at the pile SIZE,
        # not at the colour index.
        gem_piles = tokens[:self.GEM_COLORS]
        # take 3 tokens of diff colors
        avail_tokens1 = [c for c in range(len(gem_piles)) if gem_piles[c] > 0]
        for buy in combinations(avail_tokens1, 3):
            legal.append(('buy1', buy))
        # take 2 tokens of same colors **pile must hold at least 4 tokens
        avail_tokens2 = [c for c in range(len(gem_piles))
                         if gem_piles[c] >= self.MIN_PILE_FOR_PAIR]
        for buy in avail_tokens2:
            legal.append(('buy2', buy))
        # reserve 1 dev card and take 1 gold
        # purchase 1 dev card from table or reserved
        # TODO: the three branches below classify each card but do not add
        # any play yet; only the first four level-1 cards are examined.
        # NOTE(review): affordability uses held tokens only; player columns
        # 6-10 ("5 devs") are not applied as a discount -- confirm intent.
        p = players[current_player]
        for faceup in devs1[0:4]:
            # Per-colour surplus (>= 0) or shortfall (< 0) against the price.
            diff = np.subtract(p[0:5], self.devs1[faceup][3:8])
            if np.all(diff >= 0):
                # buy normally
                pass
            elif np.sum(diff[diff < 0]) + p[5] >= 0:
                # buy with some gold
                pass
            else:
                # reserve
                pass
        return legal

    def winner(self, state_history):
        """Report the outcome of the game.

        Takes a sequence of game states representing the full game history.
        If the game is now won, return the player number.  If the game is
        still ongoing, return zero.  If the game is tied, return a different
        distinct value, e.g. -1.

        Not implemented yet: currently returns None.
        """
        pass

In [4]:
class MonteCarlo(object):
    def __init__(self, board, **kwargs):
        # Takes an instance of a Board and optionally some keyword
        # arguments.  Initializes the list of game states and the
        # statistics tables.
        self.board = board
        self.states = []
        seconds = kwargs.get('time', 30)
        self.calculation_time = datetime.timedelta(seconds=seconds)
        self.max_moves = kwargs.get('max_moves', 100)
        self.wins = {}
        self.plays = {}
        self.C = kwargs.get('C', 1.4)

    def update(self, state):
        # Takes a game state, and appends it to the history.
        self.states.append(state)

    def get_play(self):
        # Causes the AI to calculate the best move from the
        # current game state and return it.
        self.max_depth = 0
        state = self.states[-1]
        player = self.board.current_player(state)
        legal = self.board.legal_plays(self.states[:])

        # Bail out early if there is no real choice to be made.
        if not legal:
            return
        if len(legal) == 1:
            return legal[0]

        games = 0
        begin = datetime.datetime.utcnow()
        while datetime.datetime.utcnow() - begin < self.calculation_time:
            self.run_simulation()
            games += 1

        moves_states = [(p, self.board.next_state(state, p)) for p in legal]

        # Display the number of calls of `run_simulation` and the
        # time elapsed.
        print(games, datetime.datetime.utcnow() - begin)

        # Pick the move with the highest percentage of wins.
        percent_wins, move = max(
            (self.wins.get((player, S), 0) / self.plays.get((player, S), 1),
             p)
            for p, S in moves_states
        )

        # Display the stats for each possible play.
        for x in sorted(
            ((100 * self.wins.get((player, S), 0) / self.plays.get((player, S), 1),
              self.wins.get((player, S), 0),
              self.plays.get((player, S), 0), 
              p)
             for p, S in moves_states),
            reverse=True
        ):
            print("{3}: {0:.2f}% ({1} / {2})".format(*x))

        print("Maximum depth searched:", self.max_depth)

        return move

    def run_simulation(self):
        # Plays out a "random" game from the current position,
        # then updates the statistics tables with the result.
        plays, wins = self.plays, self.wins

        visited_states = set()
        states_copy = self.states[:]
        state = states_copy[-1]
        player = self.board.current_player(state)

        expand = True
        for t in range(1, self.max_moves + 1):
            legal = self.board.legal_plays(states_copy)
            moves_states = [(p, self.board.next_state(state, p)) for p in legal]

            if all(plays.get((player, S)) for p, S in moves_states):
                # If we have stats on all of the legal moves here, use them.
                log_total = log(sum(plays[(player, S)] for p, S in moves_states))
                value, move, state = max(
                    ((wins[(player, S)] / plays[(player, S)]) + self.C * sqrt(log_total / plays[(player, S)]), 
                    p, 
                    S)
                    for p, S in moves_states
                )
            else:
                # Otherwise, just make an arbitrary decision.
                move, state = choice(moves_states)

            states_copy.append(state)

            # `player` here and below refers to the player
            # who moved into that particular state.
            if expand and (player, state) not in plays:
                expand = False
                plays[(player, state)] = 0
                wins[(player, state)] = 0
                if t > self.max_depth:
                    self.max_depth = t

            visited_states.add((player, state))

            player = self.board.current_player(state)
            winner = self.board.winner(states_copy)
            if winner:
                break

        for player, state in visited_states:
            if (player, state) not in plays:
                continue
            plays[(player, state)] += 1
            if player == winner:
                wins[(player, state)] += 1


In [47]:
# Scratch check: iterating over the concatenation of two 1-D arrays
# visits every element of A first, then every element of B.
A = np.array([1, 2, 3, 4])
B = np.array([1, 4, 5])
for x in np.concatenate([A, B]):
    print(x)

1
2
3
4
1
4
5
