In [1]:
import copy
import time
from math import log, sqrt
from random import choice

from board import Board

In [None]:
class MCTS(object):
    """Choose the computer's next move with Monte Carlo Tree Search and UCB.

    The searcher always plays as ``'x'``. By default it spends 3 seconds per
    move and caps every simulated game at 1000 moves.

    The board object is expected to expose ``availables`` (sequence of legal
    moves), ``states`` (indexable by move), ``width``, ``height``,
    ``update(player, move)`` and ``move_to_location(move)``. Win detection
    treats a move as the integer cell index ``row * width + column``.
    """

    def __init__(self, board, time=3, max_actions=1000, n_in_row=None):
        """Set up a searcher for ``board``.

        Args:
            board: Starting position; the search looks for the best next move.
            time: Wall-clock budget per move, in seconds.
            max_actions: Maximum number of moves played in one simulated game.
            n_in_row: Number of aligned stones that wins. When omitted, the
                board's ``n_in_row`` attribute is used if it has one.
                NOTE(review): the final fallback of 3 is an assumption (the
                game uses 'x'/'o' marks) - confirm against ``board.Board``.
        """
        self.board = board  # starting position to search from
        self.player = 'x'  # the computer is the one searching, so it is always 'x'
        self.nextmove = ()

        self.players = ['x', 'o']  # turn order: 'x' (the computer) moves first
        self.c_time = float(time)  # maximum thinking time in seconds
        self.max_actions = max_actions  # maximum moves per simulated game

        if n_in_row is None:
            n_in_row = getattr(board, 'n_in_row', 3)
        self.n_in_row = n_in_row  # stones in a line needed to win

        self.confident = 1.96  # constant in the UCB exploration term
        self.plays = {}  # (player, move) -> number of simulations that used it
        self.wins = {}  # (player, move) -> number of those simulations won
        self.max_depth = 1

    def get_action(self):
        """Run simulations for ``c_time`` seconds and return the chosen move.

        Returns:
            The selected move, taken from ``board.availables``.

        Raises:
            ValueError: If the board has no available move.
        """
        availables = self.board.availables
        if not availables:
            raise ValueError("no available moves left on the board")
        if len(availables) == 1:
            return availables[0]  # only one cell left: nothing to search

        # Statistics are reset for every decision: after the AI and the
        # opponent have both moved, the position differs and the old
        # win/play counts no longer describe it.
        self.plays = {}
        self.wins = {}
        simulations = 0
        begin = time.time()
        while time.time() - begin < self.c_time:
            # A simulation mutates the board, so it works on a deep copy.
            board_copy = copy.deepcopy(self.board)
            # Each simulation rotates the turn order, so it gets its own copy.
            play_turn_copy = list(self.players)
            self.run_simulation(board_copy, play_turn_copy)
            simulations += 1

        print("total simulations=", simulations)

        move = self.select_one_move()
        location = self.board.move_to_location(move)
        print('Maximum depth searched:', self.max_depth)

        print("AI move: %d,%d\n" % (location[0], location[1]))

        return move

    def run_simulation(self, board, play_turn):
        """Play one simulated game and update ``plays`` / ``wins``.

        Args:
            board: Scratch copy of the position; it is modified in place.
            play_turn: Mutable turn order; it is rotated as players move.
        """
        plays = self.plays
        wins = self.wins

        player = self.get_player(play_turn)  # player to move now
        visited_states = set()  # every (player, move) on this path
        winner = -1
        expand = True

        # Simulation
        for t in range(1, self.max_actions + 1):
            # Re-read the legal moves every step so the list is never stale,
            # and stop instead of calling log(0) / choice([]) on a full board.
            availables = board.availables
            if not availables:
                break

            # Selection: once every legal move has statistics, take the one
            # with the largest UCB value; otherwise pick a random move.
            if all(plays.get((player, move)) for move in availables):
                log_total = log(
                    sum(plays[(player, move)] for move in availables))
                _, move = max(
                    ((wins[(player, move)] / plays[(player, move)]) +
                     sqrt(self.confident * log_total / plays[(player, move)]), move)
                    for move in availables)
            else:
                move = choice(availables)

            board.update(player, move)

            # Expansion: at most one new (player, move) per simulation.
            if expand and (player, move) not in plays:
                expand = False
                plays[(player, move)] = 0
                wins[(player, move)] = 0
                if t > self.max_depth:
                    self.max_depth = t

            visited_states.add((player, move))

            win, winner = self.has_a_winner(board)
            if win or not board.availables:  # game over: won or board full
                break

            player = self.get_player(play_turn)

        # Back-propagation
        for visited_player, visited_move in visited_states:
            key = (visited_player, visited_move)
            if key not in plays:
                continue
            plays[key] += 1  # every tracked move on the path was played once more
            if visited_player == winner:
                wins[key] += 1  # the winner's tracked moves gain a win

    def get_player(self, players):
        """Return the next player and rotate ``players`` in place."""
        p = players.pop(0)
        players.append(p)
        return p

    def select_one_move(self):
        """Return the available move with the highest observed win rate."""
        percent_wins, move = max(
            (self.wins.get((self.player, move), 0) /
             (self.plays.get((self.player, move)) or 1),  # never divide by 0
             move)
            for move in self.board.availables)

        return move

    def has_a_winner(self, board):
        """Check whether any player has ``n_in_row`` stones in a line.

        Returns:
            ``(True, player)`` for the first winning line found, otherwise
            ``(False, -1)``.
        """
        width = board.width
        height = board.height
        states = board.states
        n = self.n_in_row

        moved = list(set(range(width * height)) - set(board.availables))
        # A line needs at least n stones on the board. (The previous bound of
        # n + 2 skipped real wins for small n, e.g. n=2 after three moves.)
        if len(moved) < n:
            return False, -1

        def owner(index):
            # Cells missing from ``states`` count as empty.
            try:
                return states[index]
            except (KeyError, IndexError):
                return None

        for m in moved:
            h, w = divmod(m, width)
            player = owner(m)
            if player is None:
                continue

            fits_right = w <= width - n  # room for n cells to the right
            fits_down = h <= height - n  # room for n cells going down
            fits_left = w >= n - 1  # room for n cells to the left

            steps = []
            if fits_right:
                steps.append(1)  # along the row
            if fits_down:
                steps.append(width)  # along the column
            if fits_right and fits_down:
                steps.append(width + 1)  # diagonal, down and to the right
            if fits_left and fits_down:
                steps.append(width - 1)  # diagonal, down and to the left

            for step in steps:
                if all(owner(m + k * step) == player for k in range(n)):
                    return True, player

        return False, -1

    def __str__(self):
        return "AI"

In [2]:
class Human(object):
    """Human player whose moves are typed on standard input."""

    def __init__(self, board, player):
        """Remember the shared ``board`` and this player's mark."""
        self.board = board
        self.player = player

    def get_action(self):
        """Prompt until a legal move is entered and return it.

        The answer is read as comma-separated integers (for example ``3,4``)
        and converted with ``board.location_to_move``. Anything that cannot
        be parsed, converts to ``-1`` or is not in ``board.availables`` prints
        ``invalid move`` and asks again.

        Returns:
            A move that is currently in ``board.availables``.
        """
        # Loop instead of recursing so repeated bad input cannot exhaust the
        # call stack. Only one prompt is shown: the earlier extra prompt threw
        # its answer away and described a format the parser does not accept.
        while True:
            # Kept outside the try so EOFError (closed stdin) reaches the
            # caller instead of causing endless retries.
            answer = input("Your move: ")
            try:
                location = [int(n, 10) for n in answer.split(",")]
                move = self.board.location_to_move(location)
            except Exception:
                # location_to_move is defined elsewhere and may raise for
                # malformed locations; any failure counts as an invalid move.
                move = -1
            if move != -1 and move in self.board.availables:
                return move
            print("invalid move")

    def __str__(self):
        return "Human"

In [3]:
# Create the board used by the following cells. The rendering printed further
# down shows a grid of 5 rows by 4 columns.
# NOTE(review): the meaning of the third argument (2) is defined in
# board.Board and is not visible here - confirm.
a=Board(5,4,2)

In [7]:
# Replay a fixed sequence of stones, in this exact order, as
# (player, (row, column)) pairs, then render the board and query its state.
for mark, spot in (
    ('o', (1, 3)),
    ('x', (3, 2)),
    ('o', (1, 4)),
    ('x', (3, 4)),
    ('x', (4, 3)),
):
    a.update(mark, spot)

a.oxshow()

a.oxstates()


      1   2   3   4 
    .   .   .   .   .
 1            Q   Q 
    .   .   .   .   .
 2                  
    .   .   .   .   .
 3        X       X 
    .   .   .   .   .
 4            X     
    .   .   .   .   .
 5                  
    .   .   .   .   .


-1

In [None]:



# Draft of the main game loop.
# - Given the list of valid moves, find the best position.
# - Walk through every candidate and pick the one with the highest weight.
# - While walking the candidates, assign a value to each position.

# Keep playing while nobody has won.
while checkwin() == 0:
    # Fetch the list of positions that can still be played.
    ox_a = oxavalible()
    if ox_a == 0:
        # No playable position is left.
        break
    if player == 'x':
        # Human turn (entering the move is not implemented here yet).
        player = 'o'
    elif player == 'o':
        # Computer turn. This is an elif so that each pass handles exactly one
        # turn: the win check and the move list are refreshed before the
        # computer plays, instead of falling through right after the human.
        bestlocation = getlocation(ox=ox_a)
        marklocation(bestlocation,*ox_p)
        player = 'x'

# Evaluate the final state once and report it.
outcome = checkwin()
if outcome == 0:
    print('平局')
elif outcome == 1:
    print('选手赢')
else:
    print('计算机赢')

In [None]:
for i in range(self.height):        #行数
            for j in range(self.width):     #列数
                
                    elif i*self.width+j <100:
                        print('. %d'%(i*self.width+j+1), end='')
                    else:
                        print('.%d'%(i*self.width+j+1), end='')
                elif self.ox[i][j] == 1:   # x 落子
                    print('. X ', end='')
                elif self.ox[i][j] == -1:  # o 落子
                    print('. Q ', end='')
                    
                '''打印最后一列，并换行'''
                if j == self.width-1:
                    print('.')
                    print()

In [None]:
# Map a running index to each (i, j) pair with i in 0..1 and j in 0..2,
# giving six entries keyed 0..5.
dict(enumerate((i, j) for i in range(2) for j in range(3)))
