In [1]:
import numpy as np
import copy
import itertools
import random
import time
#import math
#from board import Board

In [2]:
class BoardMCTSnode(object):
    '''Board node used by the Monte Carlo tree search.

    A node stores one board position, whose turn it is in that position,
    links to its parent and children, and the visit/value statistics the
    search accumulates. The defaults describe an empty 3x3 tic-tac-toe board.
    '''

    def __init__(self, ox_board=np.zeros((3,3),dtype=int), playturn='x', parent=None):
        '''Create a node.

        Parameters
        ----------
        ox_board : M x N array of cell values: 0 = empty, 1 = 'x', -1 = 'o'.
        playturn : 'x' or 'o', the side to move in this position.
        parent   : the node this one was expanded from, or None for a root.
        '''
        # Private copy: later in-place edits of this node's board never leak
        # back into the caller's array or into the shared default argument.
        self.board = copy.deepcopy(ox_board)
        self.width = self.board.shape[1]     # number of columns
        self.height = self.board.shape[0]    # number of rows

        self.playturn = playturn

        self.parent = parent
        self.children = []       # expanded child nodes
        self.child_moves = []    # child_moves[k] is the (row, col) move leading to children[k]

        self.N = 0    # visit count
        self.W = 0    # accumulated rollout result (total action value)
        # Weights of the exploration term as used by best_child():
        #   score = W/N + p * sqrt(c * ln(parent.N) / N)
        self.c = 1.96
        self.p = 1.4

    def board_show(self):
        '''Print the board to stdout with row and column indices.

        Empty cells are drawn as '+', 'x' stones as 'X' and 'o' stones as 'Q';
        any other cell value prints nothing.
        '''
        symbols = {0: '+', 1: 'X', -1: 'Q'}

        # Header line: a 15-character right-aligned corner label followed by
        # the column indices, each centred in 5 characters. The backslash is
        # escaped explicitly because '\y' is not a valid escape sequence.
        print("{0:>15}".format('x\\y'), end='')
        for j in range(self.width):
            print("{0:^5}".format(j), end='')
        print('\n')  # ends the line and leaves one blank line

        # One line per board row: 15-character row label, then the cells.
        for i in range(self.height):
            print("{0:>15}".format(i), end='')
            for j in range(self.width):
                symbol = symbols.get(self.board[i][j])
                if symbol is not None:
                    print("{0:^5}".format(symbol), end='')
            print('\n')
            
            
    

In [3]:
def get_moves(ox:np.ndarray):
    '''Return the empty cells of the board as a list of (row, col) tuples.

    A cell is empty when its value equals 0. Cells are listed in row-major
    order, e.g. [(0, 0), (0, 1)].
    '''
    empty = np.where(ox == 0)
    row_idx, col_idx = empty[0], empty[1]
    moves = []
    for cell in zip(row_idx, col_idx):
        moves.append(cell)
    return moves
    
    
def get_states(ox:np.ndarray, ninline=3):
    '''Evaluate a board position.

    Parameters
    ----------
    ox      : 2-D board array; 0 = empty, 1 = 'x', -1 = 'o'.
    ninline : number of stones in a straight line needed to win.

    Returns
    -------
    int or None
        > 0  : 'x' has a line; the value is 1 + number of empty cells, so
               wins reached with more free cells score higher.
        < 0  : 'o' has a line; the value is -1 - number of empty cells.
        0    : no line and no empty cell left (draw).
        None : no line yet and at least one empty cell (game not finished).
    '''
    # Stays 0 when no window fits at all (board smaller than ninline), so that
    # case is reported as draw/unfinished instead of raising UnboundLocalError.
    states = 0

    # Slide an ninline x ninline window over the board, row-major.
    for i in range(ox.shape[0] - ninline + 1):
        for j in range(ox.shape[1] - ninline + 1):
            nm = ox[i:i+ninline, j:j+ninline]

            # Sums of every row and column of the window; a sum of +/-ninline
            # means that whole row/column belongs to one side.
            n_line = set(nm.sum(axis=1)) | set(nm.sum(axis=0))
            # Main diagonal via the trace, anti-diagonal via the trace of the
            # vertically flipped window.
            n_line.add(nm.trace())
            n_line.add(nm[::-1].trace())

            if -ninline in n_line:
                states = -1
            elif ninline in n_line:
                states = 1
            if states != 0:
                break
        # The first window containing a line decides; stop scanning.
        if states != 0:
            break

    empty_cells = int(np.count_nonzero(ox == 0))

    if states == 1:
        return 1 + empty_cells
    if states == -1:
        return -1 - empty_cells
    return 0 if empty_cells == 0 else None
    
    
def best_child(node:'BoardMCTSnode', player='x'):
    '''Pick the child of `node` with the best upper-confidence score.

    For each child the exploitation term is Q = W/N and the exploration bonus
    is U = p * sqrt(c * ln(node.N) / N).

    * player == 'x' maximises, so the child with the largest  Q + U wins.
    * any other player minimises, so the child with the smallest Q - U wins.
      The bonus must be subtracted here: taking the minimum of Q + U would
      push the search away from rarely visited children.

    A child that has never been visited (N == 0) is returned immediately,
    which also avoids a division by zero.

    Raises
    ------
    ValueError : if `node` has no children.
    '''
    if not node.children:
        raise ValueError('best_child() called on a node without children')

    # Flipping the sign of Q turns the minimising case into an argmax as well.
    sign = 1.0 if player == 'x' else -1.0
    # ln(0) is undefined; a parent without visits contributes no bonus.
    log_parent = np.log(max(node.N, 1))

    scores = []
    for child in node.children:
        if child.N == 0:
            return child
        exploit = child.W / child.N
        explore = child.p * np.sqrt(child.c * log_parent / child.N)
        scores.append(sign * exploit + explore)

    return node.children[int(np.argmax(scores))]
    
    
def rollout_policy(possible_moves):
    '''Pick one entry of `possible_moves` uniformly at random and return it.'''
    pick = np.random.randint(len(possible_moves))
    return possible_moves[pick]
               
        
def rollout(rollout_board:np.ndarray, playturn='x', ninline=3, counts=1000):
    '''Play `counts` random games from a position and sum their results.

    Every playout starts from a copy of `rollout_board` with `playturn` to
    move; the two sides then alternate random moves until get_states()
    reports a finished game. The return value is the sum of those
    get_states() results over all playouts.
    '''
    total = 0
    for _ in range(counts):
        sim_board = copy.deepcopy(rollout_board)
        remaining = get_moves(sim_board)   # shrinks as the playout proceeds
        mover = playturn

        while True:
            outcome = get_states(sim_board, ninline)
            if outcome is not None:        # game over: win, loss or draw
                break
            pick = rollout_policy(remaining)
            sim_board[pick] = 1 if mover == 'x' else -1
            remaining.remove(pick)
            mover = 'o' if mover == 'x' else 'x'

        total += outcome

    return total
    
    
def traverse(root:BoardMCTSnode):
    '''Select the node to simulate next, looking only at root's direct children.

    While `root` still has a legal move without a child node, one such move
    is chosen at random, a child node for it is created, registered on
    `root` and returned. Once every legal move has a child, the child chosen
    by best_child() is returned instead.
    '''
    untried = get_moves(root.board)

    if len(untried) != len(root.child_moves):
        # Drop the moves that already have a child node.
        for tried in root.child_moves:
            untried.remove(tried)

        move = untried[np.random.randint(len(untried))]
        x_to_move = root.playturn == 'x'

        next_board = copy.deepcopy(root.board)
        next_board[move] = 1 if x_to_move else -1
        # The other side is to move in the new position.
        child_node = BoardMCTSnode(next_board, 'o' if x_to_move else 'x', root)

        root.children.append(child_node)
        root.child_moves.append(move)
        return child_node

    # Every legal move is already expanded.
    return best_child(root, root.playturn)
    

def backpropagate(node, result):
    '''Add one visit and `result` to `node` and to each of its ancestors.

    Walks the `parent` links up to the root, incrementing N by one and W by
    `result` on every node on the way. Returns 0 when called with
    `node` = None and None otherwise.
    '''
    if node is None:
        return 0

    current = node
    while current is not None:
        current.N += 1
        current.W += result
        current = current.parent
    
    
def monte_carlo_tree_search(root:BoardMCTSnode, ninline=3, timelast=3.0, mounts=1000):
    '''Search from `root` for a fixed time budget and return the chosen child.

    Parameters
    ----------
    root     : node holding the current position; its children are expanded
               and their statistics updated in place.
    ninline  : number of stones in a line needed to win (passed to rollout()).
    timelast : time budget in seconds. At least one iteration always runs,
               even when the budget is zero.
    mounts   : number of random playouts per iteration (passed to rollout()).

    Returns
    -------
    The child of `root` selected by best_child() after the budget is used up.
    '''
    # Monotonic clock: the budget is an elapsed duration, so it must not be
    # affected by system clock adjustments.
    deadline = time.monotonic() + timelast

    while True:
        leaf = traverse(root)     # newly expanded child or the best-scoring child
        simulation_result = rollout(leaf.board, leaf.playturn, ninline, mounts)
        backpropagate(leaf, simulation_result)

        if time.monotonic() >= deadline:
            break

    return best_child(root, root.playturn)

In [9]:
# Interactive game on a 6x6 board, four in a row wins.
# The search plays 'x' (cell value 1); the human plays 'o' (cell value -1).
NINLINE = 4

ttt = np.zeros((6, 6), dtype=int )

player = 'x'

root = BoardMCTSnode(ttt, player)
root.board_show()

while get_states(root.board, NINLINE) is None:
    if player=='x':
        # Computer move: search for 3 seconds, 1000 playouts per iteration,
        # then continue from the selected child position.
        root = monte_carlo_tree_search(root, NINLINE, 3.0, 1000)
        root.board_show()
        player='o'
    else:
        # Human move: keep asking until the first two characters are digits
        # naming an empty cell on the board. Malformed input, out-of-range
        # coordinates and occupied cells all lead to a new prompt instead of
        # a crash or a silently overwritten stone.
        while True:
            print('请选手选择落子处的数字，如23代表在第2行第3列落子：', end='')
            xy = input().strip()
            try:
                playlocation = (int(xy[0]), int(xy[1]) )
            except (IndexError, ValueError):
                continue
            if (playlocation[0] < root.height
                    and playlocation[1] < root.width
                    and root.board[playlocation] == 0):
                break

        root.board[playlocation] = -1
        # Start a fresh tree from the new position; earlier statistics are dropped.
        root = BoardMCTSnode(root.board, 'x')
        root.board_show()
        player='x'

# Final result: > 0 means 'x' won, < 0 means 'o' won, 0 is a draw.
print(get_states(root.board, NINLINE))

            x\y  0    1    2    3    4    5  

              0  +    +    +    +    +    +  

              1  +    +    +    +    +    +  

              2  +    +    +    +    +    +  

              3  +    +    +    +    +    +  

              4  +    +    +    +    +    +  

              5  +    +    +    +    +    +  

            x\y  0    1    2    3    4    5  

              0  +    +    +    +    +    +  

              1  +    +    +    +    +    +  

              2  +    +    +    X    +    +  

              3  +    +    +    +    +    +  

              4  +    +    +    +    +    +  

              5  +    +    +    +    +    +  

请选手选择落子处的数字，如23代表在第2行第3列落子：22
            x\y  0    1    2    3    4    5  

              0  +    +    +    +    +    +  

              1  +    +    +    +    +    +  

              2  +    +    Q    X    +    +  

              3  +    +    +    +    +    +  

              4  +    +    +    +    +    +  

              5  +    +    +  