In [1]:
import numpy as np
import copy
import itertools
import random
import time
#import math
#from board import Board

In [None]:
  
    def board_update(self, player, location):
        '''Place `player`'s stone at `location` if that cell is still available.

        Args:
            player: 'x' stores 1 on the board; any other value stores -1.
            location: key into `self.ox`, looked up in `self._ox_availables`.

        Returns:
            1 when the stone was placed, 0 when `location` is not available.

        NOTE(review): only `self.ox` is written here; `self._ox_availables`
        is not modified and no win/draw evaluation is performed in this method.
        '''
        # Guard clause: reject a move onto a cell that is not available.
        if location not in self._ox_availables:
            return 0

        stone = -1
        if player == 'x':
            stone = 1
        self.ox[location] = stone
        return 1

In [5]:
class BoardMCTSnode(object):
    '''One position (node) in a Monte Carlo search tree for an n-in-a-row game.

    The position is a 2-D NumPy array: 0 = empty cell, 1 = 'x', -1 = 'o'.
    Each node remembers whose turn it is, the move that produced it, its
    parent/children links and the visit statistics used by UCB selection.
    '''

    def __init__(self, ox_board, ninline=3, playturn='x', parent=None, lastmove=None):
        '''Create a node from a board position.

        Args:
            ox_board: 2-D array of 0 / 1 / -1; it is deep-copied, so the
                caller's array is never modified by the node.
            ninline: number of equal stones in a line needed to win.
            playturn: 'x' or 'o', the player who moves next from this position.
            parent: the node this one was expanded from, or None for a root.
            lastmove: (row, col) of the move that led to this position, or
                None for a root.
        '''
        self.board = copy.deepcopy(ox_board)
        self.width = self.board.shape[1]     # number of columns
        self.height = self.board.shape[0]    # number of rows
        self.ninline = ninline

        self.playturn = playturn
        self.lastmove = lastmove

        self.parent = parent
        self.children = []
        # Moves from this position that have no child node yet; filled lazily
        # on the first traverse() so throw-away nodes stay cheap.
        self._untried_moves = None

        self.N = 0    # visit count
        self.W = 0    # sum of all results back-propagated through this node
        self.c = 1.96    # exploration constant (inside the square root of the UCB term)
        self.p = 1.4    # weight multiplying the exploration term

    def board_show(self):
        '''Print the board with row/column indices ('+' empty, 'X' for x, 'Q' for o).'''
        # Column header. The backslash is escaped because '\y' is an invalid
        # escape sequence; the printed text is unchanged.
        print("{0:>15}".format('x\\y'), end='')
        for j in range(self.width):
            print("{0:^5}".format(j), end='')
        print('\n')  # ends the header line and leaves one blank line

        for i in range(self.height):
            print("{0:>15}".format(i), end='')    # row label
            for j in range(self.width):
                if self.board[i][j] == 0:       # empty
                    print("{0:^5}".format('+'), end='')
                elif self.board[i][j] == 1:     # x stone
                    print("{0:^5}".format('X'), end='')
                elif self.board[i][j] == -1:    # o stone
                    print("{0:^5}".format('Q'), end='')
            print('\n')

    def get_moves(self):
        '''Return the empty cells as a list of (row, col) tuples of plain ints.'''
        rows, cols = np.where(self.board == 0)
        # Convert NumPy scalars to int so the tuples print and compare cleanly.
        return [(int(r), int(c)) for r, c in zip(rows, cols)]

    def get_states(self):
        '''Evaluate the position.

        Returns:
            * ``1 + empties``  (>= 1)  if 'x' has `ninline` in a row,
            * ``-1 - empties`` (<= -1) if 'o' has `ninline` in a row,
            * ``0`` for a draw (board full, no line),
            * ``None`` while the game is still open.
            `empties` is the number of empty cells, so faster wins score higher
            in magnitude.
        '''
        n = self.ninline
        empties = len(self.get_moves())

        # Slide an n x n window over the board; a full line inside a window
        # sums to +n (x) or -n (o).
        for i in range(self.height - n + 1):
            for j in range(self.width - n + 1):
                window = self.board[i:i + n, j:j + n]
                line_sums = set(window.sum(axis=1)) | set(window.sum(axis=0))
                line_sums.add(window.trace())          # main diagonal
                line_sums.add(window[::-1].trace())    # anti-diagonal
                if -n in line_sums:
                    return -1 - empties
                if n in line_sums:
                    return 1 + empties

        return 0 if empties == 0 else None

    def best_child(self):
        '''Return the child with the highest UCB score.

        The exploitation term is taken from the point of view of the player
        who moves at this node: results are positive for 'x' and negative for
        'o', so the average is negated when it is 'o' to move. Children that
        were never visited are preferred unconditionally, which also avoids a
        division by zero.

        Raises:
            ValueError: if the node has no children.
        '''
        if not self.children:
            raise ValueError("best_child() called on a node without children")

        sign = 1 if self.playturn == 'x' else -1
        log_parent = np.log(max(self.N, 1))    # guard against log(0)

        scores = []
        for child in self.children:
            if child.N == 0:
                scores.append(np.inf)
            else:
                scores.append(sign * child.W / child.N +
                              child.p * np.sqrt(child.c * log_parent / child.N))
        return self.children[int(np.argmax(scores))]

    def rollout_policy(self, possible_moves):
        '''Pick one move uniformly at random from `possible_moves`.'''
        possible_move = possible_moves[np.random.randint(len(possible_moves))]
        return possible_move

    def rollout(self):
        '''Play random moves from this position until the game ends.

        The playout runs on a scratch copy, so this node's board is untouched.

        Returns:
            The terminal value as defined by get_states() (never None).
        '''
        scratch = BoardMCTSnode(self.board, self.ninline, self.playturn)
        states = scratch.get_states()
        while states is None:
            action_move = self.rollout_policy(scratch.get_moves())
            scratch.board[action_move] = 1 if scratch.playturn == 'x' else -1
            scratch.playturn = 'o' if scratch.playturn == 'x' else 'x'
            states = scratch.get_states()
        return states

    def traverse(self):
        '''Select the node to simulate from.

        Walks down from this node: the first node met that still has an
        unexpanded move gets a new child, which is returned. Fully expanded
        nodes are descended through best_child(). A terminal node is returned
        as is.
        '''
        node = self
        while node.get_states() is None:
            if node._untried_moves is None:
                node._untried_moves = node.get_moves()
            if node._untried_moves:
                return node._expand()
            node = node.best_child()
        return node

    def _expand(self):
        '''Create, attach and return a child for one randomly chosen untried move.'''
        move = self._untried_moves.pop(np.random.randint(len(self._untried_moves)))
        next_player = 'o' if self.playturn == 'x' else 'x'
        child_node = BoardMCTSnode(self.board, self.ninline, next_player,
                                   parent=self, lastmove=move)
        # The constructor copied the board, so only the child is modified here.
        child_node.board[move] = 1 if self.playturn == 'x' else -1
        self.children.append(child_node)
        return child_node

    def backpropagate(self, result):
        '''Add one visit and `result` to this node and to every ancestor.'''
        node = self
        while node is not None:
            node.N += 1
            node.W += result
            node = node.parent

In [None]:
def monte_carlo_tree_search(root: "BoardMCTSnode", timelast=3.0, mounts=1000):
    """Run Monte Carlo tree search from `root` and return the chosen child node.

    The search works on a deep copy of `root`, so the caller's node is left
    untouched. Each iteration selects/expands a leaf, plays a random rollout
    from it and back-propagates the result up the tree.

    Args:
        root: node to search from; it must provide traverse(), rollout(),
            backpropagate(result) and best_child().
        timelast: time budget in seconds; checked after every iteration, so at
            least one iteration runs when `mounts` > 0.
        mounts: maximum number of iterations.

    Returns:
        Whatever `best_child()` of the searched copy returns.
    """
    begin_time = time.time()
    node = copy.deepcopy(root)
    # `for`, not `while`: the original `while _ in range(...)` never bound `_`.
    for _ in range(mounts):
        leaf = node.traverse()     # unvisited node, or the max-UCB node
        simulation_result = leaf.rollout()
        # backpropagate is a method of the node; no module-level function exists.
        leaf.backpropagate(simulation_result)
        if (time.time() - begin_time) >= timelast:
            break
    return node.best_child()

In [6]:
# Fresh 3x3 board; 0 marks an empty cell.
ttt = np.zeros(shape=(3, 3), dtype=int)
print(ttt)
# Root search node built with the constructor defaults (ninline=3, playturn='x', parent=None).
mctsttt = BoardMCTSnode(ttt)

print("棋盘初始化，X先手：")
mctsttt.board_show()
# Dump the root node's raw state, one value per line.
print(mctsttt.board, mctsttt.parent, mctsttt.children, mctsttt.N, sep='\n')
# Last expression of the cell: the list of empty cells is shown as the cell result.
mctsttt.get_moves()

[[0 0 0]
 [0 0 0]
 [0 0 0]]
棋盘初始化，X先手：
            x\y  0    1    2  

              0  +    +    +  

              1  +    +    +  

              2  +    +    +  

[[0 0 0]
 [0 0 0]
 [0 0 0]]
None
[]
0


[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)]

In [None]:
# Expand one child of the root and inspect the resulting position.
leaf = mctsttt.traverse()
# A node keeps its position as a NumPy array directly in `.board`
# (the constructor reads `.shape` from it), so there is no `.ox` attribute.
print(leaf.board)
# Leftover debugging probes, kept disabled:
#print(leaf.parent.board.board_now())
#print(leaf.children)
#print(mctsttt.board.ox)
#print(mctsttt.parent)
#for n in mctsttt.children:
#    print(n.board.board_now())
#node = monte_carlo_tree_search(mctsttt)

In [None]:
# Disabled self-play loop: the whole cell is a single string literal, so none
# of the code below is executed.
# NOTE(review): the loop calls `ttt.board_update(...)`, `ttt.board_now()` and
# `ttt.board_show()`, while `ttt` is created as a NumPy array in an earlier
# cell; it presumably dates from a version where `ttt` was a Board object.
# Confirm before re-enabling.
"""
player = 'x'
i = 0

while True :
    i += 1
    print("X先手，第%d回合："%i, end='')    

    if player == 'x':
        #print('请选手选择落子处的数字，如23代表在第2行第3列落子：', end='')
        #xy = input()
        #playlocation = (int(xy[0]), int(xy[1]) )
        timebegin = time.time()
        node = monte_carlo_tree_search(mctsttt)
        playlocation = node.lastmove
        #playlocation = ai_full_search(ttt, player)
        #playlocation = ai_montecarlo_search(ttt, player, 200)
        ttt.board_update(player, playlocation)
        s, (v) ,(m)= ttt.board_now()
        print('X落子在', playlocation, '用时%0.4f'%(time.time()-timebegin), '秒; ', end='')
        if s == 1:
            print('选手X 赢')
            ttt.board_show()
            break
        if v == set():
            print('平局')
            ttt.board_show()
            break
        player = 'o'

    if player == 'o':
        timebegin = time.time()
        node = monte_carlo_tree_search(mctsttt)
        playlocation = node.lastmove
        #playlocation = ai_full_search(ttt, player)
        #playlocation = ai_montecarlo_search(ttt, player, 4000, 120)
        ttt.board_update(player, playlocation)
        s, (v) , (m)= ttt.board_now()
        print('O落子在', playlocation, '用时%0.4f'%(time.time()-timebegin), '秒. ')
        if s == -1:
            print('计算机Q 赢')
            ttt.board_show()
            break 
        if v == set():
            print('平局')
            ttt.board_show()
            break
        player = 'x'
        
    ttt.board_show()
"""