In [1]:
import numpy as np
import copy
import itertools
import random
import time
#import math
from ox_board import Board

In [2]:
class BoardMCTSnode(Board):
    """Board position that doubles as a node of a Monte Carlo search tree.

    On top of whatever ``Board`` stores, each node keeps links to its parent
    and children plus the visit/value statistics read by ``best_child`` and
    updated by ``backpropagate``.

    NOTE(review): other cells of this notebook build nodes as
    ``BoardMCTSnode(board_array, player, parent)`` and read ``.board`` /
    ``.playturn``; this definition takes ``(height, width, player, parent)``
    and sets neither attribute itself - confirm which class version is current.
    """

    def __init__(self, height=3, width=3, player='x', parent=None):
        # Board builds the position itself; only search bookkeeping is added here.
        super().__init__(height, width, player)

        # Visit count starts at 1 so the divisions by N never hit zero;
        # W is the running sum of the results backed up through this node.
        self.N, self.W = 1, 0

        # Constants of the selection score in best_child:
        # W/N + p * sqrt(c * ln(parent.N) / N).
        self.c, self.p = 1.96, 1.4

        # Tree links; child_moves[i] is the move that leads to children[i].
        self.parent = parent
        self.children, self.child_moves = [], []



In [3]:
# Smoke test: build an empty 3x3 node with 'x' to move and print the inherited
# Board fields together with the search statistics added by BoardMCTSnode.
a=BoardMCTSnode(3,3,'x')
a.show()
print(a.moves)
print(a.steps)
print(a.player)

# The recorded output for this empty board is None.
# NOTE(review): presumably None means "game not finished" - confirm in ox_board.
print(a.states(3))
print(a.N, a.W, a.c, a.p)

# Same kind of check on a plain Board after three moves.
board=Board(3,3,'x')
board.move((1,0))
print(board.states(1))
board.move((1,1))
board.move((2,0))

print(board.ox)
print(board.steps)
print(board.player)
print(board.states(2))


            x\y  0    1    2  

              0  +    +    +  

              1  +    +    +  

              2  +    +    +  

{(0, 1), (1, 2), (0, 0), (2, 1), (2, 0), (1, 1), (2, 2), (1, 0), (0, 2)}
[]
x
None
1 0 1.96 1.4
9
[[ 0  0  0]
 [ 1 -1  0]
 [ 1  0  0]]
[(1, 0), (1, 1), (2, 0)]
o
7


In [None]:
       # NOTE(review): this cell is a bare method body (no def/class header is
       # visible); it looks like the body of a board-printing method - confirm
       # where it belongs before running it.
       # Print the column-index header row.
        print("{0:>15}".format('x\y'), end='') # corner label, right-aligned in a 15-character field
        for j in range(self.width):
            print("{0:^5}".format(j), end='') # column index, centred in 5 characters
        print('\n')  # end the header line and leave one blank line

        # Print the board, one text line per board row.
        for i in range(self.height):        # row index
            # Row label, right-aligned in a 15-character field.
            print("{0:>15}".format(i), end='')
            # Cells of this row, each centred in 5 characters.
            for j in range(self.width):
                if self.board[i][j] == 0:    # empty cell
                    print("{0:^5}".format('+'), end='')
                elif self.board[i][j] == 1:   # cell holding a 1 (x stone)
                    print("{0:^5}".format('X'), end='')
                elif self.board[i][j] == -1:  # cell holding a -1 (o stone)
                    # NOTE(review): the glyph printed here is 'Q' - confirm whether 'O' was intended.
                    print("{0:^5}".format('Q'), end='')
            print('\n')
            
            

In [None]:
# Search using a Monte Carlo search tree (MCTS)
def get_moves(ox:np.ndarray):
    """List the empty cells of a board.

    Args:
        ox: 2-D board array; a cell equal to 0 is empty.

    Returns:
        A list of ``(row, col)`` index tuples, in row-major order, one per
        cell whose value is 0 (for example ``[(0, 0), (0, 1)]``).
    """
    empty = np.where(ox == 0)
    rows, cols = empty[0], empty[1]
    return list(zip(rows, cols))
    
    
def get_states(ox:np.ndarray, ninline=3):
    """Report whether the game on ``ox`` is over and who won.

    Stones are stored as +1 ('x') and -1 ('o'); 0 marks an empty cell.  Every
    ``ninline`` x ``ninline`` window of the board is checked for a full row,
    column or diagonal of one colour.

    Args:
        ox: 2-D board array.
        ninline: number of stones in a line needed to win.

    Returns:
        ``1 + e`` if 'x' has a line and ``-1 - e`` if 'o' has one, where ``e``
        is the number of empty cells left (earlier wins score higher in
        magnitude); ``0`` for a full board without a line (draw); ``None`` if
        the game is not finished.  A board smaller than ``ninline`` in either
        direction has no window, so it can only be a draw or unfinished.
    """
    # Count the empty cells once; every return value below depends on it.
    n_empty = int(np.count_nonzero(ox == 0))

    for i in range(ox.shape[0] - ninline + 1):
        for j in range(ox.shape[1] - ninline + 1):
            window = ox[i:i + ninline, j:j + ninline]
            # A line of one colour sums to +ninline or -ninline because the
            # cell values are limited to -1, 0 and 1.
            line_sums = set(window.sum(axis=1)) | set(window.sum(axis=0))
            line_sums.add(window.trace())          # main diagonal
            line_sums.add(window[::-1].trace())    # anti-diagonal
            # The first window containing a line decides; 'o' is tested first.
            if -ninline in line_sums:
                return -1 - n_empty
            if ninline in line_sums:
                return 1 + n_empty

    # No line anywhere: draw when the board is full, otherwise still running.
    return 0 if n_empty == 0 else None
    
    
def best_child(node:"BoardMCTSnode"):
    """Select the child of ``node`` with the best UCB score.

    Each child is scored as ``s * W/N + p * sqrt(c * ln(node.N) / N)`` using
    the child's own ``W``, ``N``, ``c`` and ``p``.  ``W`` is accumulated from
    x's point of view (positive means good for 'x'), so ``s`` is +1 when 'x'
    is to move at ``node`` and -1 otherwise.  The exploration term is always
    added: the side that wants small ``W`` still has to be drawn towards
    rarely visited children, which taking the minimum of ``W/N + bonus``
    would prevent.

    Args:
        node: node exposing ``children``, ``playturn`` and ``N``.

    Returns:
        The child with the highest score (the first one on ties), or ``node``
        itself when it has no children.
    """
    if not node.children:
        return node

    sign = 1 if node.playturn == 'x' else -1
    log_parent_visits = np.log(node.N)
    scores = [
        sign * child.W / child.N
        + child.p * np.sqrt(child.c * log_parent_visits / child.N)
        for child in node.children
    ]
    return node.children[int(np.argmax(scores))]
    
    
def rollout_policy(possible_moves):
    """Pick one of ``possible_moves`` uniformly at random.

    Draws a single index from ``np.random.randint`` and returns the element at
    that index; the sequence must therefore be non-empty.
    """
    pick = np.random.randint(len(possible_moves))
    return possible_moves[pick]
               
        
def rollout(rollout_board:np.ndarray, playturn='x', ninline=3, counts=1):
    """Play random games to the end from a position and sum their results.

    Args:
        rollout_board: starting board; it is copied, never modified.
        playturn: side to move first, 'x' (stones stored as 1) or anything
            else for 'o' (stones stored as -1).
        ninline: number of stones in a line needed to win.
        counts: number of independent random playouts.

    Returns:
        The sum over all playouts of the final ``get_states`` value.
    """
    total = 0
    for _ in range(counts):
        ox = copy.deepcopy(rollout_board)
        player = playturn
        possible_moves = get_moves(ox)

        # Evaluate the position once per step and reuse the terminal value,
        # instead of calling get_states a second time after the loop.
        result = get_states(ox, ninline)
        while result is None:
            rollout_move = rollout_policy(possible_moves)
            ox[rollout_move] = 1 if player == 'x' else -1
            possible_moves.remove(rollout_move)
            player = 'o' if player == 'x' else 'x'
            result = get_states(ox, ninline)

        total += result

    return total
    
    
def traverse(node:BoardMCTSnode):
    """Walk down the tree from ``node`` and return the node to simulate from.

    While every empty cell of the current node already has a child, descend
    into ``best_child``.  A node whose board has no empty cell is returned
    as-is.  Otherwise one not-yet-tried move is chosen at random, a child node
    for it is created, linked into the tree and returned.

    Only a full board stops the descent; whether the game was already won is
    not checked here.
    """
    current = node
    while True:
        open_moves = get_moves(current.board)
        if open_moves == []:
            return current
        if len(open_moves) != len(current.child_moves):
            break
        # Fully expanded: continue the descent through the selected child.
        current = best_child(current)

    # Keep only the moves that have no child yet.
    for tried in current.child_moves:
        open_moves.remove(tried)

    child_board = copy.deepcopy(current.board)
    chosen = open_moves[np.random.randint(len(open_moves))]
    mover_is_x = current.playturn == 'x'
    child_board[chosen] = 1 if mover_is_x else -1
    # The child is to be played by the other side.
    child = BoardMCTSnode(child_board, 'o' if mover_is_x else 'x', current)
    current.children.append(child)
    current.child_moves.append(chosen)

    return child
    

def backpropagate(node, result):
    """Add one visit and ``result`` to ``node`` and to every ancestor.

    The chain of ``parent`` links is followed iteratively up to the root, so
    the depth of the tree is not limited by Python's recursion limit.

    Args:
        node: node the simulation started from; ``None`` is accepted.
        result: simulation outcome added to each node's ``W``.

    Returns:
        ``0`` when ``node`` is ``None``, otherwise ``None``.
    """
    if node is None:
        return 0

    while node is not None:
        node.N += 1
        node.W += result
        node = node.parent
    
    
def monte_carlo_tree_search(node:BoardMCTSnode, ninline=3, timelast=3.0, mounts=1600):
    """Grow the search tree under ``node`` and return its selected child.

    Repeats select/expand (``traverse``), simulate (``rollout``, one playout)
    and ``backpropagate`` until ``mounts`` iterations have run or ``timelast``
    seconds have elapsed, whichever happens first; the clock is checked after
    each iteration, so at least one iteration runs when ``mounts`` > 0.

    Returns:
        ``best_child(node)`` after the search.
    """
    started_at = time.time()

    for _ in range(mounts):
        # Either a freshly expanded child or a node with no empty cell left.
        selected = traverse(node)
        outcome = rollout(selected.board, selected.playturn, ninline, 1)
        backpropagate(selected, outcome)

        elapsed = time.time() - started_at
        if elapsed >= timelast:
            break

    return best_child(node)

In [None]:
# Machine vs. machine: the MCTS engine plays both sides on a 3x3 board
# (three in a row wins), starting with 'o'.
# NOTE(review): BoardMCTSnode is called here as (board, player), while the class
# definition earlier in this notebook takes (height, width, player, parent) -
# confirm which version of the class this cell expects.
board = np.zeros((3, 3), dtype=int )
player = 'o'
root = BoardMCTSnode(board, player)
ninline=3
timelast=5
mounts=1600

tb=time.time()
node=root
# Each search returns the chosen child, which becomes the root of the next search.
while get_states(node.board, ninline) == None:
    #node.board_show()
    node=monte_carlo_tree_search(node, ninline, timelast, mounts)
    print('此次mcts搜索后产生的子节点数：',len(node.parent.child_moves))
    
# Show the final position and the total wall-clock time of the game.
node.board_show()
print('mcts用时%0.4f'%(time.time()-tb))



In [None]:
# Human ('o', stones stored as -1) versus the MCTS engine ('x') on an 8x8 board;
# five in a row wins and the human moves first.
# NOTE(review): BoardMCTSnode is called here as (board, player[, parent]), while
# the class definition earlier in this notebook takes (height, width, player,
# parent) - confirm which version of the class this cell expects.
board = np.zeros((8, 8), dtype=int )
player = 'o'
root = BoardMCTSnode(board, player)
ninline=5
timelast=60
mounts=160000000
node=root
while get_states(node.board, ninline) is None:
    if player=='x':
        # Engine's turn: search from the current node and step into the chosen child.
        node = monte_carlo_tree_search(node, ninline, timelast, mounts)
        print('此次mcts搜索后产生的子节点数：',len(node.parent.child_moves))
        node.board_show()
        player='o'
    else:
        print('请选手选择落子处的数字，如23代表在第2行第3列落子：', end='')
        xy = input().strip()
        # Accept exactly two ASCII digits; anything else is asked for again
        # instead of crashing on int() or on indexing.
        if len(xy) != 2 or any(ch not in '0123456789' for ch in xy):
            print('输入无效，请重新输入。')
            continue
        playlocation = (int(xy[0]), int(xy[1]) )
        # The cell must lie on the board and be empty; an occupied cell would
        # otherwise be silently overwritten with the human's stone.
        if (playlocation[0] >= node.board.shape[0]
                or playlocation[1] >= node.board.shape[1]
                or node.board[playlocation] != 0):
            print('该位置不可落子，请重新输入。')
            continue
        # Reuse the subtree for this move if the engine already expanded it;
        # otherwise create the child so the next search starts from it.
        if playlocation in node.child_moves:
            node = node.children[node.child_moves.index(playlocation)]
        else:
            new_board = copy.deepcopy(node.board)
            new_board[playlocation] = -1
            child_node = BoardMCTSnode(new_board, 'x', node)
            node.children.append(child_node)
            node.child_moves.append(playlocation)
            node = child_node
        # Show the position after every human move, whichever branch was taken.
        node.board_show()
        player='x'

print(get_states(node.board, ninline))

In [None]:
# Search using the plain (flat) Monte Carlo method
def simulation(ttt, ninline, playturn, ms=()):
    """Replay a fixed move sequence and report how the game ends.

    The moves in ``ms`` are played on a copy of ``ttt``, alternating sides and
    starting with ``playturn``; 'x' places 1 and 'o' places -1.

    Args:
        ttt: starting board array (left unmodified).
        ninline: number of stones in a line needed to win.
        playturn: side that plays the first move of ``ms``.
        ms: sequence of board index tuples, in playing order.

    Returns:
        The first non-``None`` value of ``get_states`` reached while replaying,
        or ``None`` if the sequence ends before the game does.
    """
    ox = copy.deepcopy(ttt)
    player = playturn
    for move in ms:
        ox[move] = 1 if player == 'x' else -1    # place the stone
        # Hand the turn to the other side (both branches used to yield 'x').
        player = 'o' if player == 'x' else 'x'
        s = get_states(ox, ninline)
        if s is not None:
            return s
    return None
        
    
def monte_carlo_search(ox, playturn, ninline=3, mounts=16000000, timelast=120.0):
    """Choose a move by flat Monte Carlo sampling.

    Each iteration shuffles all empty cells into a random playing order,
    replays it with ``simulation`` and credits the outcome to the first move
    of that order.  Moves are compared by their mean outcome, so a move is not
    favoured or penalised merely for having been sampled more often.  'x'
    takes the highest mean, 'o' the lowest; ties are broken at random and a
    move that was never sampled counts as 0.

    With more than a handful of empty cells the number of possible orders is
    astronomically larger than any affordable sample, so the estimates become
    very noisy on large boards.

    Args:
        ox: current board array.
        playturn: side to move, 'x' or 'o'.
        ninline: number of stones in a line needed to win.
        mounts: maximum number of simulated games.
        timelast: time budget in seconds, checked after each simulated game.

    Returns:
        The chosen move as a ``(row, col)`` tuple.

    Raises:
        ValueError: if ``playturn`` is neither 'x' nor 'o', or if the board
            has no empty cell.
    """
    if playturn not in ('x', 'o'):
        raise ValueError("playturn must be 'x' or 'o', got %r" % (playturn,))

    timebegin = time.time()
    candidates = list(get_moves(ox))
    if not candidates:
        raise ValueError('no empty cell left to play on')

    totals = {move: 0 for move in candidates}
    visits = {move: 0 for move in candidates}

    for _ in range(mounts):
        # A full random ordering of the empty cells is one complete game.
        order = random.sample(candidates, len(candidates))
        outcome = simulation(ox, ninline, playturn, order)
        if outcome is not None:
            totals[order[0]] += outcome
            visits[order[0]] += 1

        if (time.time() - timebegin) >= timelast:
            break

    mean_value = {
        move: (totals[move] / visits[move] if visits[move] else 0.0)
        for move in candidates
    }
    pick = min if playturn == 'o' else max
    best_value = pick(mean_value.values())
    mvlist = [move for move, value in mean_value.items() if value == best_value]

    return random.choice(mvlist)

In [None]:
# Machine vs. machine on a 9x9 board, five in a row wins: flat Monte Carlo
# search plays 'o' (moves first), the Monte Carlo tree search plays 'x'.
# Each search is capped at 16,000,000 simulations or `timelast` seconds.
# NOTE(review): BoardMCTSnode is called here as (board, player[, parent]), while
# the class definition earlier in this notebook takes (height, width, player,
# parent) - confirm which version of the class this cell expects.
board = np.zeros((9, 9), dtype=int )
player = 'o'
root = BoardMCTSnode(board, player)
ninline=5
timelast=6
mounts=16000000
node=root
while get_states(node.board, ninline) == None:
    if player=='x':  
        # MCTS side: search from the current node and step into the chosen child.
        node = monte_carlo_tree_search(node, ninline, timelast, mounts)
        print('此次mcts搜索后产生的子节点数：',len(node.parent.child_moves))
        node.board_show()
        player='o'
    else:  
        # Flat Monte Carlo side: the move comes from monte_carlo_search
        # (this branch takes the place of the keyboard input of the human cell).
        playlocation = monte_carlo_search(node.board, player, ninline, mounts, timelast)
        # Look for the child of the current node that matches this move; if there
        # is none, create it. Either way that child becomes the root of the next
        # MCTS search.
        if playlocation in node.child_moves:
            node = node.children[node.child_moves.index(playlocation)]
            print('此次 monte carlo 搜索后产生:', playlocation)
            node.board_show()
            player='x'
        else:
            new_board = copy.deepcopy(node.board)
            new_board[playlocation] = -1
            child_node = BoardMCTSnode(new_board, 'x', node)
            node.children.append(child_node)
            node.child_moves.append(playlocation)
            node = child_node
            print('此次 monte carlo 搜索后产生:', playlocation)
            node.board_show()
            player='x'

# Final result as reported by get_states.
print(get_states(node.board, ninline))