In [6]:
import numpy as np
import copy
import time
import math

In [7]:
class Board(object):
    '''棋盘，默认是“井字棋”Tie Tac Toe，也可以作为五子棋的棋盘'''
    def __init__(self, height=3, width=3, ninline=3):
        self.width = width     #列
        self.height = height   #行 
        
        ''' # 表示几个相同的棋子连成一线算作胜利'''
        self.ninline = ninline
        
        ''' # M x N 矩阵，值是棋子类型，0空，1代表'x'，-1代表'o'; ''' 
        self.ox = np.zeros((self.height, self.width), dtype=int )
        
        '''棋盘棋局胜负标志，1代表'x'赢，-1代表'o'赢，0代表平局，2代表未结束'''
        self.ox_states = 2
        
        '''现棋盘空余可下位置集合如{(0,0), (0,1), }'''
        self.ox_available = set( [(i,j) for i in range(self.height) for j in range(self.width)] )
        
    '''显示棋盘，参数是棋盘矩阵'''        
    def oxshow(self):
        '''打印列标号'''
        print(' x\y ', end='') # 5个字符
        for j in range(self.width):
            if j <10:
                print('  %d  '%(j+1), end='') # 5个字符
            elif j <100:
                print('  %d '%(j+1), end='')
        print('')  # 换行
        '''打印空行'''
        '''
        print('    ', end='')# 5个空格
        for j in range(self.width):
            print('    ', end='')
        '''
        print('')
        
        '''打印棋盘'''            
        for i in range(self.height):        #行数
            '''打印行标号'''
            if i <10:
                print('  %d  '%(i+1), end='' ) # 5个字符
            elif i*self.width+j <100:
                print('  %d '%(i+1), end='' )
            '''打印该行棋盘'''    
            for j in range(self.width):
                if self.ox[i][j] == 0:    #未落子  5个字符
                    print('  ~  ', end='')
                elif self.ox[i][j] == 1:   # x 落子  5个字符
                    print('  X  ', end='')
                elif self.ox[i][j] == -1:  # o 落子
                    print('  O  ', end='') 
            print('')
            
            '''打印空行'''
            '''
            print('    ', end='')# 5个空格
            for j in range(self.width):
                print('    ', end='')
            '''
            print('')                    
                
 
    '''计算一个ninline方阵连线情况,1代表'x'连成一线，-1代表'o'，0代表没有'''
    def nmatrixwinner(self,nm):
        '''把各行列的和组成列表'''
        ox_line = list(nm.sum(axis=1)) + list(nm.sum(axis=0))
        '''第一条对角线的和加入列表'''
        ox_line += [sum([nm[i, i] for i in range(nm.shape[0]) ])]
        '''第二条对角线的和加入列表'''
        ox_line += [sum([nm[nm.shape[0]-1-i,i] for i in range(nm.shape[0]) ])]            
        
        return -1 if -nm.shape[0] in ox_line else 1 if nm.shape[0] in ox_line else 0
    
    
    '''检查棋盘棋局胜负，1代表'x'赢，-1代表'o'赢，2代表平局，0代表未结束'''
    def oxwinner(self):
        '''生成棋盘所有ninline维方阵，并判断方阵是否有连成一线的情况'''
        matrixlist = [self.ox[i:i+self.ninline, j:j+self.ninline] 
                      for i in range(self.ox.shape[0]- self.ninline +1)
                     for j in range(self.ox.shape[1]- self.ninline +1)
                     ]
    
        '''如果出现ninline维方阵返回1或者-1，代表已经有了输赢，返回输赢结果'''
        for nm in matrixlist:
            if self.nmatrixwinner(nm) != 0:
                return self.nmatrixwinner(nm) 
        
        '''若所有ninline维方阵返回都是0，且已经没有可落子处，则返回0代表平局，2代表未结束'''
        if not self.ox_available:
            return 0
        else:
            return 2       
      
    
    '''# player在location(x,y)处落子，更新棋盘，,更新可用位置，计算此时棋局胜负'''
    def update(self, player, location): 
        if location in self.ox_available:
            self.ox[location[0], location[1]] = 1 if player=='x' else -1
            self.ox_available.remove( location )    
            self.ox_states = self.oxwinner()
            return 1
        else:
            return 0
        
            
    '''返回棋盘输赢状态'''
    def oxstates(self):        
        return self.ox_states
    
    
    '''为AI返回一个可以选择落子的集合'''
    def oxavailable(self):
        return self.ox_available
    

In [32]:
class Node(object):
    """使用Monte Carlo Tree Search with UCB寻求最佳路径  """
    '''初始化棋盘状态，默认计算时间3秒，1000次'''
    def __init__(self, board, player='', location=()):

        self.board = copy.deepcopy(board)  #根据父节点以及父节点选择的落子位置行成新的节点棋盘
        if location:
            self.board.update(player,location)
        
        self.move = (player, location) #本节点棋盘的最后一步下法，（选手，落子位置）
        self.move_N = 0 #所有子节点使用次数和
        self.move_Ni = dict([ (move, 0) for move in self.board.oxavailable() ]) #下法（子节点）使用次数
        self.move_Wi = dict([ (move, 0) for move in self.board.oxavailable() ]) #下法（子节点）获胜次数
        
        #self.move_ucb = dict([ (move, 0) for move in self.board.oxavailable() ])
        
        self.player = player
        if player:
            self.player = 'o' if player == 'x' else 'x' #更换选手，原来是x变为o，反之亦然
        
        self.ucb_confident = 1.96
        


In [37]:
def simu_move(simu_node , player ):
    for m in simu_node.board.oxavailable():
        simu_node.board.update(player, m)
        if simu_node.board.oxstates() == 2:
            simu_move(simu_node, player = 'o' if player == 'x' else 'x')
        else:
            simu_node.move_Wi[m] += simu_board.oxstates()
            simu_node.move_Ni[m] += 1

In [38]:
root_node = Node(Board(5,6,4))

In [39]:
root_node.board.oxshow()

simu_move(root_node, root_node.player)
print(root_node.move_Wi)
print(len(root_node.move_Wi))

 x\y   1    2    3    4    5    6  

  1    ~    ~    ~    ~    ~    ~  

  2    ~    ~    ~    ~    ~    ~  

  3    ~    ~    ~    ~    ~    ~  

  4    ~    ~    ~    ~    ~    ~  

  5    ~    ~    ~    ~    ~    ~  



RuntimeError: Set changed size during iteration

In [None]:
"""   
    def best_move(self):
        '''否则返回UCB最大的下法'''
        return max(self.move_ucb, key=self.move_ucb.get)
    
    def ucb(self):
        for m in self.board.oxavailable():
            mcts(self, m)
    
def max_ucb_move(node, a_move):
    '''返回UCB最大的MOVE'''
    
    log_N = math.log(self.move_N)
        

def monte_carlo_tree_search(node):#蒙特卡洛树搜索总函数
    simu_round = 1000
    simu_time = 2
    time_b = time.time()
    
    for i in range(simu_round):
        expand_node = tree_policy(node)
        reward = default_policy(expand_node)
        backup(expand_node,reward)
        if time.time()-time_b > simu_time:
            break
            
    best_next_node = best_child(node,False)
    return best_next_node

def tree_policy(node):#选择子节点的策略
	while node.get_state().is_terminal()==False:
		if node.is_all_expand():
			node=best_child(node,True)
		else:
			sub_node = expand(node)
			return sub_node
	return node
class MCTS(object):
    '''寻找最佳的下一步下法'''
    def __init__(self, node, time=3, max_actions=1000 ):
        self.parent = node
        self.child = []

"""  