In [2]:
import chess
import numpy as np

In [10]:
from keras.models import Model
from keras.layers import Input
from keras.layers import Activation
from keras.layers import Conv2D, Dense
from keras.layers import add, BatchNormalization, Flatten
import tensorflow as tf

chess_dict = {
    'p' : [1,0,0,0,0,0,0,0,0,0,0,0],
    'P' : [0,0,0,0,0,0,1,0,0,0,0,0],
    'n' : [0,1,0,0,0,0,0,0,0,0,0,0],
    'N' : [0,0,0,0,0,0,0,1,0,0,0,0],
    'b' : [0,0,1,0,0,0,0,0,0,0,0,0],
    'B' : [0,0,0,0,0,0,0,0,1,0,0,0],
    'r' : [0,0,0,1,0,0,0,0,0,0,0,0],
    'R' : [0,0,0,0,0,0,0,0,0,1,0,0],
    'q' : [0,0,0,0,1,0,0,0,0,0,0,0],
    'Q' : [0,0,0,0,0,0,0,0,0,0,1,0],
    'k' : [0,0,0,0,0,1,0,0,0,0,0,0],
    'K' : [0,0,0,0,0,0,0,0,0,0,0,1],
    '.' : [0,0,0,0,0,0,0,0,0,0,0,0],
}

def generate_side_matrix(board,side):
    matrix = board_matrix(board)
    translate = translate_board(board)
    bools = np.array([piece.isupper() == side for piece in matrix])
    bools = bools.reshape(8,8,1)
    
    side_matrix = translate*bools
    return np.array(side_matrix)

def translate_board(board): 
    pgn = board.epd()
    foo = []  
    pieces = pgn.split(" ", 1)[0]
    rows = pieces.split("/")
    for row in rows:
        foo2 = []  
        for thing in row:
            if thing.isdigit():
                for i in range(0, int(thing)):
                    foo2.append(chess_dict['.'])
            else:
                foo2.append(chess_dict[thing])
        foo.append(foo2)
    return np.array(foo)


def board_matrix(board): 
    pgn = board.epd()
    foo = []  
    pieces = pgn.split(" ", 1)[0]
    rows = pieces.split("/")
    for row in rows:
        foo2 = []  
        for thing in row:
            if thing.isdigit():
                for i in range(0, int(thing)):
                    foo.append('.')
            else:
                foo.append(thing)
    return np.array(foo)

def generate_input(positions,len_positions = 8):
    board_rep = []
    for position in positions:
        black = generate_side_matrix(position,False)
        white = generate_side_matrix(position,True)
        board_rep.append(black)
        board_rep.append(white)
    turn = np.zeros((8,8,12))
    turn.fill(int(position.turn))
    board_rep.append(turn)
    
    while len(board_rep) < len_positions*2 + 1:
        value = np.zeros((8,8,12))
        board_rep.insert(0,value)
    board_rep = np.array(board_rep)
    return board_rep

class Q_model():
    def __init__(self,model = None):
        if model:
            print('CUSTOM MODEL SET')
            self.model = model
        else:
            self.model = self.create_q_model()

    def create_q_model(self):
        def residual_module(layer_in, n_filters):
            merge_input = layer_in
            if layer_in.shape[-1] != n_filters:
                merge_input = Conv2D(n_filters, (1,1), padding='same', activation='relu', kernel_initializer='he_normal')(layer_in)
            conv1 = Conv2D(n_filters, (3,3), padding='same', activation='relu', kernel_initializer='he_normal')(layer_in)
            batch_norm = BatchNormalization()(conv1)
            layer_out = add([batch_norm, merge_input])
            layer_out = Activation('relu')(layer_out)
            return layer_out

        visible = Input(shape=(17, 8, 8,12))
        layer1 = residual_module(visible, 64)
        layer2 = residual_module(layer1, 64)
        layer3 = residual_module(layer2, 64)
        flatten = Flatten()(layer3)
        p = Dense(4096,activation='softmax')(flatten)
        v = Dense(1,activation = 'sigmoid')(flatten)
        return Model(inputs=visible, outputs=[p,v])
    
    def predict(self,env):
        input_values = generate_input(env.position_memory)
        state_tensor = tf.convert_to_tensor(input_values)
        p,v = self.model(state_tensor, training=False)
        return p,v
    
    def explore(self,env):
        action_space = np.random.randn(4096)
        action_space = filter_legal_moves(env.board,action_space)
        action = np.argmax(action_space, axis=None)
        move= num2move[action]
        return move,action
    
model = Q_model()

In [14]:

class TSModel():
    def __init__(self):
        def residual_module(layer_in, n_filters):
            merge_input = layer_in
            if layer_in.shape[-1] != n_filters:
                merge_input = Conv2D(n_filters, (1,1), padding='same', activation='relu', kernel_initializer='he_normal')(layer_in)
            conv1 = Conv2D(n_filters, (3,3), padding='same', activation='relu', kernel_initializer='he_normal')(layer_in)
            batch_norm = BatchNormalization()(conv1)
            layer_out = add([batch_norm, merge_input])
            layer_out = Activation('relu')(layer_out)
            return layer_out

        visible = Input(shape=(17, 8, 8,12))
        layer1 = residual_module(visible, 64)
        layer2 = residual_module(layer1, 64)
        layer3 = residual_module(layer2, 64)
        flatten = Flatten()(layer3)
        p = Dense(4096,activation='softmax')(flatten)
        v = Dense(1,activation = 'sigmoid')(flatten)
        self.model  = Model(inputs = visible, outputs = [p,v])
    def predict(self,boards):
        input_values = generate_input(boards)
        state_tensor = tf.convert_to_tensor(input_values)
        state_tensor = tf.expand_dims(state_tensor,0)
        p,v = self.model(state_tensor, training=False)
        return p,v
    
model = TSModel()

In [8]:
def pos_cont(board):
    boards = []
    legal_moves = list(board.legal_moves)
    for move in legal_moves:
        copy_board = board.copy()
        copy_board.push(move)
        boards.append(copy_board)
    return boards,legal_moves

class Node:
    def __init__(self,board,move):
        self.board = board
        self.move = move
        self.child_nodes = []
        self.parents = []
        self.action = 0
        self.visit_count = 0
            
    def extend(self):
        if not(self.child_nodes):
            continuations,legal_moves = pos_cont(self.board)
            for i in range(len(continuations)):
                self.child_nodes.append(Node(continuations[i],legal_moves[i]))
        
    def create_actions(self):
        new_parents = self.parents
        new_parents.append(self)
        for child_node in self.child_nodes:
            if not(child_node.action):
                child_node.action = Action(child_node.board,new_parents)    
            else:
                pass

In [24]:
c_puct = 1
def evaluate_reward(board):
    if board.is_checkmate():
        return 1
    else:
        return -1
    pass
class MonteCarloTree():
    def __init__(self,board = None):
        if board:
            self.create_root_node(board)
        self.nodes = []
        self.prev_node = self.root_node
        self.len_simulations = 1600
        self.chain = []

    def create_root_node(self,board):
        root_node = Node(board,None)
        self.root_node = root_node
        
    def simulate(self):
        self.chain.append(self.prev_node)
        if self.prev_node.board.is_game_over():
            reward = evaluate_reward(self.prev_node.board)
            for node in self.chain[1:]:
                node.action.V += reward
            return evaluate_reward(self.prev_node.board)   
        
        self.prev_node.extend()
        self.prev_node.create_actions()
        # Extend and create actions only happen when not done before
        Us = []
        child_nodes = self.prev_node.child_nodes
        Ns = [child_node.action.N for child_node in child_nodes]
        for child_node in child_nodes:
            U = child_node.action.evaluate(model,np.sum(Ns))
            Us.append(U)
        next_node = child_nodes[np.argmax(Us)]
        self.prev_node = next_node
        v = self.simulate()
        
        next_node.action.Q = (next_node.action.N*next_node.action.Q +next_node.action.V)/(next_node.action.N+1)
        next_node.action.N += 1
        return -next_node.action.V
    
    def run_simulations(self):
        for i in range(self.len_simulations):
            self.simulate()

In [25]:

import numpy as np
class Action():
    def __init__(self,state,parent_nodes):
        self.N = 0
        self.W = 0
        self.Q = 0
        self.P = 0
        self.U = 0
        self.V = 0
        self.state = state
        
        self.pred_states = []
        for parent_node in parent_nodes:
            self.pred_states.append(parent_node.board)
    def evaluate(self,model,Ns):
        self.P,self.V = model.predict(self.pred_states)
        self.U = c_puct * self.P * np.sqrt(Ns)/(1+ self.N)
        return self.U

In [26]:
tree = MonteCarloTree(chess.Board())
tree.simulate()

ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO
ECHO


<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.25425518]], dtype=float32)>

In [33]:
tree.prev_node.board.result()

'1/2-1/2'