In [2]:
import chess
import pickle
import h5py
import numpy as np
import tensorflow as tf

In [3]:
# Piece to index mapping: (color, piece_type) -> channel 0..11.
# Colors are enumerated True first, then False; piece types run 1..6 within
# each color, so the True-colored pieces occupy channels 0-5 and the
# False-colored pieces occupy channels 6-11.
piece_to_ind = {
    key: channel
    for channel, key in enumerate(
        (side, kind) for side in (True, False) for kind in range(1, 7)
    )
}
# Next free channel index (equals the number of channels assigned above).
cur_ind = len(piece_to_ind)

# Outcome to value mapping: game-result string -> scalar target.
outcome_to_val = {
    '1-0': 1.0,
    '0-1': -1.0,
    '1/2-1/2': 0.0,
}


# Converts board to 8x8x12 array
def convert_board(board):
    """One-hot encode the pieces on `board` into an (8, 8, 12) numpy array.

    Square index ``i`` (0..63, as accepted by ``board.piece_at``) is written to
    row ``i // 8`` and column ``i % 8``; the last axis is the channel looked up
    in ``piece_to_ind`` from the piece's ``(color, piece_type)``. Squares for
    which ``piece_at`` returns a falsy value stay all-zero.
    """
    planes = np.zeros((8, 8, 12))

    for square in range(64):
        occupant = board.piece_at(square)
        if occupant:
            row, col = divmod(square, 8)
            channel = piece_to_ind[(occupant.color, occupant.piece_type)]
            # Mark the single channel that identifies this piece.
            planes[row, col, channel] = 1

    return planes

In [4]:
class Magikarp(object):
    """Board evaluator ("generator") trained together with a move discriminator.

    The generator is a 768 -> 2048 -> 2048 -> 1 ReLU network that assigns a
    scalar score to a flattened board tensor. The discriminator has the same
    hidden sizes, takes two concatenated boards (position before / position
    after a move) and ends in a sigmoid. Everything is built in TF1-style
    graph mode and executed through the session passed to the constructor.
    """

    def __init__(self, config, sess):
        """Open the data files named in `config` and store hyper-parameters.

        Args:
            config: dict providing 'batch_size', 'datafile', 'p_datafile',
                'full_boards_file', 'num_epochs' and 'save_file'.
            sess: TensorFlow session used for every graph execution.
        """
        self.sess = sess
        self.batch_size = config['batch_size']
        self.cur_ind = 0
        self.data = h5py.File(config['datafile'], 'r')
        self.data_size = len(self.data['f_boards'])
        # Close the pickle file deterministically instead of leaking the handle.
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # 'full_boards_file' at files you trust.
        with open(config['full_boards_file'], 'rb') as boards_file:
            self.f_boards_full = pickle.load(boards_file)
        self.l_rate = 0.00005
        self.p_cur_ind = 0
        self.p_data = h5py.File(config['p_datafile'], 'r')
        self.p_data_size = len(self.p_data['f_boards'])
        # 768 = 8*8*12, the size of a flattened convert_board() tensor.
        self.n_input = 768
        self.n_hidden1 = 2048
        self.n_hidden2 = 2048
        self.n_out = 1
        self.num_epochs = config['num_epochs']
        self.reg_coeff = 4
        self.save_file = config['save_file']

    def rand_weights(self, n_in, n_out):
        """Return a [n_in, n_out] tensor drawn uniformly from +/- sqrt(6/(n_in+n_out))."""
        limit = np.sqrt(6.0/(n_in + n_out))
        return tf.compat.v1.random_uniform([n_in, n_out], -1*limit, limit)

    def _make_params(self, prefix, n_in):
        """Create the weight and bias variables of one three-layer network.

        Variables are named '<prefix>_h1', '<prefix>_h2', '<prefix>_out',
        '<prefix>_b1', '<prefix>_b2' and '<prefix>_b_out'.
        """
        weights = {
            'h1': tf.compat.v1.Variable(self.rand_weights(n_in, self.n_hidden1), name=prefix + '_h1'),
            'h2': tf.compat.v1.Variable(self.rand_weights(self.n_hidden1, self.n_hidden2), name=prefix + '_h2'),
            'out': tf.compat.v1.Variable(self.rand_weights(self.n_hidden2, self.n_out), name=prefix + '_out')}
        biases = {
            'b1': tf.compat.v1.Variable(tf.compat.v1.random_normal([self.n_hidden1], stddev=0.01), name=prefix + '_b1'),
            'b2': tf.compat.v1.Variable(tf.compat.v1.random_normal([self.n_hidden2], stddev=0.01), name=prefix + '_b2'),
            'out': tf.compat.v1.Variable(tf.compat.v1.random_normal([self.n_out], stddev=0.01), name=prefix + '_b_out')}
        return weights, biases

    def get_gen_params(self):
        """Create the generator variables (self.g_weights / self.g_biases)."""
        self.g_weights, self.g_biases = self._make_params('g', self.n_input)

    def get_dis_params(self):
        """Create the discriminator variables (self.d_weights / self.d_biases).

        The first layer is twice as wide as the generator's because the
        discriminator input is two boards concatenated.
        """
        self.d_weights, self.d_biases = self._make_params('d', self.n_input*2)

    def gen_move(self, input_board, color):
        """Return the encoded board reached by the best-scoring legal move.

        Every legal move is tried on `input_board` (pushed, scored with
        ``color * get_prediction(...)``, popped). The board is left unchanged
        on return.

        Args:
            input_board: board object exposing legal_moves, push() and pop().
            color: multiplier applied to the network score before comparing.

        Returns:
            convert_board() of the position after the selected move.

        Raises:
            ValueError: if `input_board` has no legal move.
        """
        # Snapshot the moves so the iteration cannot be disturbed by push/pop.
        legal_moves = list(input_board.legal_moves)
        if not legal_moves:
            raise ValueError("gen_move() requires a position with at least one legal move")

        best_move = None
        maxval = float('-inf')
        for move in legal_moves:
            input_board.push(move)
            val = color*self.get_prediction(convert_board(input_board).flatten().reshape((1, -1)))
            input_board.pop()
            if val > maxval:
                maxval = val
                best_move = move
        if best_move is None:
            # No score compared greater than -inf (e.g. NaN predictions);
            # fall back to the first legal move rather than pushing None.
            best_move = legal_moves[0]
        input_board.push(best_move)
        res = convert_board(input_board)
        input_board.pop()
        return res

    def _forward(self, inputs, weights, biases):
        """Apply the two ReLU hidden layers and the linear output layer."""
        hidden1 = tf.compat.v1.add(tf.compat.v1.matmul(inputs, weights['h1']), biases['b1'])
        hidden1 = tf.compat.v1.nn.relu(hidden1)

        hidden2 = tf.compat.v1.add(tf.compat.v1.matmul(hidden1, weights['h2']), biases['b2'])
        hidden2 = tf.compat.v1.nn.relu(hidden2)

        return tf.compat.v1.add(tf.compat.v1.matmul(hidden2, weights['out']), biases['out'])

    def g_predict(self, input_board, p_keep):
        """Generator forward pass: linear score per input row.

        `p_keep` is accepted for interface compatibility but is not used
        (no dropout is applied).
        """
        return self._forward(input_board, self.g_weights, self.g_biases)

    def d_predict(self, input_board, p_keep):
        """Discriminator forward pass: sigmoid of the network output.

        `p_keep` is accepted for interface compatibility but is not used
        (no dropout is applied).
        """
        return tf.compat.v1.sigmoid(self._forward(input_board, self.d_weights, self.d_biases))

    def set_optimization(self):
        """Build the generator and discriminator losses and Adam train ops."""
        # All trainable variables of the graph (kept for inspection).
        self.params = tf.compat.v1.trainable_variables()
        # Select this instance's own variables explicitly. Filtering the whole
        # graph by name prefix would also pick up the variables of any other
        # model built in the same graph.
        self.g_params = list(self.g_weights.values()) + list(self.g_biases.values())
        self.d_params = list(self.d_weights.values()) + list(self.d_biases.values())

        ''' Generator '''
        # Difference f(first board) - f(second board)
        self.pred_sum = self.f_pred - self.s_pred

        # -log(sigmoid((f(second board) - f(random board)) * playing))
        self.rand_diff = -1*tf.compat.v1.reduce_mean(tf.compat.v1.math.log(tf.compat.v1.sigmoid(tf.compat.v1.multiply((self.s_pred - self.r_pred), self.playing))))

        # -log(sigmoid(x)) and -log(sigmoid(-x)) for x = pred_sum; their sum is
        # smallest at x = 0, i.e. it pulls the two board scores together.
        self.equal_board1 = -1*tf.compat.v1.reduce_mean(tf.compat.v1.math.log(tf.compat.v1.sigmoid(self.pred_sum)))
        self.equal_board2 = -1*tf.compat.v1.reduce_mean(tf.compat.v1.math.log(tf.compat.v1.sigmoid(-1*self.pred_sum)))

        # Use discriminator as regularizer
        # NOTE(review): d_pred_fake is computed from placeholders and the
        # discriminator variables only, so this term appears to contribute no
        # gradient to the generator parameters - confirm this is intended.
        self.regularizer = -1*tf.compat.v1.reduce_mean(self.d_pred_fake)

        # Set up total cost and optimization
        self.g_cost = self.rand_diff + self.equal_board1 + self.equal_board2 + self.reg_coeff*self.regularizer
        self.g_optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=self.l_rate)
        self.g_gvs = self.g_optimizer.compute_gradients(self.g_cost, self.g_params)
        self.g_capped_gvs = self.g_gvs  # gradients are applied uncapped
        self.g_train_op = self.g_optimizer.apply_gradients(self.g_capped_gvs)

        ''' Discriminator '''
        # Wasserstein-style loss: maximise mean(D(real) - D(fake)).
        self.d_cost = -1*tf.compat.v1.reduce_mean(self.d_pred_real - self.d_pred_fake)
        self.d_optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=self.l_rate)
        self.d_gvs = self.d_optimizer.compute_gradients(self.d_cost, self.d_params)
        self.d_capped_gvs = self.d_gvs  # gradients are applied uncapped
        self.d_train_op = self.d_optimizer.apply_gradients(self.d_capped_gvs)

    def gen_g_batch(self):
        """Read the next `batch_size` generator samples from self.data.

        Samples are taken at consecutive indices starting from self.cur_ind,
        which wraps around at self.data_size and is advanced in place.

        Returns:
            Tuple of six lists (f_boards, s_boards, r_boards, results,
            playing, move_props), each holding one flattened entry per sample.
        """
        f_boards = []
        s_boards = []
        r_boards = []
        results = []
        playing = []
        move_props = []
        for _ in range(self.batch_size):
            f_boards.append(self.data['f_boards'][self.cur_ind].flatten())
            s_boards.append(self.data['s_boards'][self.cur_ind].flatten())
            r_boards.append(self.data['r_boards'][self.cur_ind].flatten())
            results.append(self.data['results'][self.cur_ind].flatten())
            playing.append(self.data['playing'][self.cur_ind].flatten())
            move_props.append(self.data['move_props'][self.cur_ind].flatten())
            self.cur_ind = (self.cur_ind+1) % self.data_size

        return f_boards, s_boards, r_boards, results, playing, move_props

    def gen_d_batch(self):
        """Read the next `batch_size` discriminator samples from self.p_data.

        For each sample the recorded first/second boards are paired with the
        board produced by gen_move() on the matching entry of
        self.f_boards_full. self.p_cur_ind wraps at self.p_data_size.

        Returns:
            Tuple of three lists (p_f_boards, p_s_boards, gen_boards) of
            flattened boards.
        """
        p_f_boards = []
        p_s_boards = []
        gen_boards = []
        for _ in range(self.batch_size):
            p_f_boards.append(self.p_data['f_boards'][self.p_cur_ind].flatten())
            p_s_boards.append(self.p_data['s_boards'][self.p_cur_ind].flatten())
            gen_boards.append(self.gen_move(self.f_boards_full[self.p_cur_ind], self.p_data['p_color'][self.p_cur_ind]).flatten())
            self.p_cur_ind = (self.p_cur_ind+1) % self.p_data_size

        return p_f_boards, p_s_boards, gen_boards

    def create_gen_model(self):
        """Create generator variables, placeholders and the three score tensors."""
        # Set up model parameters
        self.get_gen_params()

        # Set up graph inputs
        self.f_board_input = tf.compat.v1.placeholder(tf.compat.v1.float32, [None, self.n_input])
        self.s_board_input = tf.compat.v1.placeholder(tf.compat.v1.float32, [None, self.n_input])
        self.r_board_input = tf.compat.v1.placeholder(tf.compat.v1.float32, [None, self.n_input])
        self.results = tf.compat.v1.placeholder(tf.compat.v1.float32, [None, 1])
        self.playing = tf.compat.v1.placeholder(tf.compat.v1.float32, [None, 1])
        self.move_props = tf.compat.v1.placeholder(tf.compat.v1.float32, [None, 1])
        self.p_keep = tf.compat.v1.placeholder(tf.compat.v1.float32)

        # Scores for the first, second and random boards (shared weights)
        self.f_pred = self.g_predict(self.f_board_input, self.p_keep)
        self.s_pred = self.g_predict(self.s_board_input, self.p_keep)
        self.r_pred = self.g_predict(self.r_board_input, self.p_keep)

    def create_dis_model(self):
        """Create discriminator variables, placeholders, outputs and clamp ops.

        Must be called after create_gen_model(), which defines self.p_keep.
        """
        # Set up discriminator model parameters
        self.get_dis_params()

        # Set up discriminator graph inputs
        self.person_board_1 = tf.compat.v1.placeholder(tf.compat.v1.float32, [None, self.n_input])
        self.person_board_2 = tf.compat.v1.placeholder(tf.compat.v1.float32, [None, self.n_input])
        self.gen_board = tf.compat.v1.placeholder(tf.compat.v1.float32, [None, self.n_input])

        # Real pair = (board, recorded next board); fake pair = (board, generated next board)
        self.d_pred_real = self.d_predict(tf.compat.v1.concat([self.person_board_1, self.person_board_2], 1), self.p_keep)
        self.d_pred_fake = self.d_predict(tf.compat.v1.concat([self.person_board_1, self.gen_board], 1), self.p_keep)

        # Clamp weights: assign the clipped value back to each variable.
        # A bare clip_by_value only produces a clipped copy and leaves the
        # variable itself untouched.
        self.weight_clamps = [
            tf.compat.v1.assign(var, tf.compat.v1.clip_by_value(var, -0.01, 0.01))
            for var in self.d_weights.values()]
        self.bias_clamps = [
            tf.compat.v1.assign(var, tf.compat.v1.clip_by_value(var, -0.01, 0.01))
            for var in self.d_biases.values()]

    def create_model(self):
        """Build both networks, the optimizers and the saver, then initialise.

        Only variables created by this call are initialised, so another model
        that already lives in the same graph/session keeps its current values.
        """
        # Names of variables that exist before this model is built.
        preexisting = set(var.name for var in tf.compat.v1.global_variables())

        # Create both networks
        self.create_gen_model()
        self.create_dis_model()

        # Get loss and optimize
        self.set_optimization()

        # Initialise just this model's variables (network + optimizer state).
        own_vars = [var for var in tf.compat.v1.global_variables()
                    if var.name not in preexisting]
        self.init = tf.compat.v1.variables_initializer(own_vars)

        # Model saver
        # NOTE(review): this Checkpoint is created without any tracked
        # objects, so it is unclear that save()/restore() actually persist or
        # load the network variables - confirm, or attach the variables.
        self.saver = tf.train.Checkpoint()

        # Run initializer
        self.sess.run(self.init)

    def get_prediction(self, board):
        """Return the generator score for `board` (rows of n_input values)."""
        # Run on the session this model was built with rather than relying on
        # an implicit default session.
        return self.sess.run(self.f_pred, feed_dict={self.f_board_input: board, self.p_keep: 1.0})

    def train(self):
        """Build the model and run the alternating training loop.

        Each epoch runs 10 batches; every batch performs 5 discriminator
        updates (each followed by weight clamping) and one generator update.
        The saver is invoked after every epoch and once more at the end.
        """
        self.create_model()
        d_steps = 5  # discriminator updates per generator update
        for epoch in range(self.num_epochs):
            num_batches = 10 #self.data_size/self.batch_size #change back to 100
            g_avg_cost = 0
            d_avg_cost = 0
            p_f_boards, p_s_boards, gen_boards = [], [], []
            for batch in range(num_batches):
                print(batch)
                for _ in range(d_steps):
                    p_f_boards, p_s_boards, gen_boards = self.gen_d_batch()
                    _, dc = self.sess.run([self.d_train_op, self.d_cost], feed_dict = {
                                self.person_board_1: p_f_boards, self.person_board_2: p_s_boards,
                                self.gen_board: gen_boards})
                    # Clamp in a separate run so it is applied after the
                    # optimizer update rather than racing with it.
                    self.sess.run([self.weight_clamps, self.bias_clamps])
                    d_avg_cost += dc/float(num_batches*d_steps)
                f_boards, s_boards, r_boards, results, playing, move_props = self.gen_g_batch()
                # The generator cost includes the discriminator output, so the
                # last discriminator batch is fed as well.
                _, gc = self.sess.run([self.g_train_op, self.g_cost], feed_dict = {
                                self.f_board_input: f_boards, self.s_board_input: s_boards,
                                self.r_board_input: r_boards, self.p_keep: 0.5,
                                self.results: results, self.move_props: move_props,
                                self.playing: playing, self.person_board_1: p_f_boards,
                                self.person_board_2: p_s_boards, self.gen_board: gen_boards})
                g_avg_cost += gc/float(num_batches)
            print("Epoch ", (epoch+1), ": Average generator cost was ", g_avg_cost, "\tAverage discriminator cost was ", d_avg_cost)
            self.saver.save(self.save_file)
        print("Optimization complete.")
        self.saver.save(self.save_file)
        print("Model saved as "+self.save_file)

    def load_model(self, model_file):
        """Build the model and ask the saver to restore from `model_file`."""
        self.create_model()
        # NOTE(review): the status object returned by restore() is discarded;
        # in graph mode the restore ops may never be run - confirm the
        # weights are really loaded.
        self.saver.restore(model_file)
        print("Model restored from "+model_file)
        
     

In [5]:
# Search at depth of one move
MAX_DEPTH = 0


def negamax(board, depth, color, alpha, beta, evaluator):
    """Negamax search with alpha-beta pruning over pseudo-legal moves.

    Args:
        board: position to search; moves are pushed and popped in place.
        depth: current ply, compared against the module-level MAX_DEPTH.
        color: multiplier applied to the evaluator's score at a leaf.
        alpha, beta: current search window bounds.
        evaluator: object whose get_prediction() scores an encoded board.

    Returns:
        (value, move). At a leaf (checkmate, or depth beyond MAX_DEPTH) the
        value is ``color * evaluator.get_prediction(...)`` and the move is
        None. If the position offers no moves the result is (-inf, None).
    """
    if board.is_checkmate() or depth > MAX_DEPTH:
        features = convert_board(board).flatten().reshape((1, -1))
        return (color*evaluator.get_prediction(features), None)

    top_val, top_move = float('-inf'), None
    for candidate in board.pseudo_legal_moves:
        board.push(candidate)
        # The child is scored from the opponent's side, hence the negated
        # color, the swapped and negated window, and the negated result.
        reply_val, _ = negamax(board, depth+1, -color, -beta, -alpha, evaluator)
        board.pop()
        val = -reply_val

        if val > top_val:
            top_val, top_move = val, candidate
        if val > alpha:
            alpha = val
        if alpha >= beta:
            # Window closed: stop searching the remaining moves.
            return (alpha, top_move)

    return (top_val, top_move)

In [6]:
def _player_config(name, save_file):
    """Return the Magikarp config dict for the player called `name`.

    File names follow the '<name>_training.hdf5' / '<name>_player.hdf5' /
    'full_boards_<name>.pkl' pattern; `save_file` is the checkpoint path.
    """
    return {
        'batch_size': 20,
        'datafile': name + '_training.hdf5',
        'p_datafile': name + '_player.hdf5',
        'full_boards_file': 'full_boards_' + name + '.pkl',
        'num_epochs': 1,
        'save_file': save_file,
    }


def _game_result(board):
    """Return the end-of-game message for `board`, or None if play continues.

    Kings are looked up in the piece-placement part of the FEN only ('k' is
    the black king, 'K' the white king), because the engines play
    pseudo-legal moves and a king can therefore actually be captured.
    """
    placement = board.board_fen()
    if 'k' not in placement:
        return 'Congrats - Polgar won!'
    if 'K' not in placement:
        return 'Congrats - Alekhine won!'
    if board.is_checkmate():
        return "Congrats - someone won!"
    return None


with tf.compat.v1.Session() as sess:
    # Set up chess board
    board = chess.Board()

    # Load evaluation models
    config = _player_config('polgar', 'trained_model_polgar/trained_genadv_polgar.ckpt-10.index')
    polgar = Magikarp(config, sess)
    polgar.load_model(polgar.save_file)

    config = _player_config('alekhine', 'trained_model_alekhine/trained_genadv_alekhine.ckpt-11.index')
    alekhine = Magikarp(config, sess)
    alekhine.load_model(alekhine.save_file)

    # Polgar moves first from the initial position, Alekhine replies.
    # NOTE(review): both engines search with color=-1, as in the original
    # cell - confirm that this is the intended sign for the side moving first.
    engines = (polgar, alekhine)
    game_finished = False
    while not game_finished and not board.is_checkmate():
        print('-'*50)
        for engine in engines:
            print("Current Board:\n\n", board, "\n")
            score, comp_move = negamax(board, 0, -1, float('-inf'), float('inf'), engine)
            print(score, comp_move)
            if comp_move is None:
                # negamax found nothing to play; pushing None would crash.
                print("No move available - stopping the game.")
                game_finished = True
                break
            board.push(comp_move)

            # Check after every single move, so the second engine is never
            # asked to move in a position where the game is already over.
            outcome = _game_result(board)
            if outcome is not None:
                print(outcome)
                game_finished = True
                break
        
    

Instructions for updating:
Restoring a name-based tf.train.Saver checkpoint using the object-based restore API. This mode uses global names to match variables, and so is somewhat fragile. It also adds new restore ops to the graph each time it is called when graph building. Prefer re-encoding training checkpoints in the object-based format: run save() on the object-based saver (the same one this message is coming from) and use that checkpoint in the future.
Model restored from trained_model_polgar/trained_genadv_polgar.ckpt-10.index
Model restored from trained_model_alekhine/trained_genadv_alekhine.ckpt-11.index
--------------------------------------------------
Current Board:

 r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R 

[[0.02978683]] d2d3
Current Board:

 r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . P . . . .
P P P . P P P P
R N B Q K B N R 

[[-0.02557473]] 

[[0.126395]] f1g2
Current Board:

 . n b . . . k .
. . . . b . p .
. . . q p . . .
. . . p . p p .
r p . . n . r .
R . P P . . P .
. . . . P P B .
. . . Q K . N R 

[[0.24028467]] g8h7
--------------------------------------------------
Current Board:

 . n b . . . . .
. . . . b . p k
. . . q p . . .
. . . p . p p .
r p . . n . r .
R . P P . . P .
. . . . P P B .
. . . Q K . N R 

[[0.1120971]] e1f1
Current Board:

 . n b . . . . .
. . . . b . p k
. . . q p . . .
. . . p . p p .
r p . . n . r .
R . P P . . P .
. . . . P P B .
. . . Q . K N R 

[[0.26128605]] g7g6
--------------------------------------------------
Current Board:

 . n b . . . . .
. . . . b . . k
. . . q p . p .
. . . p . p p .
r p . . n . r .
R . P P . . P .
. . . . P P B .
. . . Q . K N R 

[[0.14623259]] e2e3
Current Board:

 . n b . . . . .
. . . . b . . k
. . . q p . p .
. . . p . p p .
r p . . n . r .
R . P P P . P .
. . . . . P B .
. . . Q . K N R 

[[0.2727125]] b4a3
-----------------------------------------------