In [None]:
!pip install theano
!pip install sklearn
!pip install --force-reinstall chess

!git clone https://github.com/thomasahle/sunfish

import numpy
import theano
import theano.tensor as T

import os
from sklearn.model_selection import train_test_split
import pickle
import random
import itertools
from theano.tensor.nnet import sigmoid
import scipy.sparse
import h5py
import math
import time

import chess, chess.pgn
import sys
import multiprocessing

import heapq
import re
import string
import traceback

from datetime import datetime

import sunfish.sunfish as sunfish

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting theano
  Downloading Theano-1.0.5.tar.gz (2.8 MB)
[K     |████████████████████████████████| 2.8 MB 11.2 MB/s 
Building wheels for collected packages: theano
  Building wheel for theano (setup.py) ... [?25l[?25hdone
  Created wheel for theano: filename=Theano-1.0.5-py3-none-any.whl size=2668112 sha256=0609fe6add80c6fb1d01d74e1f91bc14e34148dda0b75b2179e649ebcc7c7e73
  Stored in directory: /root/.cache/pip/wheels/26/68/6f/745330367ce7822fe0cd863712858151f5723a0a5e322cc144
Successfully built theano
Installing collected packages: theano
Successfully installed theano-1.0.5
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sklearn
  Downloading sklearn-0.0.post1.tar.gz (3.6 kB)
Building wheels for collected packages: sklearn
  Building wheel for sklearn (setup.py) ... [?25l[?25hdone
  Created wheel for sklearn: filename=

In [None]:
# Shared NumPy random module: get_parameters draws initial weights from it and
# get_model uses it to seed the Theano random stream used for dropout.
# NOTE(review): no seed is set anywhere in the visible code, so runs are not reproducible.
rng = numpy.random

def get_parameters(n_in=None, n_hidden_units=2048, n_hidden_layers=None, WW=None, bs=None):
    """Return the network parameters as two lists of Theano shared variables.

    When both ``WW`` and ``bs`` are supplied they are wrapped as-is.  Otherwise
    fresh values are created:

    * every layer except the last gets a ``(fan_in, fan_out)`` weight matrix
      drawn uniformly from ``+/- sqrt(6 / (fan_in + fan_out))`` and a bias
      vector filled with 0.1;
    * the last layer is the scalar output: a zero weight vector of length
      ``fan_in`` and a zero scalar bias.

    Args:
        n_in: number of input features of the first layer.
        n_hidden_units: an int (same width for every layer, requires
            ``n_hidden_layers``) or a list with one width per layer.  The last
            entry is not used as an output size because the final layer is scalar.
        n_hidden_layers: number of layers when ``n_hidden_units`` is an int.
        WW, bs: existing weight / bias values to wrap instead of initialising.

    Returns:
        ``(Ws_s, bs_s)`` - lists of ``theano.shared`` variables.
    """
    needs_init = (WW is None) or (bs is None)
    if needs_init:
        print('initializing WW & bs')
        if type(n_hidden_units) == list:
            n_hidden_layers = len(n_hidden_units)
        else:
            n_hidden_units = [n_hidden_units] * n_hidden_layers

        dtype = theano.config.floatX

        def uniform_weights(fan_in, fan_out):
            # Symmetric uniform initialisation scaled by the layer's fan-in + fan-out.
            bound = numpy.sqrt(6. / (fan_in + fan_out))
            sample = rng.uniform(low=-bound, high=bound, size=(fan_in, fan_out))
            return numpy.asarray(sample, dtype=dtype)

        WW, bs = [], []
        output_layer = n_hidden_layers - 1
        for layer in range(n_hidden_layers):
            fan_in = n_hidden_units[layer - 1] if layer else n_in
            if layer == output_layer:
                # Scalar output unit: starts at zero.
                weights = numpy.zeros(fan_in, dtype=dtype)
                bias = floatX(0.)
            else:
                fan_out = n_hidden_units[layer]
                weights = uniform_weights(fan_in, fan_out)
                # Slightly positive bias so the rectifier starts in its active region.
                bias = numpy.ones(fan_out, dtype=dtype) * 0.1
            WW.append(weights)
            bs.append(bias)

    shared_weights = [theano.shared(w) for w in WW]
    shared_biases = [theano.shared(b) for b in bs]
    return shared_weights, shared_biases


def get_model(Ws_s, bs_s, dropout=False):
    """Build the symbolic evaluation network on top of the shared parameters.

    The symbolic input matrix holds one piece code per square.  It is expanded
    into twelve indicator planes (one per code in 1-6 and 8-13), concatenated
    along axis 1, and fed through rectified linear layers.  The last entry of
    ``Ws_s`` / ``bs_s`` is applied without a non-linearity to give the score.

    Args:
        Ws_s, bs_s: lists of shared weight / bias variables.
        dropout: a bool applied to every layer, or a list with one flag per
            layer.  Where enabled, activations are masked with probability 0.5
            and scaled by 2.

    Returns:
        ``(x_s, p_s)`` - the symbolic input and the symbolic score.
    """
    print('building expression graph')
    x_s = T.matrix('x')

    if type(dropout) != list:
        dropout = [dropout] * len(Ws_s)

    # One indicator plane per piece code; code 0 (empty) and 7 are never used.
    piece_codes = [1,2,3,4,5,6, 8,9,10,11,12,13]
    indicator_planes = [T.eq(x_s, code) for code in piece_codes]
    binary_layer = T.concatenate(indicator_planes, axis=1)

    stream_seed = rng.randint(999999)
    srng = theano.tensor.shared_randomstreams.RandomStreams(stream_seed)

    activation = binary_layer
    n_hidden = len(Ws_s) - 1
    for idx in range(n_hidden):
        pre_activation = T.dot(activation, Ws_s[idx]) + bs_s[idx]
        # Rectifier written as a multiplication by the positive-part mask.
        rectified = pre_activation * (pre_activation > 0)

        if dropout[idx]:
            keep = srng.binomial(n=1, p=0.5, size=rectified.shape)
            rectified = rectified * T.cast(keep, theano.config.floatX) * 2

        activation = rectified

    p_s = T.dot(activation, Ws_s[-1]) + bs_s[-1]
    return x_s, p_s



In [None]:


def read_games(fn):
    """Yield every game that can be parsed from the PGN file ``fn``.

    The file is opened in text mode and closed as soon as the generator is
    exhausted or closed.  Reading stops at the first falsy result from
    ``chess.pgn.read_game`` (end of file).

    Args:
        fn: path of the PGN file.

    Yields:
        The game objects returned by ``chess.pgn.read_game``.
    """
    with open(fn) as f:
        while True:
            try:
                g = chess.pgn.read_game(f)
            except Exception:
                # Best effort: skip a game the parser chokes on and try the next one.
                # KeyboardInterrupt is not an Exception subclass, so it still propagates.
                # NOTE(review): if a failure does not advance the file position this
                # loop would spin forever - same as before; confirm with real data.
                continue

            if not g:
                break

            yield g


def bb2array(b, flip=False):
    """Encode a board as a length-64 int8 vector of piece codes.

    For each piece type 1..6, squares returned by ``b.pieces(piece, True)``
    are written as ``piece + 7`` and squares returned by
    ``b.pieces(piece, False)`` as ``piece``; every other square stays 0.
    (In python-chess ``True`` is the white colour.)

    Args:
        b: object exposing ``pieces(piece_type, colour)`` that yields square indices.
        flip: accepted for interface compatibility but ignored.

    Returns:
        ``numpy.ndarray`` of shape ``(64,)`` and dtype ``int8``.
    """
    # `flip` is not taken into account.
    # NOTE(review): the black/white encoding may be inverted - confirm.
    encoded = numpy.zeros(64, dtype=numpy.int8)

    for piece_type in range(1, 7):
        colour_codes = ((True, piece_type + 7), (False, piece_type))
        for colour, code in colour_codes:
            for square in b.pieces(piece_type, colour):
                encoded[square] = code

    return encoded


def parse_game(g):
    """Sample one training example from a finished game.

    A random node of the game (excluding the starting position) is chosen and
    three boards are encoded with ``bb2array``: the node itself, its parent,
    and the position reached from the parent by a uniformly random legal move.

    Args:
        g: a parsed game exposing ``headers``, ``end()`` and nodes with
            ``board()`` and ``parent``.

    Returns:
        ``(x, x_parent, x_random, moves_left, y)`` where ``moves_left`` is the
        number of plies between the sampled node and the end of the game and
        ``y`` is the result (1 / -1 / 0 for '1-0' / '0-1' / '1/2-1/2'), negated
        when ``board().turn`` at the sampled node is falsy.
        ``None`` when the result header is not recognised, when the final
        position is not game-over, or when the game contains no moves.
    """
    rm = {'1-0': 1, '0-1': -1, '1/2-1/2': 0}
    r = g.headers['Result']
    if r not in rm:
        return None
    y = rm[r]

    # Walk from the final position back to the root, collecting every node.
    gn = g.end()
    if not gn.board().is_game_over():
        return None

    gns = []
    moves_left = 0
    while gn:
        gns.append((moves_left, gn, gn.board().turn == 0))
        gn = gn.parent
        moves_left += 1

    print(len(gns))
    if len(gns) < 10:
        print(g.end())

    # The root was appended last; drop it because it has no parent position.
    gns.pop()
    if not gns:
        # A "finished" game without any move (e.g. set up from a terminal
        # position) leaves nothing to sample; random.choice would raise.
        return None

    moves_left, gn, flip = random.choice(gns)

    b = gn.board()
    x = bb2array(b, flip=flip)
    b_parent = gn.parent.board()
    x_parent = bb2array(b_parent, flip=(not flip))
    if flip:
        y = -y

    # Generate a random board: play a random legal move from the parent position.
    moves = list(b_parent.legal_moves)
    move = random.choice(moves)
    b_parent.push(move)
    x_random = bb2array(b_parent, flip=flip)

    if moves_left < 3:
        # Diagnostic dump for samples taken close to the end of the game.
        print(moves_left, 'moves left')
        print('winner:', y)
        print(g.headers)
        print(b)
        print('checkmate:', g.end().board().is_checkmate())

    return (x, x_parent, x_random, moves_left, y)


def read_all_games(fn_in, fn_out):
    """Convert the PGN file ``fn_in`` into the HDF5 file ``fn_out``.

    One sample per usable game (see ``parse_game``) is written to five
    resizable datasets:

    * ``x``, ``xr``, ``xp`` - ``(n, 64)`` int8 boards (sampled, random, parent)
    * ``y`` - ``(n,)`` int8 game result
    * ``m`` - ``(n,)`` int16 plies left until the end of the game

    ``m`` is int16 rather than int8 because games are routinely longer than
    127 plies, which does not fit in a signed byte.

    The datasets grow geometrically while reading and are trimmed to the
    number of rows actually written at the end.  The output file is closed
    even when parsing fails part-way through.
    """
    with h5py.File(fn_out, 'w') as g:
        X, Xr, Xp = [g.create_dataset(d, (0, 64), dtype='b', maxshape=(None, 64), chunks=True) for d in ['x', 'xr', 'xp']]
        Y = g.create_dataset('y', (0,), dtype='b', maxshape=(None,), chunks=True)
        M = g.create_dataset('m', (0,), dtype='i2', maxshape=(None,), chunks=True)
        datasets = (X, Xr, Xp, Y, M)

        size = 0
        line = 0
        for game in read_games(fn_in):
            parsed = parse_game(game)
            if parsed is None:
                continue
            x, x_parent, x_random, moves_left, y = parsed

            if line + 1 >= size:
                # Grow capacity: 1, 3, 7, 15, ...
                g.flush()
                size = 2 * size + 1
                print('resizing to', size)
                for d in datasets:
                    d.resize(size=size, axis=0)

            X[line] = x
            Xr[line] = x_random
            Xp[line] = x_parent
            Y[line] = y
            M[line] = moves_left

            line += 1

        # Drop the unused tail of the over-allocated datasets.
        for d in datasets:
            d.resize(size=line, axis=0)

def read_all_games_2(a):
    """Call ``read_all_games`` with the arguments packed in ``a``.

    Exists so a single iterable of arguments can be handed to ``Pool.map``,
    which passes exactly one item to the worker function.
    """
    packed_args = tuple(a)
    return read_all_games(*packed_args)

def parse_dir(d='/mnt'):
    """Convert every ``*.pgn`` file in directory ``d`` that has no ``.hdf5`` twin.

    The conversions run in parallel in a ``multiprocessing.Pool`` that is shut
    down once all files are done.  Nothing is started when there is no work.

    Args:
        d: directory to scan; defaults to ``'/mnt'``, the previously hard-coded location.
    """
    files = []
    for name in sorted(os.listdir(d)):
        if not name.endswith('.pgn'):
            continue
        fn_in = os.path.join(d, name)
        # Swap only the final extension; str.replace would also rewrite any
        # other '.pgn' occurring earlier in the path.
        fn_out = os.path.splitext(fn_in)[0] + '.hdf5'
        if not os.path.exists(fn_out):
            files.append((fn_in, fn_out))

    if not files:
        return

    # map() blocks until every file is processed; leaving the `with` block
    # then releases the worker processes.
    with multiprocessing.Pool() as pool:
        pool.map(read_all_games_2, files)



# Convert the PGN files under '/mnt' to HDF5; files that already have an .hdf5 twin are skipped.
parse_dir()


58
resizing to 1
53
resizing to 3
42
resizing to 7
36
49
28
48
resizing to 15
122
263
53
170
33
63
46
180
resizing to 31
66
2 moves left
winner: -1
Headers(Event='FICS rated standard game', Site='FICS freechess.org', Date='2022.01.31', Round='?', White='Casmot', Black='ciupilica', Result='1-0', BlackClock='0:15:00.000', BlackElo='1829', BlackRD='0.0', ECO='A40', FICSGamesDBGameNo='510270686', PlyCount='65', Time='07:08:00', TimeControl='900+10', WhiteClock='0:15:00.000', WhiteElo='2187', WhiteRD='0.0')
r n . . . k . .
p p . b . . . R
. . . . p N B .
. . p p P . . .
. . . P . . p .
. . P . P . . .
P P . K . . . .
. . . . . . . .
checkmate: True
59
50
73
89
87
32
44
0 moves left
winner: -1
Headers(Event='FICS rated standard game', Site='FICS freechess.org', Date='2022.01.30', Round='?', White='Hutnik', Black='mskar', Result='1-0', BlackClock='0:15:00.000', BlackElo='1507', BlackRD='0.0', ECO='C50', FICSGamesDBGameNo='510265772', PlyCount='43', Time='14:51:00', TimeControl='900+5', WhiteC

In [None]:
def floatX(x):
    """Convert ``x`` to a NumPy array of Theano's configured float dtype."""
    target_dtype = theano.config.floatX
    return numpy.asarray(x, dtype=target_dtype)

def load_data(dir='/mnt'):
    """Yield a read-only ``h5py.File`` for every ``*.hdf5`` file in ``dir``.

    The files are handed out open; closing them is left to the consumer.
    """
    hdf5_names = (name for name in os.listdir(dir) if name.endswith('.hdf5'))
    for name in hdf5_names:
        yield h5py.File(os.path.join(dir, name), 'r')


def get_data(series=['x', 'xr']):
    """Load the requested datasets from every HDF5 file and split train/test.

    Args:
        series: names of the datasets to read from each file (not modified).

    Returns:
        The list produced by ``train_test_split``: for every requested series
        its train part followed by its test part.

    Raises:
        ValueError: when ``load_data`` yields no file at all.
        KeyError: propagated from h5py when a file lacks a requested dataset.
    """
    columns = [[] for _ in series]
    handles = []
    try:
        for f in load_data():
            handles.append(f)
            for i, s in enumerate(series):
                columns[i].append(f[s])

        if not handles:
            raise ValueError('no .hdf5 files found to load')

        # Stacking copies the dataset contents into in-memory arrays, so the
        # files can be closed afterwards.
        data = [stack(c) for c in columns]
    finally:
        for f in handles:
            f.close()

    n_total = len(data[0])
    # Hold out 10,000 entries, but never more than half of the data.  The
    # previous int(10000.0 / n) evaluated to 0 for any data set larger than
    # 10,000 entries, which train_test_split rejects.
    test_size = max(1, min(10000, n_total // 2))
    print('Splitting', n_total, 'entries into train/test set')
    data = train_test_split(*data, test_size=test_size)

    print(data[0].shape[0], 'train set', data[1].shape[0], 'test set')
    return data

def stack(vectors):
    """Concatenate array-likes along their first axis.

    Inputs with more than one dimension (judged by the first element) are
    stacked with ``numpy.vstack``; one-dimensional inputs with ``numpy.hstack``.
    """
    is_multidimensional = len(vectors[0].shape) > 1
    combine = numpy.vstack if is_multidimensional else numpy.hstack
    return combine(vectors)

def get_training_model(Ws_s, bs_s, dropout=False, lambd=10.0, kappa=1.0):
    """Build the three-headed training graph and its loss terms.

    The same shared parameters are wired into three copies of the network,
    built in this order: the position actually reached (``c``), the position
    after a random move (``r``) and the parent position (``p``).

    Loss terms (all negative mean log-sigmoids):

    * ``loss_a``: of ``score(c) - score(r)`` - the played position should
      score higher than the random one.
    * ``loss_b`` / ``loss_c``: of ``+/- kappa * (score(c) + score(p))`` -
      together they are minimised when the two scores cancel out.

    Returns:
        ``(xc_s, xr_s, xp_s, loss, reg, loss_a, loss_b, loss_c)`` where
        ``loss`` is the sum of the three terms and ``reg`` is ``lambd`` times
        the mean square of every parameter, summed over all parameters.
    """
    heads = [get_model(Ws_s, bs_s, dropout=dropout) for _ in range(3)]
    (xc_s, xc_p), (xr_s, xr_p), (xp_s, xp_p) = heads

    def neg_log_sigmoid(z):
        # Negative log likelihood of a sigmoid, averaged over the batch.
        return -T.log(sigmoid(z)).mean()

    loss_a = neg_log_sigmoid(xc_p - xr_p)

    cp_diff = kappa * (xc_p + xp_p)
    loss_b = neg_log_sigmoid(cp_diff)
    loss_c = neg_log_sigmoid(-cp_diff)

    # L2-style penalty over weights and biases.
    reg = sum(lambd * (param ** 2).mean() for param in Ws_s + bs_s)

    loss = loss_a + loss_b + loss_c
    return xc_s, xr_s, xp_s, loss, reg, loss_a, loss_b, loss_c


def nesterov_updates(loss, all_params, learn_rate, momentum):
    """Return Theano update pairs for SGD with Nesterov-style momentum.

    For every parameter a zero-initialised shared velocity is created.  With
    gradient ``g`` the new velocity is ``momentum * velocity - learn_rate * g``
    and the new parameter is
    ``param + momentum * new_velocity - learn_rate * g``.

    Returns:
        A list with, per parameter, the pair ``(param, new_param)`` followed
        by ``(velocity, new_velocity)``.
    """
    gradients = T.grad(loss, all_params)
    updates = []
    for param, grad in zip(all_params, gradients):
        # Velocity has the parameter's shape and starts at zero.
        zero_like = numpy.array(param.get_value()*0., dtype=theano.config.floatX)
        velocity = theano.shared(zero_like)
        new_velocity = momentum * velocity - learn_rate * grad
        new_param = param + momentum * new_velocity - learn_rate * grad
        updates.extend([(param, new_param), (velocity, new_velocity)])
    return updates


def get_function(Ws_s, bs_s, dropout=False, update=False):
    """Compile the training-graph function.

    The compiled function takes ``(current, random, parent, learning_rate)``
    and returns ``[loss, reg, loss_a, loss_b, loss_c]``.  With ``update=True``
    each call also applies one Nesterov momentum step (momentum 0.9) on
    ``loss + reg``; with ``update=False`` the learning rate input is unused and
    Theano only warns about it.
    """
    (xc_s, xr_s, xp_s,
     loss_f, reg_f, loss_a_f, loss_b_f, loss_c_f) = get_training_model(Ws_s, bs_s, dropout=dropout)
    objective = loss_f + reg_f

    learning_rate = T.scalar(dtype=theano.config.floatX)
    momentum = floatX(0.9)

    updates = nesterov_updates(objective, Ws_s + bs_s, learning_rate, momentum) if update else []

    print('compiling function')
    return theano.function(
        inputs=[xc_s, xr_s, xp_s, learning_rate],
        outputs=[loss_f, reg_f, loss_a_f, loss_b_f, loss_c_f],
        updates=updates,
        on_unused_input='warn')

def deep_train():
    """Train the evaluation network on the parsed games; runs until interrupted.

    Every iteration trains on one randomly chosen minibatch with a learning
    rate that decays exponentially with wall-clock time (time constant of one
    day).  Every 200 iterations the test loss is computed and, when it
    improves on the best value so far, the parameters are pickled to a
    time-stamped ``model_<date>_<hour>-<minute>.pickle`` file.

    Requires the module-level constant ``MINIBATCH_SIZE`` to be defined.
    """
    Xc_train, Xc_test, Xr_train, Xr_test, Xp_train, Xp_test = get_data(['x', 'xr', 'xp'])
    # Show the first sampled board and its parent as 8x8 grids of piece codes.
    for board in [Xc_train[0], Xp_train[0]]:
        for row in range(8):
            print(' '.join('%2d' % x for x in board[(row*8):((row+1)*8)]))
        print()  # blank separator line; a bare `print` is a no-op in Python 3

    n_in = 12 * 64

    Ws_s, bs_s = get_parameters(n_in=n_in, n_hidden_units=[2048] * 3)

    minibatch_size = min(MINIBATCH_SIZE, Xc_train.shape[0])

    train = get_function(Ws_s, bs_s, update=True, dropout=False)
    test = get_function(Ws_s, bs_s, update=False, dropout=False)

    best_test_loss = float('inf')
    base_learning_rate = 0.03
    t0 = time.time()

    i = 0
    while True:
        i += 1
        learning_rate = floatX(base_learning_rate * math.exp(-(time.time() - t0) / 86400))

        # Only whole minibatches are used; a trailing partial batch is never drawn.
        n_batches = Xc_train.shape[0] // minibatch_size
        minibatch_index = random.randint(0, n_batches - 1)
        lo, hi = minibatch_index * minibatch_size, (minibatch_index + 1) * minibatch_size
        loss, reg, loss_a, loss_b, loss_c = train(Xc_train[lo:hi], Xr_train[lo:hi], Xp_train[lo:hi], learning_rate)

        zs = [loss, loss_a, loss_b, loss_c, reg]
        print('iteration %6d learning rate %12.9f: %s' % (i, learning_rate, '\t'.join(['%12.9f' % z for z in zs])))

        if i % 200 == 0:
            test_loss, test_reg, _, _, _ = test(Xc_test, Xr_test, Xp_test, learning_rate)
            print('test loss %12.9f' % test_loss)

            if test_loss < best_test_loss:
                print('new record!')
                best_test_loss = test_loss

                print('dumping pickled model')
                now = datetime.now()
                new_model_name = 'model' + '_' + str(now.date()) + '_' + str(now.hour) + '-' + str(now.minute) + '.pickle'
                # The context manager closes the file even if pickling fails.
                with open(new_model_name, 'wb') as f:
                    pickle.dump((values(Ws_s), values(bs_s)), f)

def values(zs):
    """Return the current value of every shared variable in ``zs``.

    Values are fetched with ``borrow=True``.
    """
    current = []
    for shared_var in zs:
        current.append(shared_var.get_value(borrow=True))
    return current

In [None]:
#MINIBATCH_SIZE = 2000

#deep_train()

In [None]:

def values(zs):
    """Return the current value of every shared variable in ``zs`` (fetched with ``borrow=True``).

    NOTE(review): this repeats, unchanged, the ``values`` helper already
    defined in the training cell.
    """
    return [z.get_value(borrow=True) for z in zs]

def dump(Ws_s, bs_s):
    """Pickle the current parameter values to ``model_reinforcement.pickle``.

    The data is first written to a temporary sibling file which then replaces
    the target, so an interruption while writing cannot leave a truncated
    model file behind for the next load.
    """
    target = 'model_reinforcement.pickle'
    scratch = target + '.tmp'
    with open(scratch, 'wb') as f:
        pickle.dump((values(Ws_s), values(bs_s)), f)
    os.replace(scratch, target)


def get_params(fns):
    """Load ``(Ws, bs)`` from the first file in ``fns`` that exists.

    Args:
        fns: candidate pickle paths, in order of preference.

    Returns:
        The ``(Ws, bs)`` tuple stored in the file, or ``None`` when none of
        the candidates exists.
    """
    for fn in fns:
        if os.path.exists(fn):
            print('loading', fn)
            # SECURITY: unpickling can execute arbitrary code - only load model
            # files that come from a trusted source.
            with open(fn, 'rb') as ff:
                Ws, bs = pickle.load(ff)

            return Ws, bs

    return None


def get_predict(Ws_s, bs_s):
    """Compile a function mapping a batch of encoded boards to their scores."""
    board_input, score = get_model(Ws_s, bs_s)
    return theano.function(inputs=[board_input], outputs=score)


def get_update(Ws_s, bs_s):
    """Compile the self-play training step.

    The compiled function takes ``(x, y, learning_rate, momentum)`` where
    ``y`` is a vector of 0/1 outcome labels, applies one Nesterov momentum
    update to all parameters and returns ``[loss, frac_correct]``:

    * ``loss`` - mean binary cross-entropy between ``sigmoid(score)`` and ``y``;
    * ``frac_correct`` - mean of "score > 0 where y is 1, score < 0 where y is 0".
    """
    x, fx = get_model(Ws_s, bs_s)

    # Ground truth (who won)
    y = T.vector('y')

    # Log likelihood of a sigmoid fit, split into its two label-dependent terms.
    y_pred = sigmoid(fx)
    positive_term = y * T.log(y_pred)
    negative_term = (1 - y) * T.log(1 - y_pred)
    loss = -(positive_term + negative_term).mean()

    # Fraction of boards whose score sign agrees with the label.
    frac_correct = ((fx > 0) * y + (fx < 0) * (1 - y)).mean()

    learning_rate_s = T.scalar(dtype=theano.config.floatX)
    momentum_s = T.scalar(dtype=theano.config.floatX)
    step = nesterov_updates(loss, Ws_s + bs_s, learning_rate_s, momentum_s)

    return theano.function(
        inputs=[x, y, learning_rate_s, momentum_s],
        outputs=[loss, frac_correct],
        updates=step,
        )


def weighted_random_sample(ps):
    """Draw an index with probability proportional to the weights in ``ps``.

    ``ps`` is expected to sum to (about) 1.  Because of floating point
    rounding the running remainder can stay non-negative after the last
    weight; in that case the last index is returned instead of ``None``.

    Args:
        ps: iterable of non-negative weights.

    Returns:
        The sampled index.

    Raises:
        ValueError: when ``ps`` is empty.
    """
    r = random.random()
    last = None
    for i, p in enumerate(ps):
        last = i
        r -= p
        if r < 0:
            return i
    if last is None:
        raise ValueError('cannot sample from an empty weight sequence')
    # Rounding left a sliver of probability mass unassigned; give it to the last item.
    return last


# Matches any run of whitespace; sf2array uses it to remove all whitespace from a sunfish board string.
strip_whitespace = re.compile(r"\s+")
# Character -> piece code table: '.' -> 0, 'pnbrqk' -> 1..6, 'PNBRQK' -> 8..13
# (the same codes that get_model expands into indicator planes).
translate_pieces = str.maketrans(".pnbrqkPNBRQK", "\x00" + "\x01\x02\x03\x04\x05\x06" + "\x08\x09\x0a\x0b\x0c\x0d")


def sf2array(pos, flip):
    """Create a NumPy piece-code array from a sunfish position.

    All whitespace is removed from ``pos.board`` (64 characters should
    remain) and each character is mapped to its piece code through
    ``translate_pieces``.

    Args:
        pos: object with a ``board`` string attribute.
        flip: when true, the 8x8 board is mirrored left-to-right
            (``numpy.fliplr``) before being flattened again.

    Returns:
        A writable ``int8`` array of length 64.
    """
    squares = strip_whitespace.sub('', pos.board)  # should be 64 characters now
    codes = squares.translate(translate_pieces)
    # numpy.fromstring's binary mode is deprecated; frombuffer is its
    # replacement.  Every code is below 128, so latin-1 maps one character to
    # one byte.  copy() keeps the result writable, as fromstring's was.
    m = numpy.frombuffer(codes.encode('latin-1'), dtype=numpy.int8).copy()
    if flip:
        m = numpy.fliplr(m.reshape(8, 8)).reshape(64)
    return m


def game_rein(f_pred, f_train, learning_rate, momentum=0.9):
    """Play one self-play game and train on its outcome.

    At every turn all moves from ``pos.gen_moves()`` are scored with
    ``f_pred`` and one is sampled from the softmax of the scores.  The game
    ends when the resulting board string no longer contains ``'K'``.  The
    boards reached are then labelled by the winner and passed to ``f_train``
    for a single update.

    Args:
        f_pred: callable mapping a list of encoded boards to scores.
        f_train: callable ``(X, Y, learning_rate, momentum) -> (loss, frac_correct)``.
        learning_rate, momentum: forwarded to ``f_train``.

    Returns:
        ``(number_of_positions, loss, frac_correct)``, or ``None`` when the
        game did not finish within 100 turns or a position had no moves.
    """
    pos = sunfish.Position(sunfish.initial, 0, (True,True), (True,True), 0, 0)

    data = []

    max_turns = 100

    for turn in range(max_turns):
        # Generate all possible moves
        Xs = []
        new_poss = []
        for move in pos.gen_moves():
            new_pos = pos.move(move)
            Xs.append(sf2array(new_pos, False))
            new_poss.append(new_pos)

        if not Xs:
            # Nothing to choose from: discard the game like an unfinished one.
            return None

        # Softmax probabilities.  Subtracting the maximum leaves the result
        # unchanged mathematically but prevents exp() from overflowing.
        ys = f_pred(Xs)
        zs = numpy.exp(ys - numpy.max(ys))
        ps = zs / zs.sum()
        i = weighted_random_sample(ps)
        if i is None:
            # Defensive: a sampler that fell off the end because of rounding.
            i = len(new_poss) - 1

        # Append moves
        data.append((turn % 2, Xs[i]))
        pos = new_poss[i]

        if pos.board.find('K') == -1:
            break

        if turn == 0 and random.random() < 0.01:
            print(ys)
    else:
        # Turn limit reached without a king capture.  Using for/else keeps a
        # game that was decided exactly on the last allowed turn.
        return None

    # White moves all even turns
    # If turn is even, it means white just moved, and black is up next
    # That means if turn is even, all even (black) boards are losses
    # If turn is odd, all odd (white) boards are losses
    win = (turn % 2) # 0 = white, 1 = black

    X = numpy.array([x for t, x in data], dtype=theano.config.floatX)
    Y = numpy.array([(t ^ win) for t, x in data], dtype=theano.config.floatX)

    loss, frac_correct = f_train(X, Y, learning_rate, momentum)

    return len(data), loss, frac_correct


def reinforcement_train():
    """Refine a saved model through self-play; runs until interrupted.

    Parameters are loaded from ``model_reinforcement.pickle`` if it exists,
    otherwise from ``model.pickle``.  Each iteration plays one game with
    ``game_rein`` using a learning rate that decays exponentially with
    wall-clock time, prints exponentially weighted running averages of the
    loss and accuracy, and saves the model every 100 completed games.

    Raises:
        FileNotFoundError: when neither model file exists.
    """
    model_files = ['model_reinforcement.pickle', 'model.pickle']
    params = get_params(model_files)
    if params is None:
        # Fail with a clear message instead of a TypeError from unpacking None.
        raise FileNotFoundError('no model file found; expected one of: ' + ', '.join(model_files))
    Ws, bs = params
    Ws_s, bs_s = get_parameters(WW=Ws, bs=bs)
    f_pred = get_predict(Ws_s, bs_s)
    f_train = get_update(Ws_s, bs_s)

    # i: completed games; n, l, c: decayed sums of positions, loss and accuracy.
    i, n, l, c = 0, 0.0, 0.0, 0.0

    base_learning_rate = 1e-2
    t0 = time.time()

    while True:
        learning_rate = base_learning_rate * math.exp(-(time.time() - t0) / 86400)
        r = game_rein(f_pred, f_train, learning_rate)
        if r is None:
            # Discarded game: nothing was trained on.
            continue
        i += 1
        n_t, l_t, c_t = r
        n = n*0.999 + n_t
        l = l*0.999 + l_t*n_t
        c = c*0.999 + c_t*n_t
        print('%6d %9.5f %9.5f %9.5f' % (i, learning_rate, l / n, c / n))

        if i % 100 == 0:
            print('dumping model...')
            dump(Ws_s, bs_s)



In [None]:
# Start self-play training. The loop inside has no exit condition, so this cell
# runs until the kernel is interrupted.
reinforcement_train()

loading model_reinforcement.pickle
building expression graph
building expression graph




9
##
12
##
27
##
16
##
27
##
6
##
9
##
10
##
12
##
9
##
12
##
21
##
25
##
12
##
22
##
5
##
17
##
16
##
3
##
22
##
25
##
14
##
7
##
3
##
1
##
18
##
16
##
1
##
19
##
14
##
37
##
2
##
12
##
7
##
24
##
6
##
4
##
13
##
16
##
5
##
21
##
20
##
16
##
3
##
6
##
6
##
10
##
1
##
12
##
12
##
15
##
6
##
0
##
26
##
5
##
23
##
14
##
26
##
1
##
13
##
18
##
23
##
5
##
5
##
9
##
0
##


KeyboardInterrupt: ignored

In [None]:
class MyGameNode(chess.pgn.GameNode):
    """Minimal game node that simply carries an explicit board.

    The players in this notebook create one node per move, store the board
    reached in it and set ``parent`` / ``move`` by hand.  The base class
    initialiser is not called.
    """

    def __init__(self, bb=None):
        # A default of `chess.Board()` in the signature would be evaluated
        # once and shared by every node created without an argument; build a
        # fresh board per instance instead.
        self.bb = chess.Board() if bb is None else bb

    def board(self):
        """Return the board stored in this node (the same object on every call)."""
        return self.bb

    def ply(self):
        """Placeholder; always returns None."""
        pass

    # Base-class signature for reference: accept(self, visitor: BaseVisitor[ResultT]) -> ResultT
    def accept(self):
        """Placeholder; takes no visitor and always returns None."""
        pass

In [None]:
def get_model_from_pickle(fn):
    """Load pickled ``(Ws, bs)`` from ``fn`` and compile a prediction function.

    Args:
        fn: path of a pickle file containing a ``(Ws, bs)`` tuple.

    Returns:
        A compiled Theano function mapping a batch of encoded boards to scores.
    """
    # SECURITY: unpickling can execute arbitrary code - only load trusted model files.
    with open(fn, 'rb') as f:
        Ws, bs = pickle.load(f)

    Ws_s, bs_s = get_parameters(WW=Ws, bs=bs)
    x, p = get_model(Ws_s, bs_s)

    predict = theano.function(
        inputs=[x],
        outputs=p)

    return predict


# Score negamax assigns to a child position whose board string contains no 'K'
# (a king was captured); such a move is taken at face value and never searched deeper.
CHECKMATE_SCORE = 1e6

def negamax(pos, depth, alpha, beta, color, func):
    """Negamax search with alpha-beta pruning, guided by the learned model.

    All children of ``pos`` are scored in one batch with ``func`` and visited
    from best to worst score.  At ``depth == 1``, or for a child that
    captures the king, the model score is used directly; otherwise the child
    is searched recursively and its negated value is used.

    Args:
        pos: position exposing ``gen_moves()`` and ``move(move)``.
        depth: remaining search depth (>= 1).
        alpha, beta: current search window.
        color: 1 or -1; boards are encoded with ``flip=True`` when it is 1.
        func: callable mapping a list of encoded boards to scores.

    Returns:
        ``(best_value, best_move)``.

    Raises:
        Exception: when ``pos`` has no moves at all.
    """
    moves = []
    X = []
    pos_children = []
    for move in pos.gen_moves():
        pos_child = pos.move(move)
        moves.append(move)
        X.append(sf2array(pos_child, flip=(color==1)))
        pos_children.append(pos_child)

    if len(X) == 0:
        # Previously the exception object was *returned*, which only surfaced
        # later as a confusing unpacking error in the caller.
        raise Exception('eh?')

    # Use model to predict scores
    scores = func(X)

    for i, pos_child in enumerate(pos_children):
        if pos_child.board.find('K') == -1:
            scores[i] = CHECKMATE_SCORE

    # Best-scored moves first, so the window narrows as early as possible.
    child_nodes = sorted(zip(scores, moves), reverse=True)

    best_value = float('-inf')
    best_move = None

    for score, move in child_nodes:
        if depth == 1 or score == CHECKMATE_SCORE:
            value = score
        else:
            pos_child = pos.move(move)
            neg_value, _ = negamax(pos_child, depth-1, -beta, -alpha, -color, func)
            value = -neg_value

        if value > best_value:
            best_value = value
            best_move = move

        if value > alpha:
            alpha = value

        if alpha > beta:
            # Cut-off: the opponent already has a better alternative elsewhere.
            break

    return best_value, best_move


def create_move(board, crdn):
    """Build a ``chess.Move`` from the coordinate string ``crdn``.

    Workaround for pawn promotions: when the moving piece is a pawn and the
    destination lies on the first or last rank, the move is turned into a
    queen promotion (always promote to queen).
    """
    move = chess.Move.from_uci(crdn)
    mover = board.piece_at(move.from_square)
    reaches_edge_rank = (move.to_square // 8) in (0, 7)
    if mover.piece_type == chess.PAWN and reaches_edge_rank:
        move.promotion = chess.QUEEN
    return move


class Player:
    """Interface for anything that can make a move in a game."""

    def move(self, gn_current):
        """Return the game node reached after this player's move; subclasses must override."""
        raise NotImplementedError


class Computer(Player):
    """Player that picks moves with ``negamax`` and the learned model.

    It keeps its own sunfish position in step with the game by replaying the
    opponent's last move before every search.
    """

    def __init__(self, func, maxd=5):
        self._maxd = maxd
        self._func = func
        self._pos = sunfish.Position(sunfish.initial, 0, (True,True), (True,True), 0, 0)

    def move(self, gn_current):
        """Search to depth ``maxd`` and return a new node holding the chosen move."""
        assert(gn_current.board().turn == True)

        previous = gn_current.move
        if previous is not None:
            # Replay the opponent's move; its squares are mirrored (119 - index)
            # before being applied to the internal position.
            uci = str(previous)
            origin = 119 - sunfish.parse(uci[0:2])
            target = 119 - sunfish.parse(uci[2:4])
            self._pos = self._pos.move((origin, target))

        depth = self._maxd
        started = time.time()
        best_value, best_move = negamax(
            self._pos, depth, float('-inf'), float('inf'), 1, self._func)
        crdn = sunfish.render(best_move[0]) + sunfish.render(best_move[1])
        print(depth, best_value, crdn, time.time() - started)

        self._pos = self._pos.move(best_move)
        move = create_move(gn_current.board(), crdn)

        # Child node: a copy of the current board with the chosen move played.
        gn_new = MyGameNode(gn_current.board().copy())
        gn_new.parent = gn_current
        gn_new.move = move
        gn_new.board().push(move)

        return gn_new


def get_move(move_str, bb):
    """Parse ``move_str`` as a UCI move and check it against ``bb.legal_moves``.

    Args:
        move_str: text entered by the user, e.g. ``'e2e4'``.
        bb: board exposing ``legal_moves``.

    Returns:
        The parsed move when it is legal, otherwise ``False`` (after printing why).
    """
    try:
        move = chess.Move.from_uci(move_str)
    except ValueError:
        # python-chess reports malformed UCI text with a ValueError; catching
        # only that lets Ctrl-C and genuine bugs propagate.
        print('cant parse')
        return False
    if move not in bb.legal_moves:
        print('not a legal move')
        return False
    return move

class Human(Player):
    """Player whose moves are typed in as UCI strings on standard input."""

    def move(self, gn_current):
        """Prompt until a legal move is entered and return the resulting node."""
        bb = gn_current.board()

        move = False
        while not move:
            print('your turn:')
            move = get_move(input(), bb)

        # Child node: a copy of the current board with the entered move played.
        gn_new = MyGameNode(gn_current.board().copy())
        gn_new.parent = gn_current
        gn_new.move = move
        gn_new.board().push(move)

        return gn_new


class Sunfish(Player):
    """Player backed by the sunfish engine's own searcher.

    NOTE(review): the sunfish checkout is not pinned, so the Searcher/search
    signatures used here should be confirmed against the cloned version.
    """
    def __init__(self, secs=1):
        self._searcher = sunfish.Searcher()
        self._pos = sunfish.Position(sunfish.initial, 0, (True,True), (True,True), 0, 0)
        self._secs = secs

    def move(self, gn_current):
        """Replay the opponent's last move, run the search and return the new node.

        Returns the node for the first result the search yields; returns None
        implicitly when the search yields nothing.
        """

        assert(gn_current.board().turn == False)

        # Apply last_move
        crdn = str(gn_current.move)
        move = (sunfish.parse(crdn[0:2]), sunfish.parse(crdn[2:4]))
        self._pos = self._pos.move(move)

        t0 = time.time()
        # NOTE(review): the time budget is passed wrapped in a tuple; whether
        # search() treats this argument as a time limit depends on the sunfish
        # version - confirm.
        datas = self._searcher.search(self._pos, (self._secs,))

        # The body returns during the first iteration, so only the first
        # yielded result is ever used.
        for data in datas:

            depth, move, score = data
            print(time.time() - t0, move, score)
            self._pos = self._pos.move(move)

            # Squares are mirrored (119 - index) before rendering to coordinates.
            crdn = sunfish.render(119-move[0]) + sunfish.render(119 - move[1])
            move = create_move(gn_current.board(), crdn)
            
            gn_new = MyGameNode()
            gn_new.parent = gn_current
            gn_new.move = move

            # Re-run the initialiser to attach a copy of the current board.
            gn_new.__init__(gn_current.board().copy())

            gn_new.board().push(move)

            return gn_new

def game(func):
    """Play one game between the model-driven computer (A) and a human (C).

    Args:
        func: prediction function handed to the ``Computer`` player.

    Returns:
        ``(outcome, times)`` where ``outcome`` is the winning side's letter,
        ``'-'`` for a draw (stalemate or fifty-move claim) or
        ``'<side>-exception'`` when a player raised, and ``times`` maps each
        side to the seconds it spent thinking.
        ``None`` when the game is interrupted with Ctrl-C.
    """
    gn_current = chess.pgn.Game()

    maxd = random.randint(1, 2) # max depth for deep pink
    secs = random.random() # max seconds for sunfish

    print('maxd %f secs %f' % (maxd, secs))

    player_a = Computer(func, maxd=maxd)
    player_b = Sunfish(secs=secs)  # constructed but not part of the line-up below
    player_c = Human()

    times = {'A': 0.0, 'B': 0.0, 'C':0.0}

    while True:
        for side, player in [('A', player_a), ('C', player_c)]:
            t0 = time.time()
            try:
                gn_current = player.move(gn_current)
            except KeyboardInterrupt:
                return
            except Exception:
                # Report the failure and attribute it to the side that raised.
                # Narrowed from a bare `except:` so SystemExit and similar
                # are not swallowed.
                traceback.print_exc()
                return side + '-exception', times

            times[side] += time.time() - t0
            print('=========== Player %s: %s' % (side, gn_current.move))
            s = str(gn_current.board())
            print(s)
            if gn_current.board().is_checkmate():
                return side, times
            elif gn_current.board().is_stalemate():
                return '-', times
            elif gn_current.board().can_claim_fifty_moves():
                return '-', times
            elif s.find('K') == -1 or s.find('k') == -1:
                # Both AI's suck at checkmating, so also detect capturing the king
                return side, times

            
            
def play():
    """Play games in a loop and append one result line per game to ``stats.txt``.

    The model is loaded from the path in the module-level ``model_name``.
    The loop ends when ``game`` returns ``None``, which it does when the
    user interrupts a game.
    """
    func = get_model_from_pickle(model_name)
    while True:
        result = game(func)
        if result is None:
            # Interrupted game: stop cleanly instead of failing to unpack None.
            break
        side, times = result
        with open('stats.txt', 'a') as f:
            f.write('%s %f %f\n' % (side, times['A'], times['B']))



In [None]:
# Pickle file that play() loads the model parameters from (read as a global inside play()).
model_name = 'model_reinforcement.pickle'

# Start the interactive game loop (computer vs. human on standard input).
play()