In [54]:
import pandas as pd
import os
import os.path
import chess
import chess.pgn
import numpy as np

In [34]:
# Make a list of all PGN files for one player.
# os.listdir() returns entries in arbitrary order, so the paths are sorted to
# make the merged file reproducible from run to run.
# NOTE(review): absolute, machine-specific path; consider a configurable data dir.
data = "/Users/zoepratt/Documents/GitHub/Top-Chess-Players/data/Alekhine"
inputs = sorted(
    os.path.join(data, file)
    for file in os.listdir(data)
    if file.endswith(".pgn")
)

# Concatenate all of the player's PGN files into merged_file.pgn (created in
# the current working directory). The output is written as UTF-8 explicitly so
# it matches the encoding used to read the inputs instead of depending on the
# platform default.
with open('merged_file.pgn', 'w', encoding="utf-8") as outfile:
    for fname in inputs:
        with open(fname, encoding="utf-8") as infile:
            outfile.write(infile.read())
            # newline between files so consecutive games stay separated
            outfile.write('\n')

In [38]:
def extractdata(pgn, max_games=30):
    '''
    input:
        pgn: open handle on a player's pgn file, positioned at the first
             game to read
        max_games: maximum number of games to read from the handle
             (default 30, kept small for training runs); pass None to
             read until the end of the file
    output:
        function returns step-by-step gameplay as a list
        function returns which player is playing White as a list

    Both lists are index-aligned: side[i] is the White player of the game
    whose moves are game_moves[i]. Games without any moves are skipped.

    list 'side' will be used to ensure only moves made by
    intended player will be used in creation of the GAN
    '''

    side = []
    game_moves = []
    index = 0
    while max_games is None or index < max_games:
        try:
            # Every call to read_game() consumes the next game from the
            # handle, so it must be called exactly once per iteration and the
            # result reused for both the moves and the headers.
            game = chess.pgn.read_game(pgn)
        except ValueError as error:
            # e.g. undecodable bytes in the file (UnicodeDecodeError is a
            # ValueError): report it and keep what was collected so far.
            print(index, error)
            break

        if game is None:
            # read_game() returns None once the end of the file is reached
            break

        moves = game.mainline_moves()
        if moves:
            # extracts game moves from the pgn files
            game_moves.append(moves)
            # extracts player's name playing white from pgn files
            side.append(game.headers["White"])
        index += 1

    return game_moves, side

In [39]:
def categorize_moves(game_moves, side, name):
    '''
    input: game_moves and side list from extractdata function, plus the
           name of the player of interest as it appears in side
    output:
        function returns 2 index-aligned lists of board snapshots, one
        entry per move made by the player
        list PW: board copied just before each of the player's moves
        list PB: board copied just after each of the player's moves

    The player is treated as White in a game when side[i] == name and as
    Black otherwise.
    NOTE(review): despite their names, PW/PB are not split by colour;
    they hold before/after positions - confirm this is the intent.
    '''

    PW = []  # positions before each of the player's moves
    PB = []  # positions after each of the player's moves

    for game_number, game in enumerate(game_moves):
        board = chess.Board()  # fresh board for every game

        # White makes the even-numbered plies (0, 2, ...), Black the odd ones
        player_parity = 0 if side[game_number] == name else 1

        for ply, move in enumerate(game):
            players_turn = (ply % 2 == player_parity)
            if players_turn:
                PW.append(board.copy())
            board.push(move)  # move game forward one move
            if players_turn:
                PB.append(board.copy())

    return PW, PB

In [46]:
# One-hot encoding of every board symbol as a 13-element list.
# The position of a symbol in "pnbrqkPNBRQK." is the index of its single 1:
# lowercase (black) pieces use indices 0-5, uppercase (white) pieces 6-11 and
# '.' (empty square) index 12.
chess_dict = {
    symbol: [1 if slot == "pnbrqkPNBRQK.".index(symbol) else 0 for slot in range(13)]
    for symbol in "pPnNbBrRqQkK."
}

In [58]:
def make_matrix(board):
    '''
    input: board object exposing epd() (the board snapshots produced by
           categorize_moves are used this way)
    output: list of rows, one per '/'-separated rank of the piece
            placement; each row is a list of one-character symbols where
            piece letters are kept and every empty square becomes '.'
    '''

    # the first space-separated field of the EPD string is the piece placement
    placement = board.epd().split(" ", 1)[0]

    matrix = []
    for rank in placement.split("/"):
        squares = []
        for symbol in rank:
            if symbol.isdigit():
                # a digit n stands for n consecutive empty squares
                squares.extend(['.'] * int(symbol))
            else:
                squares.append(symbol)
        matrix.append(squares)
    return matrix

In [51]:
def translate(matrix,chess_dict):
    '''
    input: matrix (list of rows of symbols, e.g. from make_matrix) and a
           dictionary mapping each symbol to its encoding
    output: list of rows with every symbol replaced by chess_dict[symbol]

    A symbol missing from chess_dict raises KeyError.
    '''
    return [[chess_dict[symbol] for symbol in row] for row in matrix]

In [56]:
def one_hot_matrix(X, Y):
    '''
    input: X and Y, lists of board positions (e.g. the two lists returned
           by categorize_moves)
    output: two numpy arrays holding, for every position of X and of Y,
            the board matrix from make_matrix translated with chess_dict

    The input lists are left untouched, so the function can be called
    again on the same lists (for example when a notebook cell is re-run).
    '''
    # build new lists instead of overwriting the caller's elements in place
    encoded_X = [translate(make_matrix(board), chess_dict) for board in X]
    encoded_Y = [translate(make_matrix(board), chess_dict) for board in Y]
    return np.array(encoded_X), np.array(encoded_Y)

In [59]:
# NOTE(review): absolute, machine-specific path; consider a configurable data dir.
ADAMS_PGN_PATH = "/Users/zoepratt/Documents/GitHub/Top-Chess-Players/data/test_Adams.pgn"

def main_adams():
    '''
    Runs the whole pipeline on the Adams test file.

    output: the two numpy arrays returned by one_hot_matrix for the
            positions before (first array) and after (second array) each
            move attributed to 'Adams, Michael'

    The file is opened inside the function and closed as soon as the games
    have been read, so the handle is not leaked and every call starts
    reading from the beginning of the file.
    '''
    with open(ADAMS_PGN_PATH) as adams_pgn:
        game_moves, side = extractdata(adams_pgn)
    PW, PB = categorize_moves(game_moves, side, 'Adams, Michael')
    return one_hot_matrix(PW, PB)

# keep the result instead of discarding it; assigning also avoids the
# arrays being echoed as the cell output
X_adams, Y_adams = main_adams()