# ShallowMind 
## An exploratory endeavor into AI Chess


#### Download libraries that could aid the process

In [18]:
#!pip install python-chess
#!pip install pgnparser
#!pip install pgnlib
#!pip install ipython-autotime ### time is becoming an issue...

#### Import libraries for EDA, and processing

In [19]:
import chess
import pgn

import pandas as pd
import numpy as np

import itertools

### Useful sources of Chess game data - 
Chess GrandMaster games: https://chess-db.com/public/downloads/gamesfordownload.jsp - 275,606 games


#### Note: parsing the PGN file data returns only the first game in the file; limited documentation is available for further exploration.
#### More processing required to access game lines and results.

In [64]:
import chess.pgn

# Parse only the first game; the context manager closes the file handle as
# soon as parsing is done instead of leaving it open for the kernel's lifetime.
with open("pgnprepro.pgn") as pgn_file:
    first_game = chess.pgn.read_game(pgn_file)

# Formatting mainline_moves() yields the main line as numbered SAN text.
print(f"{first_game.mainline_moves()}\n")


1. d4 e6 2. c4 d5 3. Nf3 Nf6 4. Nc3 Bb4 5. Bg5 h6 6. Bxf6 Qxf6 7. e3 O-O 8. Rc1 dxc4 9. Bxc4 c5 10. O-O cxd4 11. Ne4 Qe7 12. exd4 Nc6 13. Qe2 Bd7 14. a3 Bd6 15. Rfd1 Rad8 16. Qe3 Rfe8 17. b4 a6 18. Be2 Nb8 19. Ne5 Ba4 20. Rd2 Bxe5 21. dxe5 Rxd2 22. Qxd2 Rd8 23. Qb2 Bc6 24. Nd6 Bd5 25. f4 Nc6 26. a4 Qc7 27. b5 Qb6+ 28. Kf1 axb5 29. axb5 Ne7 30. Qc3 Rf8 31. Qc5 Qa5 32. Rd1 Qa2 33. g3 f6 34. exf6 Rxf6 35. Qc3 Qa7 36. Ne8 Rf7 37. Bh5 g6 38. Nf6+ Rxf6 39. Qxf6 gxh5 40. Qxe7 Bc4+ 41. Kg2 Bd5+ 42. Kh3 Qa2 43. Rxd5 Qxd5 44. Kh4 Qxb5 45. Qxe6+ Kg7 46. f5 Qc6 47. Qe7+ Kg8 48. Kxh5 b5 49. g4 b4 50. Qxb4 Qc7 51. Qb3+ Kh8 52. Qe6 Qf7+ 53. Qg6 Qc7 54. Qxh6+ Kg8 55. Qe6+ Kh8 56. Qe8+ Kh7 57. h4 Qb7 58. Qg6+ Kh8 59. Qh6+ Kg8 60. Qe6+ Kh8 61. g5 Qf7+ 62. g6 Qf8 63. g7+



#### Work-around for accessing moves and results

In [134]:
# Layout assumed by this work-around (TODO confirm against the preprocessing
# step): each game record spans 16 lines, the move text sits at line index 11
# of the record and the result at line index 13.
LINES_PER_GAME = 16
MOVES_LINE_INDEX = 11
RESULT_LINE_INDEX = 13

# Read every line (newline characters included) and close the file afterwards.
with open('pgnprepro.pgn') as pgn_handle:
    game_list = pgn_handle.readlines()

# Extended slices stop cleanly at the end of the list, so a file whose length
# happens to equal one of the stepped indices can no longer raise IndexError.
all_moves_list = game_list[MOVES_LINE_INDEX::LINES_PER_GAME]
all_results = game_list[RESULT_LINE_INDEX::LINES_PER_GAME]

In [135]:
len(all_moves_list), len(all_results)

(10, 10)

#### Function that cleans up the move string

In [136]:
def get_move_list(move_string):
    '''
    Convert one line of PGN move text into a flat list of SAN moves.

    The line is split on whitespace, so a trailing newline (or its absence)
    can never truncate the final move. Move-number tokens such as "12." or
    "12..." are dropped, a move number glued to its move ("1.e4") is reduced
    to the move, and game-result markers ("1-0", "0-1", "1/2-1/2", "*") are
    discarded.

    Parameters
    ----------
    move_string : str
        Move text, e.g. "1. d4 e6 2. c4 d5\\n".

    Returns
    -------
    list of str
        SAN moves in playing order, e.g. ['d4', 'e6', 'c4', 'd5'].
        An empty or whitespace-only string gives an empty list.
    '''
    result_markers = {'1-0', '0-1', '1/2-1/2', '*'}

    san_moves = []
    for token in move_string.split():
        if token in result_markers:
            continue
        # SAN never contains '.', so whatever follows the last dot is the move;
        # a bare move number ("12.") leaves an empty string and is skipped.
        move = token.rpartition('.')[2]
        if move:
            san_moves.append(move)
    return san_moves

#### Function that cleans up the results

In [148]:
def white_results(result_string_list):
    '''
    Translate PGN result strings into outcomes from White's point of view.

    Each entry has its trailing line terminator removed (if there is one) and
    then every occurrence of '1-0', '0-1' and '1/2-1/2' is replaced by 'win',
    'lose' and 'draw' respectively. Any other text in the entry is kept.

    Parameters
    ----------
    result_string_list : iterable of str
        Raw result lines, with or without a trailing newline.

    Returns
    -------
    list of str
        One translated string per input entry, in the same order.
    '''
    replacements = (('1-0', 'win'), ('0-1', 'lose'), ('1/2-1/2', 'draw'))

    white_result_list = []
    for raw_result in result_string_list:
        # Strip only line terminators: blindly dropping the last character
        # would damage an entry that has no trailing newline (e.g. the final
        # line of a file).
        result = raw_result.rstrip('\r\n')
        for score, outcome in replacements:
            result = result.replace(score, outcome)
        white_result_list.append(result)
    return white_result_list

In [150]:
white_results(all_results)

['win', 'lose', 'draw', 'win', 'draw', 'win', 'win', 'win', 'lose', 'draw']

#### Converts string of all the game moves into lists of moves in standard algebraic notation (san) 

In [68]:
'''
%%time
all_san_list = [] #### SLOW - need list comprehension maybe quicker?
for game in all_moves_list:
    all_san_list.append(get_move_list(game))
'''

'\n%%time\nall_san_list = [] #### SLOW - need list comprehension maybe quicker?\nfor game in all_moves_list:\n    all_san_list.append(get_move_list(game))\n'

In [69]:
%%time
# Parse every raw move line into its list of SAN moves (one list per game).
all_san_list = list(map(get_move_list, all_moves_list))

CPU times: user 661 µs, sys: 20 µs, total: 681 µs
Wall time: 690 µs


In [152]:
all_san_list[0]

'e6'

#### Creating a function that cleans up FEN notation

In [102]:
def clean_fen(string):
    '''
    Flatten a FEN piece-placement string into one character per square.

    Each digit from 2 to 8 is expanded into that many '1' characters, the
    rank separator '/' is removed, and every other character is kept as is.

    Returns a list of single-character strings.
    '''
    squares = []
    for symbol in string:
        if symbol == '/':
            # Rank separators carry no square information.
            continue
        if symbol in '2345678':
            # A run of N empty squares becomes N individual '1' markers.
            squares.extend('1' * int(symbol))
        else:
            squares.append(symbol)
    return squares

#### Creating a function that converts moves to board states in Forsyth–Edwards Notation (FEN)

In [103]:
def get_board_state(move_list):
    '''
    Replay a game from the standard starting position and record the board
    before every move, each flattened by clean_fen.

    Parameters
    ----------
    move_list : list of str
        SAN moves of one game, in playing order.

    Returns
    -------
    list of list of str
        One flattened board per attempted move, taken before that move is
        played. The position after the final move is not recorded.

    Notes
    -----
    Replay stops at the first move python-chess rejects (push_san raises a
    ValueError for invalid, illegal or ambiguous SAN). Continuing past that
    point would apply the remaining moves to a board that no longer matches
    the game and would record the same position repeatedly.
    '''
    board = chess.Board()

    board_states = []
    for move in move_list:
        # Snapshot the position the player to move is looking at.
        board_states.append(clean_fen(board.board_fen()))
        try:
            board.push_san(move)
        except ValueError:
            # Unplayable token (e.g. a truncated move or a stray result
            # marker): the rest of the line cannot be trusted.
            break
    return board_states

In [104]:
def get_board_state_string(move_list):
    '''
    Replay a game from the standard starting position and record the
    piece-placement FEN string of the board before every move.

    Parameters
    ----------
    move_list : list of str
        SAN moves of one game, in playing order.

    Returns
    -------
    list of str
        One board_fen() string per attempted move, taken before that move is
        played. The position after the final move is not recorded.

    Notes
    -----
    Replay stops at the first move python-chess rejects (push_san raises a
    ValueError for invalid, illegal or ambiguous SAN), because any later move
    would be applied to a board that no longer matches the game.
    '''
    board = chess.Board()

    board_states = []
    for move in move_list:
        board_states.append(board.board_fen())
        try:
            board.push_san(move)
        except ValueError:
            # Unplayable token: stop instead of silently carrying on.
            break
    return board_states

In [153]:
%%time
# One list of flattened board states per game, in the same order as all_san_list.
all_board_state_list = [get_board_state(san_moves) for san_moves in all_san_list]

CPU times: user 178 ms, sys: 6.23 ms, total: 184 ms
Wall time: 254 ms


In [107]:
%%time
# One row per game and one column per recorded position; games with fewer
# positions than the longest game are padded with missing values.
df_board_states = pd.DataFrame(all_board_state_list)

CPU times: user 21.2 ms, sys: 2.07 ms, total: 23.2 ms
Wall time: 26.7 ms


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,115,116,117,118,119,120,121,122,123,124
0,"[r, n, b, q, k, b, n, r, p, p, p, p, p, p, p, ...","[r, n, b, q, k, b, n, r, p, p, p, p, p, p, p, ...","[r, n, b, q, k, b, n, r, p, p, p, p, 1, p, p, ...","[r, n, b, q, k, b, n, r, p, p, p, p, 1, p, p, ...","[r, n, b, q, k, b, n, r, p, p, p, 1, 1, p, p, ...","[r, n, b, q, k, b, n, r, p, p, p, 1, 1, p, p, ...","[r, n, b, q, k, b, 1, r, p, p, p, 1, 1, p, p, ...","[r, n, b, q, k, b, 1, r, p, p, p, 1, 1, p, p, ...","[r, n, b, q, k, 1, 1, r, p, p, p, 1, 1, p, p, ...","[r, n, b, q, k, 1, 1, r, p, p, p, 1, 1, p, p, ...",...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, q, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, k, 1, q, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, k, 1, q, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, k, 1, 1, q, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, k, 1, 1, q, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, k, 1, q, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, k, 1, q, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, k, 1, 1, 1, 1, 1, q, 1, ...","[1, 1, 1, 1, 1, 1, 1, k, 1, 1, 1, 1, 1, q, 1, ...","[1, 1, 1, 1, 1, q, 1, k, 1, 1, 1, 1, 1, 1, 1, ..."


In [95]:
df_board_states.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,115,116,117,118,119,120,121,122,123,124
0,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR,rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR,rnbqkbnr/pppp1ppp/4p3/8/3P4/8/PPP1PPPP/RNBQKBNR,rnbqkbnr/pppp1ppp/4p3/8/2PP4/8/PP2PPPP/RNBQKBNR,rnbqkbnr/ppp2ppp/4p3/3p4/2PP4/8/PP2PPPP/RNBQKBNR,rnbqkbnr/ppp2ppp/4p3/3p4/2PP4/5N2/PP2PPPP/RNBQ...,rnbqkb1r/ppp2ppp/4pn2/3p4/2PP4/5N2/PP2PPPP/RNB...,rnbqkb1r/ppp2ppp/4pn2/3p4/2PP4/2N2N2/PP2PPPP/R...,rnbqk2r/ppp2ppp/4pn2/3p4/1bPP4/2N2N2/PP2PPPP/R...,rnbqk2r/ppp2ppp/4pn2/3p2B1/1bPP4/2N2N2/PP2PPPP...,...,8/1q5k/6Q1/5P1K/6PP/8/8/8,7k/1q6/6Q1/5P1K/6PP/8/8/8,7k/1q6/7Q/5P1K/6PP/8/8/8,6k1/1q6/7Q/5P1K/6PP/8/8/8,6k1/1q6/4Q3/5P1K/6PP/8/8/8,7k/1q6/4Q3/5P1K/6PP/8/8/8,7k/1q6/4Q3/5PPK/7P/8/8/8,7k/5q2/4Q3/5PPK/7P/8/8/8,7k/5q2/4Q1P1/5P1K/7P/8/8/8,5q1k/8/4Q1P1/5P1K/7P/8/8/8
1,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR,rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR,rnbqkb1r/pppppppp/5n2/8/3P4/8/PPP1PPPP/RNBQKBNR,rnbqkb1r/pppppppp/5n2/8/2PP4/8/PP2PPPP/RNBQKBNR,rnbqkb1r/pppppp1p/5np1/8/2PP4/8/PP2PPPP/RNBQKBNR,rnbqkb1r/pppppp1p/5np1/8/2PP4/2N5/PP2PPPP/R1BQ...,rnbqkb1r/ppp1pp1p/5np1/3p4/2PP4/2N5/PP2PPPP/R1...,rnbqkb1r/ppp1pp1p/5np1/3P4/3P4/2N5/PP2PPPP/R1B...,rnbqkb1r/ppp1pp1p/6p1/3n4/3P4/2N5/PP2PPPP/R1BQ...,rnbqkb1r/ppp1pp1p/6p1/3n4/3P4/2N1P3/PP3PPP/R1B...,...,8/p2P2Bk/8/1p6/8/8/6RK/4q3,8/p2Pq1Bk/8/1p6/8/8/6RK/8,8/p2Pq1Bk/8/1p6/8/8/3R3K/8,8/p2P2Bk/8/1p6/7q/8/3R3K/8,8/p2P2Bk/8/1p6/7q/8/3R4/6K1,,,,,
2,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR,rnbqkbnr/pppp1ppp/4p3/8/4P3/8/PPPP1PPP/RNBQKBNR,rnbqkbnr/pppp1ppp/4p3/8/3PP3/8/PPP2PPP/RNBQKBNR,rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPP2PPP/RNBQKBNR,rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/2N5/PPP2PPP/R1BQ...,rnbqkb1r/ppp2ppp/4pn2/3p4/3PP3/2N5/PPP2PPP/R1B...,rnbqkb1r/ppp2ppp/4pn2/3p2B1/3PP3/2N5/PPP2PPP/R...,rnbqkb1r/ppp2ppp/4pn2/6B1/3Pp3/2N5/PPP2PPP/R2Q...,rnbqkb1r/ppp2ppp/4pn2/6B1/3PN3/8/PPP2PPP/R2QKBNR,...,,,,,,,,,,
3,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR,rnbqkbnr/pp1ppppp/8/2p5/4P3/8/PPPP1PPP/RNBQKBNR,rnbqkbnr/pp1ppppp/8/2p5/4P3/5N2/PPPP1PPP/RNBQKB1R,rnbqkbnr/pp2pppp/3p4/2p5/4P3/5N2/PPPP1PPP/RNBQ...,rnbqkbnr/pp2pppp/3p4/1Bp5/4P3/5N2/PPPP1PPP/RNB...,r1bqkbnr/pp1npppp/3p4/1Bp5/4P3/5N2/PPPP1PPP/RN...,r1bqkbnr/pp1npppp/3p4/1Bp5/4P3/5N2/PPPP1PPP/RN...,r1bqkbnr/1p1npppp/p2p4/1Bp5/4P3/5N2/PPPP1PPP/R...,r1bqkbnr/1p1npppp/p2p4/2p5/4P3/3B1N2/PPPP1PPP/...,...,,,,,,,,,,
4,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBNR,rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQKB1R,r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNB...,r1bqkbnr/pppp1ppp/2n5/4p3/3PP3/5N2/PPP2PPP/RNB...,r1bqkbnr/pppp1ppp/2n5/8/3pP3/5N2/PPP2PPP/RNBQKB1R,r1bqkbnr/pppp1ppp/2n5/8/3NP3/8/PPP2PPP/RNBQKB1R,r1bqk1nr/pppp1ppp/2n5/2b5/3NP3/8/PPP2PPP/RNBQKB1R,r1bqk1nr/pppp1ppp/2n5/2b5/4P3/1N6/PPP2PPP/RNBQ...,...,,,,,,,,,,


In [None]:
'''
len(all_san_list)
#xtea_df = pd.DataFrame(xtea)
all_san_list.pop(252984)
len(all_san_list)
'''

In [None]:
'''
data_with_moves
'''

In [None]:
'''
data_with_moves[473].value_counts()
'''

In [None]:
'''
data_with_moves.loc[data_with_moves[473] == 'Rh2']
'''

#### Using built-in PGN methods

In [None]:
import chess.pgn

# Parse the first game only and close the file handle straight away.
with open("GMallboth.pgn") as pgn_file:
    first_game = chess.pgn.read_game(pgn_file)

# Start from the game's own initial position, as the later replay cell does.
board = first_game.board()
game_1 = []

# Even-indexed plies are White's moves, odd-indexed plies are Black's.
mainline = list(first_game.mainline_moves())
white_move = mainline[0::2]
black_move = mainline[1::2]
count = len(mainline)

for i, white in enumerate(white_move):
    # Show the position before each of White's moves.
    print(board.board_fen())
    board.push(white)
    if i < len(black_move):
        board.push(black_move[i])
    else:
        # The game ended on White's move: push a null move so the board is
        # still advanced by a full move pair.
        board.push(chess.Move.from_uci('0000'))
board

In [None]:
board

In [None]:
#board = chess.Board()
#board.push_san("O-O-O")
board.board_fen()
#board

In [None]:
board.push_san('d5')
board

In [None]:
board.push_san('b5')
board

In [None]:
board.legal_moves

In [None]:
# Read the whole PGN file as text; the context manager closes the handle afterwards.
with open('GMallboth.pgn') as pgn_handle:
    pgn_text = pgn_handle.read()

In [None]:
# pgn.loads(pgn_text)

In [None]:
import pgn
import sys

# NOTE(review): under a notebook kernel sys.argv typically carries the
# kernel's own launch arguments rather than a data file, so argv[1] is only
# honoured when it names a PGN file; otherwise the notebook's data set is used.
if len(sys.argv) > 1 and sys.argv[1].lower().endswith('.pgn'):
    pgn_path = sys.argv[1]
else:
    pgn_path = 'GMallboth.pgn'

# The context manager closes the file even if reading raises.
with open(pgn_path) as f:
    pgn_text = f.read()

games = pgn.loads(pgn_text)
for game in games:
    print(game.moves)

In [None]:
import chess.pgn

# Two consecutive read_game calls on the same handle give the first and the
# second game; the handle is closed once both have been parsed.
with open("GMallboth.pgn") as pgn_file:
    first_game = chess.pgn.read_game(pgn_file)
    second_game = chess.pgn.read_game(pgn_file)


# Iterate through all moves and play them on a board.
board = first_game.board()
for move in first_game.mainline_moves():
    board.push(move)

In [None]:
import chess.pgn

# Parse the first two games and close the file handle afterwards.
with open("GMallboth.pgn") as pgn_file:
    first_game = chess.pgn.read_game(pgn_file)
    second_game = chess.pgn.read_game(pgn_file)

# Main line of the first game as a list of move objects.
list(first_game.mainline_moves())

In [None]:
import chess.pgn

# Read both games in this cell: printing second_game while its read was
# commented out only worked when an earlier cell had already defined it.
with open("GMallboth.pgn") as pgn_file:
    first_game = chess.pgn.read_game(pgn_file)
    second_game = chess.pgn.read_game(pgn_file)

print(f'{first_game.mainline_moves()}\n\n{second_game.mainline_moves()}')

In [None]:
'''
with open("GMallboth.pgn") as f:
    line = f.readline()
    while line:
        print(line, end="")
        line = f.readline()
'''

In [None]:
board = chess.Board()


In [None]:
['d4 e6', 'c4 d5', 'Nf3 Nf6', 'Nc3 Bb4', 'Bg5 h6', 'Bxf6 Qxf6'] #, e3 O-O 8. Rc1 dxc4 9. Bxc4 c5 10. O-O cxd4 11. Ne4 Qe7 12. exd4 Nc6 13. Qe2 Bd7 14. a3 Bd6 15. Rfd1 Rad8 16. Qe3 Rfe8 17. b4 a6 18. Be2 Nb8 19. Ne5 Ba4 20. Rd2 Bxe5 21. dxe5 Rxd2 22. Qxd2 Rd8 23. Qb2 Bc6 24. Nd6 Bd5 25. f4 Nc6 26. a4 Qc7 27. b5 Qb6+ 28. Kf1 axb5 29. axb5 Ne7 30. Qc3 Rf8 31. Qc5 Qa5 32. Rd1 Qa2 33. g3 f6 34. exf6 Rxf6 35. Qc3 Qa7 36. Ne8 Rf7 37. Bh5 g6 38. Nf6+ Rxf6 39. Qxf6 gxh5 40. Qxe7 Bc4+ 41. Kg2 Bd5+ 42. Kh3 Qa2 43. Rxd5 Qxd5 44. Kh4 Qxb5 45. Qxe6+ Kg7 46. f5 Qc6 47. Qe7+ Kg8 48. Kxh5 b5 49. g4 b4 50. Qxb4 Qc7 51. Qb3+ Kh8 52. Qe6 Qf7+ 53. Qg6 Qc7 54. Qxh6+ Kg8 55. Qe6+ Kh8 56. Qe8+ Kh7 57. h4 Qb7 58. Qg6+ Kh8 59. Qh6+ Kg8 60. Qe6+ Kh8 61. g5 Qf7+ 62. g6 Qf8 63. g7+"


In [None]:
board

In [None]:
len(list(first_game.mainline_moves()))

In [None]:
len(white_move)

In [None]:
df= pd.DataFrame()

In [None]:
import chess.pgn

# Parse the first game only and close the file handle straight away.
with open("GMallboth.pgn") as pgn_file:
    first_game = chess.pgn.read_game(pgn_file)

# def generate_BStates(game):
board = chess.Board()
game_1 = []

# Split the main line by colour: White plays the even plies, Black the odd ones.
all_plies = list(first_game.mainline_moves())
white_move = all_plies[0::2]
black_move = all_plies[1::2]
count = len(all_plies)

for i in range(len(white_move)):
    # Show the position before each of White's moves.
    print(board.board_fen())
    board.push(white_move[i])
    try:
        board.push(black_move[i])
    except IndexError:
        # Black has no reply to White's final move; push a null move instead.
        board.push(chess.Move.from_uci('0000'))
board


#for i in range(len(list(first_game.mainline_moves()))):
 #   board.push()
  #  boa
#board.push(move.from_uci('d2d4'))
#board.push_san('Nf6')


In [156]:
# Integers 0 through 10 as a one-dimensional array.
x = np.arange(11)

In [162]:
# Adding a numpy integer to a one-element list broadcasts to an ndarray;
# for i == 0 the index i - 1 wraps round to the last element of x.
for i in x:
    previous = x[i - 1]
    print(type(i + [previous]))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
