Skip to content

Commit

Permalink
Added functionality to read the csv files into pandas dataframes from…
Browse files Browse the repository at this point in the history
… the result object.
  • Loading branch information
zq99 committed Jul 1, 2022
1 parent fbd5be2 commit cf717de
Show file tree
Hide file tree
Showing 19 changed files with 49,860 additions and 19,280 deletions.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,25 @@ If you want to check if the files have been created before doing further process
else:
print("Files not created!")

The result object also provides methods to import the created files into pandas dataframes:

pgn_data = PGNData("tal_bronstein_1982.pgn")
result = pgn_data.export()
if result.is_complete:
    # read the games file
    games_df = result.get_games_df()
    # read the moves file
    moves_df = result.get_moves_df()
    # read both files joined together
    combined_df = result.get_combined_df()
    print(games_df.head())
    print(moves_df.head())
    print(combined_df.head())


## Examples

Expand Down Expand Up @@ -127,3 +146,8 @@ This is a full list of the columns in each output file:
| fen_row{number}_{colour}_count | Number of pieces for the specified colour on this row of the board |
| fen_row{number}_{colour}_value | Total value of pieces for the specified colour on this row of the board |
| move_sequence | Sequence of moves up to the current position |


## Acknowledgements

This project makes use of the [python-chess](https://github.com/niklasf/python-chess) library.
47 changes: 47 additions & 0 deletions converter/result.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
import logging
import pandas as pd
import os

# Module-level logger used by the result helpers below to report load/merge problems.
log = logging.getLogger("pgn2data - process")
# NOTE(review): basicConfig() configures the root logger as an import-time side
# effect, which also affects applications that import this package - confirm intended.
logging.basicConfig(level=logging.INFO)


class Result:
"""
results of the extract are tracked here
Expand All @@ -21,6 +29,45 @@ def print_summary(self):
print("games file: {} | size: {}".format(self.games_file.name, self.games_file.size))
print("moves file: {} | size: {}".format(self.moves_file.name, self.moves_file.size))

def get_games_df(self):
    """
    load the exported games csv file into a pandas dataframe
    :return: dataframe of the games file, or None when the export is not complete
    """
    games_path = self.games_file.name
    games_frame = self.__get_as_dataframe(games_path)
    return games_frame

def get_moves_df(self):
    """
    load the exported moves csv file into a pandas dataframe
    :return: dataframe of the moves file, or None when the export is not complete
    """
    moves_path = self.moves_file.name
    moves_frame = self.__get_as_dataframe(moves_path)
    return moves_frame

def get_combined_df(self):
    """
    join the games and moves dataframes on their shared 'game_id' column
    :return: merged dataframe, or None (after logging an error) when either
             dataframe could not be loaded or either one is empty
    """
    games = self.get_games_df()
    moves = self.get_moves_df()

    # guard: a loader returned None, so at least one file is unavailable
    if games is None or moves is None:
        log.error("one or both files not found")
        return None

    # guard: nothing to join when either side has no rows
    if games.empty or moves.empty:
        log.error("one or both files is empty")
        return None

    return pd.merge(games, moves, on='game_id')

def create_combined_file(self, filename):
    """
    write the joined games/moves data to a single csv file
    :param filename: path of the csv file to create
    :return: True when the file exists after writing, False when the
             combined dataframe could not be produced
    """
    merged = self.get_combined_df()
    if merged is None:
        log.error("could not combine games and moves file")
        return False

    # index=False keeps the dataframe's row index out of the csv
    merged.to_csv(filename, index=False)
    return os.path.exists(filename)

def __get_as_dataframe(self, file):
    """
    read a csv file produced by the export into a pandas dataframe
    :param file: name/path of the csv file to read
    :return: dataframe with the file contents, or None (after reporting the
             file as not found) when the export is not complete or the file
             no longer exists on disk
    """
    frame = None
    if self.is_complete:
        try:
            frame = pd.read_csv(file)
        except FileNotFoundError:
            # the export completed but the file has since been moved or deleted;
            # treat it like any other missing file instead of crashing the caller
            frame = None
    if frame is None:
        self.__display_not_found(file)
    return frame

@staticmethod
def __display_not_found(file):
    """
    log an error reporting that the given file could not be found
    :param file: name/path of the file to mention in the message
    """
    message = "File not found: {}".format(file)
    log.error(message)


class ResultFile:
def __init__(self, name, size):
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
chess
chess
pandas
16 changes: 10 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,22 @@
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Education',
'Operating System :: Microsoft :: Windows :: Windows 10',
'License :: OSI Approved :: GNU Lesser General Public License v2 (LGPLv2)',
'Programming Language :: Python :: 3'
'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)',
'Programming Language :: Python :: 3 :: Only',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
]

setup(
name='pgn2data',
version='0.0.5',
version='0.0.6',
packages=['converter', 'common', 'testing'],
url='',
classifiers=classifiers,
license='GPL2',
author='Zaid Qureshi',
license='GPL-3.0+',
author='zq99',
author_email='zq99@hotmail.com',
keywords=['CHESS', 'PGN', 'NOTATION', 'DATA', 'FORSYTH–EDWARDS NOTATION', 'CSV', 'DATASET', 'DATABASE',
'NORMALIZATION', 'TABULATION', 'STRUCTURED DATA'],
Expand All @@ -24,5 +28,5 @@
],
long_description=open('README.md').read(),
long_description_content_type='text/markdown; charset=UTF-8; variant=GFM',
description='Transforms a chess pgn file into a csv dataset containing game information and move information',
description='Converts a chess pgn file into a csv dataset containing game information and move information',
)
2 changes: 1 addition & 1 deletion testing/exports/basic_format_game_info.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
game_id,game_order,event,site,date_played,round,white,black,result,white_elo,white_rating_diff,black_elo,black_rating_diff,white_title,black_title,winner,winner_elo,loser,loser_elo,winner_loser_elo_diff,eco,termination,time_control,utc_date,utc_time,variant,ply_count,date_created,file_name
a5ab6da8-88d4-4552-acf3-45d7388a845a,1,F/S Return Match,"Belgrade, Serbia JUG",1992.11.04,29,"Fischer, Robert J.","Spassky, Boris V.",1/2-1/2,,,,,,,draw,,draw,,,,,,,,,,2022-06-24T20:39:21+0000,basic_format.pgn
5580acfe-cf25-4067-bbd3-903ea22186f6,1,F/S Return Match,"Belgrade, Serbia JUG",1992.11.04,29,"Fischer, Robert J.","Spassky, Boris V.",1/2-1/2,,,,,,,draw,,draw,,,,,,,,,,2022-07-01T21:48:35+0000,basic_format.pgn

0 comments on commit cf717de

Please sign in to comment.