In [1]:
import logging

# Root logging setup for the notebook: "<time> <LEVEL>:<message>" at INFO and above.
# The time is rendered on a 24-hour clock (%H); a 12-hour %I without an AM/PM
# marker makes morning and evening timestamps indistinguishable in the output.
logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO, datefmt="%H:%M:%S")

In [2]:
from os import environ
from pathlib import Path

import chess.pgn

import blunder._internal.pipeline as pl

In [3]:
# Notebook-level logger, named after the running module via __name__.
logger = logging.getLogger(__name__)
# Explicitly allow records to be passed on to ancestor loggers' handlers.
# NOTE(review): True is the stdlib default for Logger.propagate, so this line
# is likely redundant — confirm nothing earlier switches it off.
logger.propagate = True

In [4]:
# The engine binary is located through the STOCKFISH_PATH environment variable.
engine_path = environ.get("STOCKFISH_PATH", "")
if not engine_path:
    # Path("") collapses to the current directory, which is not an executable.
    # Say so here instead of leaving a confusing failure for whenever the
    # engine is eventually used; the notebook itself keeps running.
    logger.warning("STOCKFISH_PATH is not set; engine executable path falls back to %s", Path(engine_path))
engine_config = pl.EngineConfig(executable_path=Path(engine_path))
# Override selected engine settings on the freshly built config.
engine_config.config_hash_mb = 256
engine_config.config_threads = 2
engine_config.limit_depth = 14

In [5]:
# Base connection string from the environment; None when DATABASE_URL is unset.
database_url = environ.get("DATABASE_URL")
# The base URL with "/chess" appended, or None when there is no usable base URL.
if database_url:
    chess_db_url = f"{database_url}/chess"
else:
    chess_db_url = None

In [6]:
# Directory holding the raw PGN dump. Set BLUNDER_DATA_DIR to point the notebook
# at another checkout; when it is unset or empty the original location is used,
# so existing runs are unaffected.
data_dir = Path(environ.get("BLUNDER_DATA_DIR") or "/home/vandy/Work/MATH6310/blunder-analysis/data/raw")
pgn_path = data_dir / "lichess_db_standard_rated_2025-08.pgn"
# The index file sits next to the PGN with the same stem, so derive it from
# pgn_path instead of repeating the whole path.
index_path = pgn_path.with_suffix(".idx")
# Upper bound on the number of games passed to the processing config.
max_games = 5000
# Bundle the PGN path, index path, game cap, and engine settings into the
# processing configuration that the following cells read from.
config = pl.ProcessingConfig(pgn_path=pgn_path, index_path=index_path, max_games=max_games, engine=engine_config)

In [7]:
# Get the indexed offsets for this config and narrow them with config.max_games
# in a single expression, so `offsets` is bound exactly once in this cell.
offsets = pl.game_offsets(pl.ensure_index(config), max_games=config.max_games)
# Cell output: how many offsets were selected.
len(offsets)

05:21:22 INFO:Loading offsets from /home/vandy/Work/MATH6310/blunder-analysis/data/raw/lichess_db_standard_rated_2025-08.idx


5000

In [8]:
# Stays None unless resuming from a checkpoint is enabled in the config.
checkpoint: pl.OffsetCheckpoint | None = None
if config.resume_from_checkpoint:
    # Prefer the configured checkpoint file; otherwise use a
    # ".checkpoint.json" sibling of the PGN file.
    checkpoint_path = config.checkpoint_path
    if not checkpoint_path:
        checkpoint_path = config.pgn_path.with_suffix(".checkpoint.json")
    checkpoint = pl.OffsetCheckpoint.load(checkpoint_path)
    # Replace the working offsets with the checkpoint-filtered ones.
    offsets = checkpoint.filter_offsets(offsets)

In [9]:
# Display the assembled processing configuration as this cell's output.
config

ProcessingConfig(pgn_path=PosixPath('/home/vandy/Work/MATH6310/blunder-analysis/data/raw/lichess_db_standard_rated_2025-08.pgn'), index_path=PosixPath('/home/vandy/Work/MATH6310/blunder-analysis/data/raw/lichess_db_standard_rated_2025-08.idx'), workers=15, batch_size=64, max_games=5000, show_progress=True, chunk_size=8192, engine=EngineConfig(executable_path=PosixPath('/nix/store/pj7zp00nacxj6b1s74wal1ymgm3izhnp-stockfish-17.1/bin/stockfish'), config_hash_mb=256, config_threads=2, config_multipv=1, config_ponder=False, config_show_wdl=True, limit_depth=14, limit_info=<Info.SCORE: 2>), database_url='sqlite:///analysis.db', checkpoint_path=None, resume_from_checkpoint=True)

In [None]:
from collections.abc import Sequence


def enrich_game(
    pgn_path: Path,
    batch_offsets: Sequence[int],
) -> pl.GameRecord:
    """Read the games of one offset batch from a PGN file and print their headers.

    Seeks to the first offset of the batch, then parses games one after
    another until the file position passes the last offset or the file ends.
    Games that parse with errors are logged and skipped.

    Args:
        pgn_path: PGN file to read.
        batch_offsets: File offsets of the games in the batch. The first and
            last entries bound the region that is read (assumes ascending
            order — TODO confirm against the index builder).

    NOTE(review): the body does not build or return a record yet, so callers
    currently receive None despite the annotated return type.
    """
    # Nothing to read for an empty batch; indexing [0] / [-1] below would raise.
    if len(batch_offsets) == 0:
        return None

    min_offset = batch_offsets[0]
    max_offset = batch_offsets[-1]

    # Read the file the caller passed in rather than the notebook-global config.
    with open(pgn_path, encoding="utf-8") as handle:
        handle.seek(min_offset)
        while handle.tell() <= max_offset:
            # Remember where this game starts so log messages can point at it.
            offset = handle.tell()
            curr_game = chess.pgn.read_game(handle)
            if curr_game is None:
                logger.debug("Reached EOF while reading game %s", offset)
                break
            if curr_game.errors:
                logger.error("Errors parsing game at offset %s: %s", offset, curr_game.errors)
                continue
            print(curr_game.headers)

In [None]:
# Scratch lookup of the TimeControl name on chess.pgn; the cell's only effect is to display it.
# NOTE(review): exploratory leftover — confirm the attribute exists, or drop the cell.
chess.pgn.TimeControl

In [None]:
# Walk the PGN batch by batch and print the headers of every cleanly parsed game.
for batch_offsets in pl.chunk_offsets(offsets, config.batch_size):
    curr_offsets = batch_offsets.tolist()
    if not curr_offsets:
        # An empty batch has no first/last offset to bound the read.
        continue
    min_offset = curr_offsets[0]
    max_offset = curr_offsets[-1]
    with open(config.pgn_path, encoding="utf-8") as handle:
        handle.seek(min_offset)
        while handle.tell() <= max_offset:
            curr_game = chess.pgn.read_game(handle)
            if curr_game is None:
                # End of file: there is no game object, so there are no
                # errors or headers to read and nothing left in this batch.
                break
            if curr_game.errors:
                # Report the parse errors themselves and skip the game.
                print(f"Error in curr game. {curr_game.errors}")
                continue
            print(curr_game.headers)


Headers(Event='Rated Blitz game', Site='https://lichess.org/GBNcycCw', Date='2025.08.01', Round='-', White='JessieLM', Black='Trip_Team2022', Result='0-1', UTCDate='2025.08.01', UTCTime='00:00:23', WhiteElo='2253', BlackElo='2297', WhiteRatingDiff='-5', BlackRatingDiff='+7', ECO='A14', Opening='Réti Opening: Anglo-Slav Variation, Bogoljubow Variation, Stonewall Line', TimeControl='180+2', Termination='Normal')
Headers(Event='Rated Blitz game', Site='https://lichess.org/tT5omaaN', Date='2025.08.01', Round='-', White='Haytroy', Black='ABS1983', Result='1-0', UTCDate='2025.08.01', UTCTime='00:00:23', WhiteElo='1111', BlackElo='1097', WhiteRatingDiff='+5', BlackRatingDiff='-6', ECO='C20', Opening="King's Pawn Game: Wayward Queen Attack", TimeControl='180+2', Termination='Time forfeit')
Headers(Event='Rated Blitz game', Site='https://lichess.org/RNqZylfV', Date='2025.08.01', Round='-', White='Xafir', Black='Fil-z-Lip', Result='0-1', UTCDate='2025.08.01', UTCTime='00:00:23', WhiteElo='1468',