In [1]:
from pathlib import Path
from typing import Dict, Iterator, Optional, Any
from enum import Enum
import sys, asyncio
import chess
import chess.pgn
import chess.engine

In [7]:
# Select the Stockfish binary that matches the host operating system.
#
# A single if/elif/else chain guarantees that `stockfish_executable_path` is
# always bound. With two independent `if` statements the name stayed undefined
# on every platform other than Windows and macOS (e.g. Linux), and the first
# cell that used it failed with a NameError.
if sys.platform.startswith("win"):
    stockfish_executable_path = Path("./stockfish/stockfish-windows-x86-64-avx2.exe")
elif sys.platform.startswith("darwin"):
    stockfish_executable_path = Path("./stockfish/stockfish-macos-m1-apple-silicon")
else:
    # No bundled binary is referenced for other platforms: fall back to the bare
    # command name so a system-wide `stockfish` install is looked up through
    # PATH when the engine process is spawned.
    # NOTE(review): if a Linux build is shipped in ./stockfish, point to it here.
    stockfish_executable_path = Path("stockfish")

print(f"Using Stockfish executable: {stockfish_executable_path}")

Using Stockfish executable: stockfish\stockfish-windows-x86-64-avx2.exe


In [8]:
def iter_games(pgn_path: Path) -> Iterator[chess.pgn.Game]:
    """
    Lazily stream the games stored in a PGN file, one at a time.

    The file extension is checked case-insensitively. Because this is a
    generator, the check (and the file open) only happens when the first
    item is requested, not when the function is called.

    Raises:
        ValueError: if the path does not end in ".pgn" (any casing).
    """
    is_pgn = pgn_path.suffix.lower() == ".pgn"
    if not is_pgn:
        raise ValueError(f"Expected a .pgn file, got: {pgn_path.suffix}")

    # Undecodable bytes are replaced rather than raising, so one badly encoded
    # header does not abort the whole file.
    with open(pgn_path, "r", encoding="utf-8", errors="replace") as pgn_file:
        # read_game() returns None once the end of the file is reached.
        while (parsed_game := chess.pgn.read_game(pgn_file)) is not None:
            yield parsed_game

In [9]:
"""
Why this cell exists:
- python-chess launches Stockfish via asyncio.subprocess_exec.
- On Windows, the Selector event loop cannot create subprocesses, it raises NotImplementedError.
- Some Jupyter kernels on Windows start with the Selector policy by default.
- Switching to WindowsProactorEventLoopPolicy enables subprocess support in this notebook.

How to use:
- Run this cell once before creating the engine.
- On macOS or Linux this does nothing and is safe.
"""
# Windows only: replace the process-wide asyncio event loop policy with the
# Proactor policy and print the policy class name before and after the switch.
# On every other platform the condition is false and nothing happens.
if sys.platform.startswith("win"):
    # Policy in effect before the switch (printed for diagnostics only).
    print(f"Initial Policy: {type(asyncio.get_event_loop_policy()).__name__}")
    # Affects event loops created after this call.
    # NOTE(review): a loop that is already running keeps its current type — confirm
    # the engine is started on a new loop.
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
    # Read the policy back to confirm that the switch took effect.
    print(f"New Policy: {type(asyncio.get_event_loop_policy()).__name__}")


Initial Policy: WindowsSelectorEventLoopPolicy
New Policy: WindowsProactorEventLoopPolicy


In [10]:
class PositionLabel(Enum):
    """
    Seven-way label for a chess position.

    Values run from the White-favouring end of the scale (0) to the
    Black-favouring end (6), with EQUAL (3) in the middle.

    NOTE(review): the evaluation thresholds that separate WINNING / DECISIVE /
    BETTER are not defined here — confirm against the code that assigns labels.
    """
    # White-favouring labels; presumably ordered from largest to smallest advantage.
    WHITE_WINNING = 0
    WHITE_DECISIVE = 1
    WHITE_BETTER = 2
    # Neither side is favoured.
    EQUAL = 3
    # Black-favouring labels, mirroring the White ones.
    BLACK_BETTER = 4
    BLACK_DECISIVE = 5
    BLACK_WINNING = 6

In [None]:
def get_game_result(game: chess.pgn.Game) -> float | None:
    """
    Convert the PGN "Result" header into a numeric score from White's side.

    Returns:
        1.0 for "1-0", 0.0 for "0-1", 0.5 for "1/2-1/2".
        None for any other value, including a missing header (treated as '*').
    """
    score_by_tag = {
        "1-0": 1.0,
        "0-1": 0.0,
        "1/2-1/2": 0.5,
    }
    result_tag = game.headers.get("Result", "*")
    # Tags not in the table (unfinished / unknown games) map to None.
    return score_by_tag.get(result_tag)

In [19]:
def get_tapered_phase_score(board: chess.Board) -> float:
    """
    Estimate the game phase from the non-pawn material left on the board.

    Each knight and bishop counts 1, each rook 2 and each queen 4, summed over
    both colours. The full starting set adds up to 24, which is used as the
    normalisation constant.

    Returns:
        A float in [0.0, 1.0]: 1.0 with the full starting non-pawn material
        (opening/middlegame), 0.0 when no knights, bishops, rooks or queens
        remain (pure endgame).

    Credits: Stockfish
    """
    MAX_PHASE = 24
    weighted_pieces = (
        (chess.KNIGHT, 1),
        (chess.BISHOP, 1),
        (chess.ROOK, 2),
        (chess.QUEEN, 4),
    )

    raw_phase = sum(
        weight * (
            len(board.pieces(piece_type, chess.WHITE))
            + len(board.pieces(piece_type, chess.BLACK))
        )
        for piece_type, weight in weighted_pieces
    )

    # Extra pieces from promotions can push the sum above 24; cap it before
    # normalising so the result never exceeds 1.0.
    return min(raw_phase, MAX_PHASE) / MAX_PHASE

In [17]:
def process_game(game: chess.pgn.Game):
    """
    Walk the mainline of one game and yield a feature dict per position.

    Games whose result is unknown (get_game_result returns None) produce no
    items at all. The starting position itself is not yielded; the first item
    describes the board after the first mainline move.

    Yields:
        dict with keys "fen", "game_result", "game_phase" and "is_check",
        describing the position reached after each move.
    """
    outcome = get_game_result(game)
    if outcome is not None:
        position = game.board()

        for ply in game.mainline_moves():
            try:
                position.push(ply)
                # Values are computed in key order on the updated board.
                record = {
                    "fen": position.fen(),
                    "game_result": outcome,
                    "game_phase": get_tapered_phase_score(position),
                    "is_check": position.is_check(),
                }
                yield record
            except ValueError:
                # Best-effort: skip a position whose processing raised ValueError.
                # NOTE(review): Board.push() is documented as not checking move
                # legality, so this handler may never fire — confirm intent.
                continue

In [28]:
"""
Test functions
"""

chess_games_folder = Path("./chess_games_sample")

# Match the extension case-insensitively. `glob("*.pgn")` follows the platform's
# casing rules, so on macOS/Linux it misses files named "*.PGN" and the indexing
# below fails with IndexError, even though iter_games() accepts any casing of
# the suffix. Sorting makes the "first" file deterministic instead of depending
# on directory listing order.
pgn_files = sorted(
    candidate
    for candidate in chess_games_folder.glob("*")
    if candidate.is_file() and candidate.suffix.lower() == ".pgn"
)
if not pgn_files:
    # Fail with an explicit message instead of an IndexError on pgn_files[0].
    raise FileNotFoundError(f"No PGN files found in: {chess_games_folder.resolve()}")

first_pgn = pgn_files[0]
print(f"Testing on: {first_pgn}")

# The generator keeps the PGN file open until it is exhausted or closed.
game_gen = iter_games(first_pgn)
first_game = next(game_gen, None)
if first_game is None:
    # An empty PGN would otherwise surface as a bare StopIteration.
    raise ValueError(f"No games could be read from: {first_pgn}")

# Show only the first three extracted positions.
for i, data in enumerate(process_game(first_game)):
    print(data)
    if i >= 2:
        break


Testing on: chess_games_sample\Lipke Paul (1870-1955) 41 Games.PGN
{'fen': 'rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1', 'game_result': 1.0, 'game_phase': 1.0, 'is_check': False}
{'fen': 'rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 2', 'game_result': 1.0, 'game_phase': 1.0, 'is_check': False}
{'fen': 'rnbqkbnr/pppp1ppp/8/4p3/4P3/2N5/PPPP1PPP/R1BQKBNR b KQkq - 1 2', 'game_result': 1.0, 'game_phase': 1.0, 'is_check': False}
