In [10]:
import chess.pgn
import chess.engine
import pandas as pd
import os
import re

In [3]:
#read in "2025 FIDE World Cup" from folder
# Open the PGN file
pgn_file = open("2025-fide-world-cup.pgn", encoding="utf-8")

game_count = 0
for game in iter(lambda: chess.pgn.read_game(pgn_file), None):
    game_count += 1
    print(f"--- Game {game_count} ---")
    print(f"Event: {game.headers['Event']}")
    print(f"Result: {game.headers['Result']}")

# Close the file when finished
pgn_file.close()

--- Game 1 ---
Event: FIDE World Cup 2025
Result: 0-1
--- Game 2 ---
Event: FIDE World Cup 2025
Result: 1-0
--- Game 3 ---
Event: FIDE World Cup 2025
Result: 1-0
--- Game 4 ---
Event: FIDE World Cup 2025
Result: 0-1
--- Game 5 ---
Event: FIDE World Cup 2025
Result: 1-0
--- Game 6 ---
Event: FIDE World Cup 2025
Result: 0-1
--- Game 7 ---
Event: FIDE World Cup 2025
Result: 1-0
--- Game 8 ---
Event: FIDE World Cup 2025
Result: 1-0
--- Game 9 ---
Event: FIDE World Cup 2025
Result: 0-1
--- Game 10 ---
Event: FIDE World Cup 2025
Result: 1-0
--- Game 11 ---
Event: FIDE World Cup 2025
Result: 1-0
--- Game 12 ---
Event: FIDE World Cup 2025
Result: 0-1
--- Game 13 ---
Event: FIDE World Cup 2025
Result: 1-0
--- Game 14 ---
Event: FIDE World Cup 2025
Result: 1-0
--- Game 15 ---
Event: FIDE World Cup 2025
Result: 0-1
--- Game 16 ---
Event: FIDE World Cup 2025
Result: 1/2-1/2
--- Game 17 ---
Event: FIDE World Cup 2025
Result: 1-0
--- Game 18 ---
Event: FIDE World Cup 2025
Result: 0-1
--- Game 19 ---

In [11]:
# --- CONFIGURATION ---
PGN_FILE = "2025-fide-world-cup.pgn"
THRESHOLD_PERCENT = 0.08  # 8% of remaining time

# Default time control for games without a TimeControl tag
DEFAULT_INITIAL_TIME_SEC = 5400  # 90 minutes
DEFAULT_INCREMENT_SEC = 30     # 30 seconds

# Function to parse TimeControl string (e.g., "600+10" or "-")
def parse_time_control(tc_str):
    """Extracts initial time and increment from the TimeControl tag."""
    if not tc_str or tc_str == "-":
        return DEFAULT_INITIAL_TIME_SEC, DEFAULT_INCREMENT_SEC
        
    match = re.match(r"(\d+)\+(\d+)", tc_str)
    if match:
        initial_time = int(match.group(1))
        increment = int(match.group(2))
        return initial_time, increment
    
    # Fallback for unexpected formats
    return DEFAULT_INITIAL_TIME_SEC, DEFAULT_INCREMENT_SEC

def analyze_tournament(pgn_file, threshold_percent):
    
    results_tracker = {
        "Prep Retainer Wins": 0,
        "Prep Retainer Losses": 0,
        "Prep Retainer Draws": 0,
        "Games Skipped (Tie/No Data)": 0
    }
    
    with open(pgn_file, encoding="utf-8") as pgn:
        game_count = 0
        while True:
            game = chess.pgn.read_game(pgn)
            if game is None:
                break
            game_count += 1

            # --- 1. GET TIME CONTROL ---
            tc_str = game.headers.get("TimeControl", "-")
            initial_time, increment = parse_time_control(tc_str)
            
            # --- 2. INITIALIZE CLOCK TRACKERS ---
            prev_clock = {
                chess.WHITE: initial_time,  # Initial time is the 'previous remaining' for move 1
                chess.BLACK: initial_time
            }

            break_move = {
                chess.WHITE: None,
                chess.BLACK: None
            }
            
            # --- 3. ITERATE MOVES AND CALCULATE TIME SPENT ---
            for node in game.mainline():
                player_to_move = node.board().turn
                
                # If a player has already "broken" prep, we still need to update
                # the clock for the opponent's next move.
                
                current_remaining_time = node.clock()
                
                if current_remaining_time is None:
                    # Clock data is missing for this move, cannot proceed with time calc
                    continue

                prev_clock_time = prev_clock[player_to_move]
                
                # Time Available = Time from previous move + Increment
                time_available = prev_clock_time + increment
                
                # Time Spent = Time Available - Time Remaining after the move
                time_spent_sec = time_available - current_remaining_time
                
                # Check for negative time spent (a common issue with PGNs, often due to 
                # clock adjustment, but we skip it here as it's not a 'break')
                if time_spent_sec < 0:
                     time_spent_sec = 0
                
                # Check for "break" only if the player hasn't broken yet
                if break_move[player_to_move] is None:
                    
                    # --- APPLY THRESHOLD LOGIC ---
                    # Check if time spent exceeds 8% of the time they had *before* this move.
                    if time_spent_sec > prev_clock_time * threshold_percent:
                        break_move[player_to_move] = node.board().fullmove_number
                
                # Update the previous remaining clock for the *next* move
                prev_clock[player_to_move] = current_remaining_time


            # --- 4. DETERMINE THE PREP-RETAINER AND TALLY RESULTS ---
            white_break = break_move[chess.WHITE] if break_move[chess.WHITE] is not None else float('inf')
            black_break = break_move[chess.BLACK] if break_move[chess.BLACK] is not None else float('inf')

            if white_break < black_break:
                # White broke first. Black is the Prep Retainer.
                retainer_color = chess.BLACK
            elif black_break < white_break:
                # Black broke first. White is the Prep Retainer.
                retainer_color = chess.WHITE
            else:
                # Tie/Neither broke
                results_tracker["Games Skipped (Tie/No Data)"] += 1
                continue 

            # Tally results
            result_str = game.headers.get("Result")
            
            is_white_retainer = (retainer_color == chess.WHITE)
            
            if result_str == "1-0": # White wins
                if is_white_retainer:
                    results_tracker["Prep Retainer Wins"] += 1
                else:
                    results_tracker["Prep Retainer Losses"] += 1
            elif result_str == "0-1": # Black wins
                if not is_white_retainer:
                    results_tracker["Prep Retainer Wins"] += 1
                else:
                    results_tracker["Prep Retainer Losses"] += 1
            elif result_str == "1/2-1/2": # Draw
                results_tracker["Prep Retainer Draws"] += 1

    return results_tracker, game_count

# --- MAIN EXECUTION ---
if __name__ == "__main__":
    
    results, total_games = analyze_tournament(PGN_FILE, THRESHOLD_PERCENT)

    # 2. Display the results
    
    print(f"## üèÜ Prep-Retainer Analysis (Threshold: {THRESHOLD_PERCENT*100:.0f}% of Remaining Time)")
    print(f"Assumed Default Time Control: {DEFAULT_INITIAL_TIME_SEC}s + {DEFAULT_INCREMENT_SEC}s")
    print("-" * 40)
    print(f"Total Games Processed: {total_games}")
    
    total_retained_games = sum(v for k, v in results.items() if not k.startswith("Games Skipped"))
    
    print(f"Total Games with a clear 'Prep Retainer' winner: {total_retained_games}")
    
    # Calculate win rate
    if total_retained_games > 0:
        win_rate = results["Prep Retainer Wins"] / total_retained_games
    else:
        win_rate = 0
        
    print(f"Prep Retainer (P-R) Record:")
    print(f"  * Wins (as P-R):   {results['Prep Retainer Wins']}")
    print(f"  * Draws (as P-R):  {results['Prep Retainer Draws']}")
    print(f"  * Losses (as P-R): {results['Prep Retainer Losses']}")
    print(f"  * Skipped Games:   {results['Games Skipped (Tie/No Data)']}")
    print("-" * 40)
    print(f"**P-R Win Rate (of decided games): {win_rate:.2f}**")

## üèÜ Prep-Retainer Analysis (Threshold: 8% of Remaining Time)
Assumed Default Time Control: 5400s + 30s
----------------------------------------
Total Games Processed: 717
Total Games with a clear 'Prep Retainer' winner: 588
Prep Retainer (P-R) Record:
  * Wins (as P-R):   121
  * Draws (as P-R):  282
  * Losses (as P-R): 185
  * Skipped Games:   129
----------------------------------------
**P-R Win Rate (of decided games): 0.21**
