In [96]:
import pandas as pd
import chess.pgn
import io
import re

def parse_game_data(input_data):
    games = re.split(r'(?=\[Event)', input_data.strip())

    valid_time_controls = {
        "Bullet": ["60+0", "60+1", "120+1"],
        "Blitz": ["180+0", "180+2", "300+0", "300+3"],
        "Rapid": ["600+0", "600+5", "900+10"],
        "Classical": ["1800+0", "1800+20"]
    }

    all_games_data = []

    for game_text in games:
        pgn = io.StringIO(game_text)
        game = chess.pgn.read_game(pgn)

        if not game:
            continue

        headers = game.headers
        time_control = headers.get("TimeControl", "")
        format_type = next((format_name for format_name, controls in valid_time_controls.items() if time_control in controls), None)

        if format_type is None:
            continue

        result = headers.get("Result", "")
        white_elo = headers.get("WhiteElo", "")
        black_elo = headers.get("BlackElo", "")
        termination = headers.get("Termination", "")

        # Split time control to get increment
        base_time, increment = map(int, time_control.split('+'))

        moves_data = []
        board = game.board()
        last_clk = {"white": None, "black": None}

        node = game
        for move in game.mainline_moves():
            fen_before_move = board.fen()

            board.push(move)
            move_comment = node.comment

            current_time = None
            if "%clk" in move_comment:
                current_time = move_comment.split("[%clk ")[1].split("]")[0]

            player = "white" if board.turn == False else "black"
            if last_clk[player] is None or current_time is None:
                think_time = 0
            else:
                try:
                    last_hours, last_minutes, last_seconds = (['0'] + last_clk[player].split(':'))[-3:]
                    current_hours, current_minutes, current_seconds = (['0'] + current_time.split(':'))[-3:]
                    last_secs = int(last_hours) * 3600 + int(last_minutes) * 60 + int(last_seconds)
                    current_secs = int(current_hours) * 3600 + int(current_minutes) * 60 + int(current_seconds)
                    think_time = last_secs - current_secs + increment  # Add the increment to account for bonus time
                except ValueError:
                    think_time = increment

            move_data = {
                "Move": move.uci(),
                "Think-Time": think_time,
                "FEN": fen_before_move
            }
            moves_data.append(move_data)

            last_clk[player] = current_time
            node = node.variation(0)

        all_games_data.append({
            "Format": format_type,
            "Result": result,
            "White Elo": white_elo,
            "Black Elo": black_elo,
            "Termination": termination,
            "Time Control": time_control,
            "Moves Data": moves_data
        })

    return pd.DataFrame(all_games_data)

# Example usage with provided raw data
raw_data = """[Event "Rated Blitz game"]
[Site "https://lichess.org/jQ72oo74"]
[Date "2024.09.01"]
[Round "-"]
[White "talcmecz"]
[Black "uniqueldrich"]
[Result "1-0"]
[UTCDate "2024.09.01"]
[UTCTime "00:01:38"]
[WhiteElo "1665"]
[BlackElo "1661"]
[WhiteRatingDiff "+6"]
[BlackRatingDiff "-6"]
[ECO "B10"]
[Opening "Caro-Kann Defense"]
[TimeControl "180+2"]
[Termination "Normal"]

1. e4 { [%clk 0:03:00] } 1... c6 { [%clk 0:03:00] } 2. Nf3 { [%clk 0:02:58] } 2... d5 { [%clk 0:03:01] } 3. exd5 { [%clk 0:02:59] } 3... cxd5 { [%clk 0:03:03] } 4. d4 { [%clk 0:02:53] } 4... Bg4 { [%clk 0:03:04] } 5. Be2 { [%clk 0:02:54] } 5... e6 { [%clk 0:03:04] } 6. O-O { [%clk 0:02:52] } 6... Bd6 { [%clk 0:03:04] } 7. h3 { [%clk 0:02:52] } 7... Bf5 { [%clk 0:03:05] } 8. Be3 { [%clk 0:02:40] } 8... Nd7 { [%clk 0:03:04] } 9. Nc3 { [%clk 0:02:36] } 9... a6 { [%clk 0:03:04] } 10. a3 { [%clk 0:02:36] } 10... Rc8 { [%clk 0:03:05] } 11. Rc1 { [%clk 0:02:35] } 11... Ngf6 { [%clk 0:03:00] } 12. Re1 { [%clk 0:02:24] } 12... Bb8 { [%clk 0:02:55] } 13. Bg5 { [%clk 0:02:19] } 13... Kf8 { [%clk 0:02:55] } 14. Bd3 { [%clk 0:02:15] } 14... Bg6 { [%clk 0:02:54] } 15. Ne5 { [%clk 0:02:08] } 15... Qc7 { [%clk 0:02:27] } 16. Nxg6+ { [%clk 0:01:47] } 16... hxg6 { [%clk 0:02:29] } 17. Ne2 { [%clk 0:01:04] } 17... Ng4 { [%clk 0:00:12] } 18. Bf4 { [%clk 0:01:00] } 18... e5 { [%clk 0:00:07] } 19. dxe5 { [%clk 0:01:00] } 19... Ndxe5 { [%clk 0:00:08] } 20. hxg4 { [%clk 0:00:51] } 20... g5 { [%clk 0:00:08] } 21. Bg3 { [%clk 0:00:48] } 21... Qe7 { [%clk 0:00:05] } 22. Nd4 { [%clk 0:00:42] } 22... Qf6 { [%clk 0:00:05] } 23. Bf5 { [%clk 0:00:35] } 23... Re8 { [%clk 0:00:06] } 24. Qe2 { [%clk 0:00:24] } 24... g6 { [%clk 0:00:06] } 25. Bd7 { [%clk 0:00:16] } 25... Re7 { [%clk 0:00:06] } 26. Bxe5 { [%clk 0:00:09] } 26... Kg7 { [%clk 0:00:06] } 27. Bxf6+ { [%clk 0:00:10] } 27... Kxf6 { [%clk 0:00:08] } 28. Qxe7+ { [%clk 0:00:10] } 28... Kg7 { [%clk 0:00:07] } 29. Ba4 { [%clk 0:00:10] } 29... Bh2+ { [%clk 0:00:07] } 30. Kf1 { [%clk 0:00:11] } 30... Bf4 { [%clk 0:00:08] } 31. g3 { [%clk 0:00:11] } 31... Rh1+ { [%clk 0:00:09] } 32. Ke2 { [%clk 0:00:11] } 32... Bc7 { [%clk 0:00:10] } 33. c3 { [%clk 0:00:12] } 33... Rxe1+ { [%clk 0:00:08] } 34. Rxe1 { [%clk 0:00:13] } 34... Bb6 { [%clk 0:00:09] } 35. Kd3 { [%clk 0:00:13] } 35... Bxd4 { [%clk 0:00:10] } 36. Qxb7 { [%clk 0:00:13] } 36... Bxf2 { [%clk 0:00:10] } 37. Re7 { [%clk 0:00:14] } 37... Bxg3 { [%clk 0:00:10] } 38. Rxf7+ { [%clk 0:00:14] } 38... Kh6 { [%clk 0:00:11] } 39. Rh7# { [%clk 0:00:13] } 1-0

[Event "Rated Blitz game"]
[Site "https://lichess.org/o3SnhWuW"]
[Date "2024.09.01"]
[Round "-"]
[White "nkm14"]
[Black "asoch15"]
[Result "1-0"]
[UTCDate "2024.09.01"]
[UTCTime "00:01:38"]
[WhiteElo "1657"]
[BlackElo "1654"]
[WhiteRatingDiff "+6"]
[BlackRatingDiff "-6"]
[ECO "C29"]
[Opening "Vienna Game: Vienna Gambit"]
[TimeControl "180+2"]
[Termination "Normal"]

1. e4 { [%clk 0:03:00] } 1... e5 { [%clk 0:03:00] } 2. Nc3 { [%clk 0:03:01] } 2... Nf6 { [%clk 0:03:01] } 3. f4 { [%clk 0:03:02] } 3... exf4 { [%clk 0:03:00] } 4. e5 { [%clk 0:03:04] } 4... Qe7 { [%clk 0:02:57] } 5. Qe2 { [%clk 0:03:04] } 5... Ng8 { [%clk 0:02:55] } 6. Nf3 { [%clk 0:03:04] } 6... d6 { [%clk 0:02:53] } 7. Nd5 { [%clk 0:03:05] } 7... Qd7 { [%clk 0:02:36] } 8. exd6+ { [%clk 0:03:03] } 8... Kd8 { [%clk 0:02:34] } 9. dxc7+ { [%clk 0:03:03] } 1-0"""

df_games = parse_game_data(raw_data)
print(df_games.head())

  Format Result White Elo Black Elo Termination Time Control  \
0  Blitz    1-0      1665      1661      Normal        180+2   
1  Blitz    1-0      1657      1654      Normal        180+2   

                                          Moves Data  
0  [{'Move': 'e2e4', 'Think-Time': 0, 'FEN': 'rnb...  
1  [{'Move': 'e2e4', 'Think-Time': 0, 'FEN': 'rnb...  


In [97]:
first_game_moves = df_games.loc[0, "Moves Data"]
print(first_game_moves)

[{'Move': 'e2e4', 'Think-Time': 0, 'FEN': 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1'}, {'Move': 'c7c6', 'Think-Time': 0, 'FEN': 'rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1'}, {'Move': 'g1f3', 'Think-Time': 0, 'FEN': 'rnbqkbnr/pp1ppppp/2p5/8/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 2'}, {'Move': 'd7d5', 'Think-Time': 4, 'FEN': 'rnbqkbnr/pp1ppppp/2p5/8/4P3/5N2/PPPP1PPP/RNBQKB1R b KQkq - 1 2'}, {'Move': 'e4d5', 'Think-Time': 1, 'FEN': 'rnbqkbnr/pp2pppp/2p5/3p4/4P3/5N2/PPPP1PPP/RNBQKB1R w KQkq - 0 3'}, {'Move': 'c6d5', 'Think-Time': 1, 'FEN': 'rnbqkbnr/pp2pppp/2p5/3P4/8/5N2/PPPP1PPP/RNBQKB1R b KQkq - 0 3'}, {'Move': 'd2d4', 'Think-Time': 0, 'FEN': 'rnbqkbnr/pp2pppp/8/3p4/8/5N2/PPPP1PPP/RNBQKB1R w KQkq - 0 4'}, {'Move': 'c8g4', 'Think-Time': 8, 'FEN': 'rnbqkbnr/pp2pppp/8/3p4/3P4/5N2/PPP2PPP/RNBQKB1R b KQkq - 0 4'}, {'Move': 'f1e2', 'Think-Time': 1, 'FEN': 'rn1qkbnr/pp2pppp/8/3p4/3P2b1/5N2/PPP2PPP/RNBQKB1R w KQkq - 1 5'}, {'Move': 'e7e6', 'Think-Time': 1, 'FEN': 'rn