In [None]:
# !wget https://github.com/official-stockfish/Stockfish/releases/latest/download/stockfish-ubuntu-x86-64-avx2.tar
# !tar -xf ./stockfish-ubuntu-x86-64-avx2.tar
# !chmod +x ./stockfish/stockfish-ubuntu-x86-64-avx2

In [1]:
import os
# Limit this process to the GPUs with indices 1 and 2. This cell runs before
# the cell that imports torch/unsloth, so the setting is in the environment
# by the time those libraries are loaded.
os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"

In [2]:
import os
import re
import chess
import chess.engine
import random
import torch
from datasets import Dataset
from unsloth import FastLanguageModel, PatchDPOTrainer, is_bfloat16_supported
from trl import GRPOConfig, GRPOTrainer, SFTTrainer, SFTConfig
import pandas as pd

# ----------------------------------------------------
# 1. USER PROVIDED LOGIC (FEN Parsing & Prompting)
# ----------------------------------------------------

def parse_fen_and_generate_prompt(fen: str):
    """
    Build the LLM prompt for a chess position given as a FEN string.

    The prompt contains the raw FEN, a labelled board (one text row per FEN
    rank, empty squares shown as "--"), the side to move, the castling rights
    of each side, the en passant target and the answer-format instructions.

    Args:
        fen: FEN string with at least its first four space-separated fields
            (piece placement, side to move, castling rights, en passant).

    Returns:
        The prompt text with leading/trailing whitespace stripped.

    Raises:
        ValueError: if ``fen`` has fewer than four fields.
        KeyError: if the placement field contains an unknown piece letter.
    """
    # 1. Split FEN fields
    fields = fen.split()
    if len(fields) < 4:
        raise ValueError(
            f"FEN must have at least 4 space-separated fields, got {len(fields)}: {fen!r}"
        )
    board_part, turn, castling_rights, ep_field = fields[:4]
    side_to_move = "white" if turn == "w" else "black"
    en_passant = ep_field if ep_field != "-" else "none"

    # 2. Running counters give every piece a stable label such as WP3 or BN1.
    #    Keys are FEN piece letters, so white and black are counted separately.
    counters = {piece: 1 for piece in "KQRBNPkqrbnp"}

    def get_label(piece):
        color = "W" if piece.isupper() else "B"
        label = f"{color}{piece.upper()}{counters[piece]}"
        counters[piece] += 1
        return label

    # 3. Build 2D expanded board: digits expand to that many empty squares.
    board_2d = []
    for row in board_part.split('/'):
        expanded = []
        for ch in row:
            if ch.isdigit():
                expanded.extend(["--"] * int(ch))
            else:
                expanded.append(get_label(ch))
        board_2d.append(expanded)

    board_str = "\n".join(" ".join(r) for r in board_2d)

    # 4. Castling rights formatting. Each side only reports its OWN rights:
    #    upper-case letters in the FEN field are White's, lower-case Black's.
    castling_names = {
        "White": {"K": "white king-side", "Q": "white queen-side"},
        "Black": {"k": "black king-side", "q": "black queen-side"},
    }

    def castling_info(color, rights):
        names = castling_names[color]
        available = [names[c] for c in rights if c in names]
        if not available:
            return f"{color} has no castling rights"
        return f"{color} has castling rights: {', '.join(available)}"

    white_castling = castling_info("White", castling_rights)
    black_castling = castling_info("Black", castling_rights)

    # 5. Construct final LLM prompt.
    #    The layout (including the "FEN:" / "2D Chess Board" headings) is kept
    #    as-is because the reward code locates the FEN between those headings.
    final_prompt = f"""
        You are a strong chess reasoning model.

        Below is the fully parsed board state from the given FEN.

        FEN:
        {fen}

        2D Chess Board Representation:
        {board_str}

        Extracted Information:
        • Side to move: **{side_to_move}**
        • {white_castling}
        • {black_castling}
        • En passant target: {en_passant}

        INSTRUCTIONS

        1. Think deeply about the position and write your reasoning
        strictly inside:

        <reason>
        ... your chain-of-thought reasoning comes here ...
        </reason>

        2. Then provide ONLY your best legal move for this position
        (in UCI notation) inside:

        <answer>
        ... final best move here ...
        </answer>

        IMPORTANT: The <answer> tag MUST appear at the **very end** of your output.
        """.strip()
    return final_prompt

# ----------------------------------------------------
# 2. USER PROVIDED LOGIC (Stockfish & Dataset Gen)
# ----------------------------------------------------

def generate_random_legal_fen(min_moves: int = 5, max_moves: int = 40):
    """
    Return the FEN of a position reached by random legal play from the start.

    A ply count is drawn uniformly from [min_moves, max_moves] and that many
    random legal moves are played. If a move ends the game (checkmate,
    stalemate, ...), it is taken back and the walk stops, so the returned
    position always has at least one legal move for the model to choose.
    """
    board = chess.Board()
    for _ in range(random.randint(min_moves, max_moves)):
        board.push(random.choice(list(board.legal_moves)))
        if board.is_game_over():
            # Undo the terminal ply: a finished game is useless as a
            # "find the best move" training sample.
            board.pop()
            break
    return board.fen()

def generate_dataset_for_grpo(num_samples: int, min_moves: int = 5, max_moves: int = 40):
    """
    Build the GRPO training set: ``num_samples`` random positions, each stored
    as a row with its ``prompt`` text and the source ``fen``.

    No reference move is stored; rewards are computed while training.
    """

    def _sample_row():
        # One random position plus the prompt rendered from it.
        position = generate_random_legal_fen(min_moves=min_moves, max_moves=max_moves)
        return {
            "prompt": parse_fen_and_generate_prompt(position),
            "fen": position,  # kept alongside the prompt for the reward function
        }

    return Dataset.from_list([_sample_row() for _ in range(num_samples)])

def stockfish_top_moves(fen: str, engine_path: str, depth: int = 15, n_best: int = 5):
    """Returns top-n Stockfish moves (sorted best → worst) with scores.

    Args:
        fen: Position to analyse.
        engine_path: Path to the UCI engine binary; a fresh engine process is
            started for this call and shut down before returning.
        depth: Search depth limit.
        n_best: Number of principal variations (MultiPV) to request.

    Returns:
        List of ``(uci_move, score)`` tuples ordered from best to worst *for
        the side to move*. ``score`` is reported from White's point of view in
        centipawns; mates are mapped to ``±(100000 - distance)``.
    """
    mate_value = 100000

    board = chess.Board(fen)
    engine = chess.engine.SimpleEngine.popen_uci(engine_path)
    try:
        info = engine.analyse(board, chess.engine.Limit(depth=depth), multipv=n_best)
    finally:
        engine.quit()

    ranked = []
    for entry in info:
        pv = entry.get("pv")
        if not pv:
            continue
        pov_score = entry["score"]
        # Rank by the mover's own evaluation: a White-POV score would put
        # Black's worst move first whenever Black is to move.
        mover_value = pov_score.pov(board.turn).score(mate_score=mate_value)
        white_value = pov_score.white().score(mate_score=mate_value)
        ranked.append((mover_value, pv[0].uci(), white_value))

    ranked.sort(key=lambda item: item[0], reverse=True)
    return [(uci, white_value) for _, uci, white_value in ranked]


def grpo_reward_logic(fen: str, proposed_move: str, engine_path: str, depth: int = 15):
    """
    Computes GRPO-style reward based on Stockfish.

    Returns:
        0.0   if the position is already game over, or the engine gave no moves
        -1.0  if ``proposed_move`` is not valid UCI or not legal in the position
        1.0 / 0.8 / 0.7 / 0.6 / 0.5  if it is Stockfish's 1st..5th choice
        -0.5  if it is legal but outside the engine's top five
    """
    # Reward for matching the engine's i-th best move (index 0 = best).
    rank_rewards = (1.0, 0.8, 0.7, 0.6, 0.5)

    board = chess.Board(fen)

    # 1. Game Over Check
    if board.is_game_over():
        return 0.0

    # 2. Parsing Check (python-chess signals malformed UCI with a ValueError)
    try:
        move_obj = chess.Move.from_uci(proposed_move)
    except ValueError:
        return -1.0  # Invalid notation

    # 3. Legality Check
    if move_obj not in board.legal_moves:
        return -1.0  # Illegal move

    # 4. Stockfish Comparison
    top_moves = stockfish_top_moves(fen, engine_path, depth, n_best=len(rank_rewards))
    if not top_moves:
        return 0.0

    played = move_obj.uci()
    for (engine_move, _score), reward in zip(top_moves, rank_rewards):
        if played == engine_move:
            return reward

    return -0.5  # Legal but sub-optimal

# ----------------------------------------------------
# 3. GRPO REWARD WRAPPER
# ----------------------------------------------------

# Tags the model is instructed to wrap its reasoning and final move in.
reasoning_start = "<reason>"
reasoning_end = "</reason>"
solution_start = "<answer>"
solution_end = "</answer>"

def format_reward_func(prompts, completions, **kwargs):
    """
    Score how well each completion follows the <reason>/<answer> template.

    Five checks are each worth +0.5 when satisfied and -0.5 otherwise, so a
    completion scores between -2.5 and +2.5:
      * each of the four tags occurs exactly once;
      * the completion starts with the opening reasoning tag, ignoring leading
        whitespace (a newline before the tag is not a format error).

    ``prompts`` and extra keyword arguments are accepted for the reward
    function signature but are not used.
    """
    exactly_once = (reasoning_start, reasoning_end, solution_start, solution_end)
    rewards = []

    for completion in completions:
        response = completion
        # Penalise both missing tags and repeated tags.
        score = sum(0.5 if response.count(tag) == 1 else -0.5 for tag in exactly_once)
        score += 0.5 if response.lstrip().startswith(reasoning_start) else -0.5
        rewards.append(score)
    return rewards

def chess_reward_func(prompts, completions, **kwargs):
    """
    Reward each completion by comparing its answered move with Stockfish.

    For every (prompt, completion) pair:
      * the FEN is taken from the dataset's ``fen`` column when the trainer
        forwards it as a keyword argument, otherwise it is parsed back out of
        the prompt text;
      * the move is the content of the LAST <answer>...</answer> block;
      * the pair is scored with ``grpo_reward_logic`` at depth 10.

    Returns:
        One float per completion: 0.0 when no FEN can be determined, -1.0
        when no answer block is present, otherwise the Stockfish-based reward.

    Relies on the module-level ``ENGINE_PATH`` being set before training.
    """
    # Fallback: recover the FEN from the prompt layout produced by
    # parse_fen_and_generate_prompt ("FEN:" ... "2D Chess Board").
    fen_pattern = re.compile(r"FEN:\s+(.*?)\s+2D Chess Board")
    # Content inside <answer> tags
    answer_pattern = re.compile(r"<answer>\s*(.*?)\s*</answer>", re.DOTALL)

    dataset_fens = kwargs.get("fen")
    rewards = []

    for idx, (prompt, completion) in enumerate(zip(prompts, completions)):
        # 1. Determine the FEN
        fen = None
        if dataset_fens is not None and idx < len(dataset_fens):
            fen = dataset_fens[idx]
        if not fen:
            fen_match = fen_pattern.search(prompt)
            fen = fen_match.group(1).strip() if fen_match else None
        if not fen:
            # Neither the dataset column nor the prompt yielded a FEN
            rewards.append(0.0)
            continue

        # 2. Extract Move (last block wins, matching the evaluation code)
        answers = answer_pattern.findall(completion)
        if not answers:
            # Model failed to follow format
            rewards.append(-1.0)
            continue
        proposed_move = answers[-1].strip()

        # 3. Calculate Reward (lower depth than the default for speed)
        score = grpo_reward_logic(fen, proposed_move, ENGINE_PATH, depth=10)
        rewards.append(score)

    return rewards

# ----------------------------------------------------
# 4. SFT TRAINER FUNCTION
# ----------------------------------------------------

def train_sft(model, tokenizer, train_dataset, output_dir="qwen-chess-sft-3"):
    """
    Supervised fine-tuning pass that teaches the output format and basic moves.

    ``train_dataset`` must provide 'prompt' and 'completion' columns. They are
    merged into a single 'text' column, the model is trained on it, and the
    model and tokenizer are saved to ``output_dir``.

    Returns the (model, tokenizer) pair that was passed in.
    """

    def formatting_prompts_func(examples):
        # Batched map callback: prompt, newline, completion, then the EOS
        # token so the model learns where a response ends.
        merged = [
            f"{prompt}\n{completion}" + tokenizer.eos_token
            for prompt, completion in zip(examples["prompt"], examples["completion"])
        ]
        return {"text": merged}

    # Format up front so the trainer only sees the merged 'text' column.
    text_dataset = train_dataset.map(
        formatting_prompts_func,
        batched=True,
        remove_columns=train_dataset.column_names,
    )

    use_bf16 = is_bfloat16_supported()
    config_kwargs = dict(
        output_dir=output_dir,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_ratio=0.03,
        num_train_epochs=2,
        learning_rate=4e-4,
        # Use bf16 where the hardware supports it, otherwise fall back to fp16.
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        max_seq_length=1024,
        # One example per sequence; no packing of several examples together.
        packing=False,
        # NOTE(review): after the map above the dataset only has a 'text'
        # column - confirm this flag has the intended effect on such data.
        completion_only_loss=True,
    )
    sft_config = SFTConfig(**config_kwargs)

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=text_dataset,
        args=sft_config,
    )

    print("Starting SFT Training...")
    trainer.train()

    print(f"SFT Training complete. Saving to {output_dir}")
    for artifact in (model, tokenizer):
        artifact.save_pretrained(output_dir)

    return model, tokenizer

def SFT_Dataset_Gen(csv_path):
    """
    Load the SFT CSV and turn it into a prompt/completion Dataset.

    The CSV must have 'fen', 'reasoning' and 'answer' columns. Each row yields
    a prompt rendered from its FEN and a completion that wraps the reasoning
    and the answer in the <reason>/<answer> tags.
    """

    def _as_completion(row):
        # Target text in the exact tag layout the model is asked to produce.
        return f"<reason>\n{row['reasoning']}\n</reason>\n\n<answer>\n{row['answer']}\n</answer>"

    frame = pd.read_csv(csv_path)
    frame["prompt"] = frame["fen"].apply(parse_fen_and_generate_prompt)
    frame["completion"] = frame.apply(_as_completion, axis=1)

    # Only the two training columns go into the Hugging Face Dataset.
    training_columns = frame[["prompt", "completion"]]
    return Dataset.from_pandas(training_columns)

# ----------------------------------------------------
# 5. MAIN EXECUTION
# ----------------------------------------------------

if __name__ == "__main__":
    # CONFIGURATION
    # Update this path to your local Stockfish binary
    engine_path = "./stockfish/stockfish-ubuntu-x86-64-avx2"
    # chess_reward_func reads the engine location from this module-level name.
    ENGINE_PATH = engine_path
    # Base model selection. Only the LAST MODEL_NAME assignment takes effect;
    # the 1.5B line below is overwritten immediately by the 7B one.
    # Earlier candidate, kept for reference:
    # MODEL_NAME = "unsloth/gemma-2-2b-it"
    MODEL_NAME = "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-unsloth-bnb-4bit"
    MODEL_NAME = "unsloth/DeepSeek-R1-Distill-Qwen-7B-unsloth-bnb-4bit"

    print("Generating Dataset...")
    # Generate 50 random positions for GRPO (increase for real training).
    # Note: This dataset is for GRPO (has 'fen', 'prompt').
    # SFT uses a separate CSV-backed dataset with 'completion' (reasoning + move).
    train_dataset = generate_dataset_for_grpo(num_samples=50)

    print("Loading Model...")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=MODEL_NAME,
        max_seq_length=1024,
        load_in_4bit=True,
        dtype=None,
        fast_inference=False,
        gpu_memory_utilization=0.9,
    )

    # Enable LoRA on the attention and MLP projection layers
    model = FastLanguageModel.get_peft_model(
        model,
        r=64,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=64,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
    )

    # Load SFT dataset
    print("Loading SFT Dataset...")
    sft_dataset = SFT_Dataset_Gen("./data/SFT_Reasoning.csv")

    # Stage 1: SFT (train_sft prints its own start message as well)
    print("Starting SFT Training...")
    model, tokenizer = train_sft(model, tokenizer, sft_dataset)

    # Stage 2: GRPO Configuration
    training_args = GRPOConfig(
        output_dir="qwen-chess-grpo-outputs-3",
        learning_rate=4e-6,
        adam_beta1=0.9,
        adam_beta2=0.99,
        weight_decay=0.1,
        warmup_ratio=0.1,
        lr_scheduler_type="cosine",
        logging_steps=1,
        per_device_train_batch_size=1, # NOTE: the recorded run log shows Unsloth raising this to num_generations (10)
        gradient_accumulation_steps=4,
        num_generations=10, # Number of outputs to generate per prompt to compare
        max_prompt_length=500, # Adjusted for buffer
        max_completion_length=500, # Adjusted for buffer
        num_train_epochs=20,
        save_steps=100,
        max_grad_norm=0.1,
        report_to="none",
        use_vllm=False, # vLLM-backed generation is disabled for this run
    )

    print("Starting Training...")
    trainer = GRPOTrainer(
        model=model,
        processing_class=tokenizer,
        reward_funcs=[chess_reward_func, format_reward_func], # Move-quality reward plus output-format reward
        args=training_args,
        train_dataset=train_dataset,
    )

    trainer.train()

    # Save the final model and tokenizer; the evaluation cell loads this directory
    print("Saving model...")
    model.save_pretrained("qwen-chess-grpo-final-3")
    tokenizer.save_pretrained("qwen-chess-grpo-final-3")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
Generating Dataset...
Loading Model...
==((====))==  Unsloth 2025.11.3: Fast Qwen2 patching. Transformers: 4.57.1. vLLM: 0.11.1.
   \\   /|    NVIDIA L40S. Num GPUs = 2. Max memory: 44.392 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.5.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Unsloth 2025.11.3 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


Loading SFT Dataset...
Starting SFT Training...


Map:   0%|          | 0/359 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=64):   0%|          | 0/359 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


Starting SFT Training...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 359 | Num Epochs = 2 | Total steps = 90
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 161,480,704 of 7,777,097,216 (2.08% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,3.1177
2,3.137
3,2.921
4,2.1614
5,1.7713
6,1.3525
7,1.2103
8,0.7964
9,0.7803
10,0.623


SFT Training complete. Saving to qwen-chess-sft-3
Unsloth: We now expect `per_device_train_batch_size` to be a multiple of `num_generations`.
We will change the batch size of 1 to the `num_generations` of 10
Starting Training...


The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 50 | Num Epochs = 20 | Total steps = 240
O^O/ \_/ \    Batch size per device = 10 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (10 x 4 x 1) = 40
 "-____-"     Trainable parameters = 161,480,704 of 7,777,097,216 (2.08% trained)


Step,Training Loss,reward,reward_std,completions / mean_length,completions / min_length,completions / max_length,completions / clipped_ratio,completions / mean_terminated_length,completions / min_terminated_length,completions / max_terminated_length,sampling / sampling_logp_difference / mean,sampling / sampling_logp_difference / max,sampling / importance_sampling_ratio / min,sampling / importance_sampling_ratio / mean,sampling / importance_sampling_ratio / max,kl,rewards / chess_reward_func / mean,rewards / chess_reward_func / std,rewards / format_reward_func / mean,rewards / format_reward_func / std
1,0.0041,-0.775,2.419755,27.825001,1.0,103.0,0.0,27.825001,1.0,103.0,0,0,0,0,0,4.130183,-1.0,0.0,0.225,2.396445
2,0.0044,-1.0875,2.474394,19.75,1.0,58.0,0.0,19.75,1.0,58.0,No Log,No Log,No Log,No Log,No Log,4.391936,-0.9875,0.079057,-0.1,2.447395
3,0.0042,-1.0875,2.408503,19.300001,1.0,71.0,0.0,19.300001,1.0,71.0,No Log,No Log,No Log,No Log,No Log,4.23126,-0.9875,0.079057,-0.1,2.45785
4,0.0039,-1.2125,2.448563,22.125,1.0,123.0,0.0,22.125,1.0,123.0,No Log,No Log,No Log,No Log,No Log,3.897303,-0.9875,0.079057,-0.225,2.438868
5,0.0039,-1.0375,2.39913,20.225,1.0,94.0,0.0,20.225,1.0,94.0,No Log,No Log,No Log,No Log,No Log,3.941906,-0.9875,0.079057,-0.05,2.490495
6,0.004,-0.975,2.24985,19.450001,1.0,52.0,0.0,19.450001,1.0,52.0,No Log,No Log,No Log,No Log,No Log,3.958949,-1.0,0.0,0.025,2.449359
7,0.0037,-0.23,2.484554,25.475,1.0,64.0,0.0,25.475,1.0,64.0,No Log,No Log,No Log,No Log,No Log,3.682895,-0.93,0.301449,0.7,2.388004
8,0.0035,0.0125,2.204017,28.6,1.0,59.0,0.0,28.6,1.0,59.0,No Log,No Log,No Log,No Log,No Log,3.53791,-0.9875,0.079057,1.0,2.207214
9,0.0032,0.7375,1.55837,34.049999,1.0,87.0,0.0,34.049999,1.0,87.0,No Log,No Log,No Log,No Log,No Log,3.216204,-0.9875,0.079057,1.725,1.804375
10,0.0023,0.22,2.228859,36.875,1.0,160.0,0.0,36.875,1.0,160.0,No Log,No Log,No Log,No Log,No Log,2.284181,-0.93,0.301449,1.15,2.154899


Saving model...


In [52]:
import chess
import chess.engine
import pandas as pd
from unsloth import FastLanguageModel
import torch
import re, html
from tqdm import tqdm
# =========================================================
# LOAD MODEL
# =========================================================
# Directory the GRPO training cell saved the final model and tokenizer to.
# Using one constant keeps the log message and the loaded path in agreement.
SAVED_MODEL_DIR = "qwen-chess-grpo-final-3/"

print(f"Loading saved model from '{SAVED_MODEL_DIR}'...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = SAVED_MODEL_DIR,
    max_seq_length = 1024,
    dtype = None,
    load_in_4bit = True,
)

# Switch the loaded model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

def extract_answer(response: str, tail_chars: "int | None" = 50):
    """
    Return the move inside the last <answer>...</answer> block of ``response``.

    Args:
        response: Decoded model output.
        tail_chars: Only the last ``tail_chars`` characters are searched, so an
            answer template quoted earlier in the text (e.g. in the prompt) is
            not picked up. Pass ``None`` to search the whole string.

    Returns:
        The stripped content of the last answer block, or ``"ERR"`` when the
        searched text contains no complete block.

    Raises:
        ValueError: if ``tail_chars`` is not ``None`` and not positive.
    """
    if tail_chars is not None:
        if tail_chars <= 0:
            raise ValueError("tail_chars must be a positive integer or None")
        response = response[-tail_chars:]

    # Unescape any &lt;answer&gt; HTML escaping
    response = html.unescape(response)

    # Find ALL answer blocks (tags matched case-insensitively)
    matches = re.findall(r"<answer>\s*(.*?)\s*</answer>", response, re.DOTALL | re.IGNORECASE)
    if not matches:
        return "ERR"

    # The LAST block is the model's final answer
    return matches[-1].strip()


def predict_move(model, tokenizer, fen: str, max_new_tokens: int = 64):
    """
    Ask the model for a move in the position ``fen``.

    Args:
        model: Loaded causal LM exposing ``generate`` and ``device``.
        tokenizer: Matching tokenizer.
        fen: Position to evaluate.
        max_new_tokens: Generation budget; raise it if the reasoning is cut
            off before the answer block is produced.

    Returns:
        ``(move, response)`` where ``response`` is the decoded text of the
        newly generated tokens only and ``move`` is what ``extract_answer``
        found in it ("ERR" when there is no answer block).
    """
    prompt = parse_fen_and_generate_prompt(fen)
    # Put the inputs on the model's own device instead of assuming "cuda".
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        use_cache=True,
        # Low temperature keeps sampling close to greedy.
        # NOTE(review): these two only apply when sampling is enabled; pass
        # do_sample explicitly if strictly deterministic output is required.
        temperature=0.1,
        top_p=1.0,
    )

    # The output sequence starts with the prompt tokens. Decode only what was
    # generated, because the prompt itself contains an <answer>...</answer>
    # template that must not be mistaken for the model's answer.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    move = extract_answer(response)

    return move, response


def stockfish_best_move(stockfish_path: str, fen: str, depth: int = 15):
    """
    Return Stockfish's preferred move for ``fen`` in UCI notation.

    A fresh engine process is started for the call and always shut down, even
    when the analysis raises. Returns ``None`` when the engine reports no
    principal variation (for example when the position has no legal moves).
    """
    board = chess.Board(fen)
    engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)
    try:
        result = engine.analyse(board, chess.engine.Limit(depth=depth))
    finally:
        # Never leave an engine subprocess behind.
        engine.quit()

    pv = result.get("pv")
    return pv[0].uci() if pv else None

def evaluate_model(stockfish_path, fens):
    """
    Compare the model's move with Stockfish's best move on every FEN.

    Uses the module-level ``model`` and ``tokenizer``.

    Args:
        stockfish_path: Path to the Stockfish binary.
        fens: Iterable of FEN strings to evaluate.

    Returns:
        ``(df, accuracy)``: a DataFrame with columns FEN / Model / Stockfish /
        Correct, and the fraction of exact matches (NaN when ``fens`` is
        empty).
    """
    columns = ["FEN", "Model", "Stockfish", "Correct"]
    results = []

    for fen in tqdm(fens):
        model_move, _raw_response = predict_move(model, tokenizer, fen)

        # Stockfish best move
        sf_move = stockfish_best_move(stockfish_path, fen, depth=15)

        results.append({
            "FEN": fen,
            "Model": model_move,
            "Stockfish": sf_move,
            "Correct": model_move == sf_move
        })

    # Explicit columns keep the frame well-formed when there are no results.
    df = pd.DataFrame(results, columns=columns)
    accuracy = df["Correct"].mean() if len(df) else float("nan")

    print("\n==================== RESULTS ====================")
    print(df)
    print(f"\nAccuracy: {accuracy * 100:.2f}%")

    return df, accuracy


# =========================================================
# RUN EVALUATION
# =========================================================
# Stockfish binary used as the reference engine for the evaluation.
ENGINE_PATH = "./stockfish/stockfish-ubuntu-x86-64-avx2"

# Load FEN test set (positions are read from the "FEN" column)
df_eval = pd.read_csv("./data/eval_data.csv")
fens_to_test = df_eval["FEN"].tolist()

# Run evaluation: df holds the per-position results, acc the match rate
df, acc = evaluate_model(ENGINE_PATH, fens_to_test)


Loading saved model from 'qwen-chess-grpo-final'...
==((====))==  Unsloth 2025.11.3: Fast Qwen2 patching. Transformers: 4.57.1. vLLM: 0.11.1.
   \\   /|    NVIDIA L40S. Num GPUs = 2. Max memory: 44.392 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.5.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 50/50 [01:17<00:00,  1.55s/it]


                                                  FEN Model Stockfish  Correct
0   2rq1rk1/4bppp/p3pB2/np1p4/3P4/2PQ3P/PPBN1PP1/R...  h6h7      g7g6    False
1     3r2k1/5ppp/8/2Q5/8/5N2/1P3PPP/2Rq2K1 w - - 2 30  f1f3      f3e1    False
2   Q4b1r/2pkq1p1/p2p3p/np6/3P4/1BP5/PP3PPP/RNB3K1...  f1f3      c1e3    False
3   r3Q1kr/pNqp1ppp/np6/2pP4/2n5/8/PPP2PPP/R1B1R1K...  g7g6      a8e8    False
4   2r2rk1/1p3ppp/2n1b3/q2p4/3b4/P1N1B2P/2Q2PP1/RB...  f7f6      g7g6    False
5   r4rk1/pp1q1p1p/5p2/2bpp3/5P2/1PNP3b/1PP1N1P1/R...  f1f3      d1e1    False
6           3r2k1/5pbp/8/8/8/5Q2/P5PP/3r3K w - - 3 39  f1f3      f3d1    False
7   3r1rk1/p4pp1/1pQ1b2p/4q3/8/P7/1P4PP/4R2K w - -...  f1f3      c6c3    False
8   r1bqk2r/1pp2p2/pn2p3/6p1/3P3B/P1P1PQ2/2PN2PP/R...  h7h6      f7f5    False
9   5rk1/pR4pp/4p3/q2pPp2/3n1P2/PQP5/6PP/6K1 w - -...  f1f3      b3b4    False
10  5rk1/2p1b1pp/Q3n1p1/3pP3/P2P4/2P5/3B2PP/RN3q1K...  f1f3      a6f1    False
11     2r2rk1/1p4b1/p5Q1/5p2/8/8/PP6/K1q4R w - - 5 


