In [1]:
import argparse
from datasets import load_dataset, Dataset

def format_poker_data(example):
    """Convert one PokerBench record into an instruction/response pair for GRPO training.

    Two record layouts are accepted:

    * Structured records carrying ``hand_info``, ``player_cards``,
      ``board_cards``, ``position``, ``action_history`` and ``gto_decision``.
      These are rendered into a natural-language prompt.
    * Pre-rendered records carrying ``instruction`` and ``output`` -- the
      fields this notebook's preview cell reads from ``dataset['train']``.
      The instruction text is passed through unchanged.

    Args:
        example: Mapping holding the columns of a single dataset row.

    Returns:
        dict: ``{"instruction": <prompt text>, "response": <target decision>}``.

    Raises:
        KeyError: If the record matches neither layout.
    """
    structured_fields = (
        "hand_info",
        "player_cards",
        "board_cards",
        "position",
        "action_history",
        "gto_decision",
    )

    if not all(field in example for field in structured_fields):
        # The row is not in the structured layout, so fall back to the
        # pre-rendered one instead of failing on the first missing field.
        try:
            return {
                "instruction": example["instruction"],
                "response": example["output"],
            }
        except KeyError as err:
            raise KeyError(
                "Unsupported PokerBench record layout; "
                f"available keys: {sorted(example)}"
            ) from err

    hand_info = example["hand_info"]
    player_cards = example["player_cards"]
    board_cards = example["board_cards"]
    position = example["position"]
    action_history = example["action_history"]
    gto_decision = example["gto_decision"]

    # Create a structured prompt
    prompt = f"""
You are playing Texas Hold'em Poker. Analyze the following situation and make the optimal GTO decision.

Hand Information:
{hand_info}

Your Cards: {player_cards}
Board: {board_cards}
Your Position: {position}
Action History: {action_history}

What is the optimal GTO decision in this situation?
"""

    # Format for Open-R1 GRPO trainer
    return {
        "instruction": prompt,
        "response": gto_decision,
    }

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
%%bash
# Launch GRPO training through `accelerate`, using the zero2 accelerate
# config, 8 processes, and the poker recipe config.
# This is a shell command, not Python: the %%bash cell magic (which must stay
# on the first line of the cell) hands the cell to bash so the kernel does not
# reject it as a syntax error.
# All paths are relative, so the working directory must contain `recipes/`
# and `src/open_r1/` (presumably an open-r1 checkout -- confirm before running).
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/zero2.yaml \
    --num_processes=8 src/open_r1/grpo.py \
    --config recipes/DeepSeek-R1-Distill-Qwen-7B/grpo/config_poker.yaml

In [1]:
# Options are declared argparse-style, but an empty argv is parsed below,
# so every option simply takes its default value inside the notebook.
parser = argparse.ArgumentParser(
    description="Preprocess PokerBench for GRPO training",
)
parser.add_argument(
    "--output_dataset",
    type=str,
    default="processed-poker-gto",
    help="Name of the output dataset on the Hub",
)
args = parser.parse_args([])

# Load the PokerBench dataset.
dataset = load_dataset("RZ412/PokerBench")

# Run format_poker_data over every example with 8 worker processes,
# dropping the columns listed for the train split.
processed_dataset = dataset.map(
    format_poker_data,
    remove_columns=dataset["train"].column_names,
    num_proc=8,
)

# Upload the result under the configured name and report it.
processed_dataset.push_to_hub(args.output_dataset)
print(f"Preprocessed dataset pushed to: {args.output_dataset}")

NameError: name 'argparse' is not defined

In [11]:
# Preview the first three rows of the train split.
print("Dataset sample:")
preview_rows = dataset['train'].select(range(3))
for number, example in enumerate(preview_rows, start=1):
    print(f"\nExample {number}:")
    print("Instruction:", example['instruction'])
    print("Output:", example['output'])

# Print the train split's own summary representation.
print("\nDataset info:")
print(dataset['train'])



Dataset sample:

Example 1:
Instruction: 

You are a specialist in playing 6-handed No Limit Texas Holdem. The following will be a game scenario and you need to make the optimal decision.

Here is a game summary:

The small blind is 0.5 chips and the big blind is 1 chips. Everyone started with 100 chips.
The player positions involved in this game are UTG, HJ, CO, BTN, SB, BB.
In this hand, your position is HJ, and your holding is [King of Diamond and Jack of Spade].
Before the flop, HJ raise 2.0 chips, and BB call. Assume that all other players that is not mentioned folded.
The flop comes King Of Spade, Seven Of Heart, and Two Of Diamond, then BB check, and HJ check.
The turn comes Jack Of Club, then BB check, HJ bet 3 chips, BB raise 10 chips, and HJ call.
The river comes Seven Of Club, then BB check.


Now it is your turn to make a move.
To remind you, the current pot size is 24.0 chips, and your holding is [King of Diamond and Jack of Spade].

Decide on an action based on the streng