# Poker LLM Tournament

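Single-elimination tournament comparing poker LLMs with **1000 hands per matchup** in a full run (the default `DRY_RUN = True` setting in Section 3 plays only 10 hands per matchup for quick testing).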

## Models
| Model | Type | Description |
|-------|------|-------------|
| Qwen3-SFT | HuggingFace | `YiPz/qwen3-4b-pokergpt-o3-sft-lora` - Fine-tuned on 5k hands |
| Qwen3-GRPO | HuggingFace | `YiPz/qwen3-4b-pokerbench-grpo` - GRPO fine-tuned model |
| Llama3-SFT | HuggingFace | `YiPz/llama3-8b-pokerbench-sft` - From PokerBench paper |
| GPT-4 | OpenAI API | `gpt-4` - Only runs if the Round 1 winner (your Qwen3 model) beats Llama3-SFT in Round 2 |

## Gauntlet Format (Cost Optimized)
```
Round 1: Qwen3-SFT vs Qwen3-GRPO     (compare finetuning approaches)
Round 2: Winner R1 vs Llama3-SFT     (benchmark test)
Round 3: Winner R2 vs GPT-4          (only if your model wins R2)
```

**Winner determined by total chip profit after 1000 hands.**


## 1. Setup & Install

In [None]:
# Mount Google Drive for model caching
from google.colab import drive
drive.mount('/content/drive')

# Install dependencies
# (the leading "!" runs the line as a shell command; this only works inside a notebook)
!pip install -q transformers accelerate torch pokerkit
!pip install -q tqdm pandas matplotlib openai

import os
# Point the Hugging Face cache at the mounted Drive folder.
# NOTE(review): presumably so downloaded weights persist across Colab sessions - confirm.
os.environ["HF_HOME"] = "/content/drive/MyDrive/hf_cache"

print("Setup complete!")

In [None]:
# Set OpenAI API key from Colab secrets
from google.colab import userdata

# Best-effort: any failure while fetching the secret only disables the GPT-4
# round, so report it and carry on instead of aborting the notebook.
try:
    os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
    print("OpenAI API key loaded from secrets")
except Exception as exc:  # not a bare except: Ctrl-C / SystemExit must still propagate
    print("Warning: OPENAI_API_KEY not found in secrets. GPT-4 matchup will be skipped.")
    # Show the underlying reason so a missing secret can be told apart from other failures.
    print(f"  Reason: {type(exc).__name__}: {exc}")

## 2. GPU Detection

In [None]:
import subprocess
import torch

def detect_gpu():
    """Detect the first GPU's name and total VRAM by querying ``nvidia-smi``.

    Returns:
        A ``(gpu_name, vram_gb)`` tuple, where ``vram_gb`` is the reported
        memory total divided by 1024. If ``nvidia-smi`` is missing, exits with
        an error, or prints nothing parseable, ``("Unknown", 16.0)`` is
        returned instead of raising.
    """
    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"],
            capture_output=True, text=True, check=True
        )
        # The query prints one CSV line per GPU; report only the first device
        # so multi-GPU machines no longer fall through to the fallback.
        first_line = result.stdout.strip().splitlines()[0]
        # Split from the right so a comma inside the device name cannot break parsing.
        gpu_name, vram_mb = first_line.rsplit(",", 1)
        return gpu_name.strip(), float(vram_mb) / 1024
    except (OSError, subprocess.SubprocessError, ValueError, IndexError):
        # OSError: binary not found; SubprocessError: non-zero exit;
        # ValueError/IndexError: empty or malformed output.
        return "Unknown", 16.0

GPU_NAME, VRAM_GB = detect_gpu()
print(f"GPU: {GPU_NAME} ({VRAM_GB:.0f}GB)")
print(f"CUDA available: {torch.cuda.is_available()}")

# Below 20 GB, warn that the ~16 GB FP16 Llama3-8B leaves little room for a second model.
if VRAM_GB < 20:
    print(
        "\nWarning: Llama3-8B requires ~16GB VRAM at FP16.",
        "Models will be loaded/unloaded sequentially to manage memory.",
        sep="\n",
    )

## 3. Tournament Configuration

In [None]:
# Tournament settings
DRY_RUN = True  # Set to False for full 1000-hand matchups
DRY_RUN_HANDS = 10  # Hands for quick testing
FULL_RUN_HANDS = 1000  # Hands for full tournament

# Effective number of hands per matchup, selected by DRY_RUN.
HANDS_PER_MATCHUP = DRY_RUN_HANDS if DRY_RUN else FULL_RUN_HANDS

# Chip amounts: with these values each player starts with 100 big blinds.
STARTING_STACK = 10000
SMALL_BLIND = 50
BIG_BLIND = 100
SEED = 42  # NOTE(review): presumably seeds the RNG for reproducible deals - confirm where it is consumed
VERBOSE = False  # Set True to see each action

# Model configurations
# Keyed by display name. "type" selects the backend ("transformers" or "openai");
# "model_id" is a Hugging Face repo id, "model" an OpenAI model name.
MODELS = {
    "Qwen3-SFT": {
        "type": "transformers",
        "model_id": "YiPz/qwen3-4b-pokergpt-o3-sft-lora",
    },
    "Qwen3-GRPO": {
        "type": "transformers",
        "model_id": "YiPz/qwen3-4b-pokerbench-grpo",
    },
    "Llama3-SFT": {
        "type": "transformers",
        "model_id": "YiPz/llama3-8b-pokerbench-sft",
    },
    "GPT-4": {
        "type": "openai",
        "model": "gpt-4",
    },
}

# Gauntlet order
# Each entry is one matchup (player A, player B). "WINNER_R1"/"WINNER_R2" are
# placeholders, not MODELS keys.
# NOTE(review): presumably replaced by the previous round's winner by the runner - confirm.
GAUNTLET = [
    ("Qwen3-SFT", "Qwen3-GRPO"),    # Round 1: Compare finetuning approaches
    ("WINNER_R1", "Llama3-SFT"),    # Round 2: vs Benchmark
    ("WINNER_R2", "GPT-4"),         # Round 3: vs GPT-4 (conditional)
]

# Output directory: results root plus sub-folders for per-model traces and charts.
OUTPUT_DIR = "/content/tournament_results"
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(f"{OUTPUT_DIR}/observability/traces", exist_ok=True)
os.makedirs(f"{OUTPUT_DIR}/charts", exist_ok=True)

# Echo the effective configuration so the run log is self-describing.
print("\n".join([
    "Tournament Config:",
    f"  Mode: {'DRY RUN' if DRY_RUN else 'FULL'}",
    f"  Hands per matchup: {HANDS_PER_MATCHUP}",
    f"  Stack: {STARTING_STACK}",
    f"  Blinds: {SMALL_BLIND}/{BIG_BLIND}",
    f"  Output: {OUTPUT_DIR}",
]))

## 4. Core Classes

In [None]:
import time
import random
import json
import re
import gc
from dataclasses import dataclass, field, asdict
from typing import List, Tuple, Dict, Any, Optional
from datetime import datetime
from pathlib import Path

from pokerkit import NoLimitTexasHoldem, Automation


# ============= Card & Position Utilities =============

# Suit letter -> Unicode suit symbol.
SUIT_MAP = dict(zip("shdc", "♠♥♦♣"))
# Rank characters from lowest (deuce) to highest (ace).
RANK_ORDER = "23456789TJQKA"
# Rank character -> numeric value, 2 for "2" up to 14 for "A".
RANK_VALUE = dict(zip(RANK_ORDER, range(2, 2 + len(RANK_ORDER))))


def pretty_card(card: str) -> str:
    """Render a card code with a Unicode suit symbol, e.g. 'As' -> 'A♠'.

    The last character is treated as the suit (case-insensitive) and
    everything before it as the rank. Strings shorter than two characters are
    returned unchanged, and an unrecognised suit character is kept as-is.
    """
    if len(card) >= 2:
        rank_part, suit_char = card[:-1], card[-1]
        return rank_part + SUIT_MAP.get(suit_char.lower(), suit_char)
    return card


def score_hole_cards(c1: str, c2: str) -> int:
    """Heuristic preflop strength of two hole cards (pocket aces score 128).

    Pairs score ``100 + 2 * rank``. Other hands score ``4 * high + low`` plus
    bonuses: suited (+12), connected (+6) or one-gapped (+3), both cards ten
    or better with a jack-or-better high card (+6), and holding an ace (+4).
    The rank is read from the first character of each card; an empty card or
    an unknown rank character counts as a deuce.
    """
    def rank_value(card: str) -> int:
        symbol = card[0].upper() if card else "2"
        return RANK_VALUE.get(symbol, 2)

    first, second = rank_value(c1), rank_value(c2)
    if first == second:
        return 100 + first * 2

    high, low = (first, second) if first > second else (second, first)
    suited = len(c1) > 1 and len(c2) > 1 and c1[-1].lower() == c2[-1].lower()

    bonus = 12 if suited else 0
    bonus += {1: 6, 2: 3}.get(high - low, 0)  # connectors / one-gappers
    if high >= 11 and low >= 10:
        bonus += 6  # both cards are broadway-ish
    if high == 14:
        bonus += 4  # ace-high
    return high * 4 + low + bonus


def get_position_name(player_idx: int, num_players: int, button_idx: int) -> str:
    """Get position name (UTG, MP, CO, BTN, SB, BB) for a seat.

    Args:
        player_idx: Seat index of the player to name.
        num_players: Number of players at the table.
        button_idx: Seat index of the dealer button.

    Returns:
        "Unknown" for fewer than two players; "BTN/SB" or "BB" heads-up;
        otherwise "BTN", "SB", "BB" for the first three seats from the button
        and a table-size-dependent name for the rest. For every table size the
        seat immediately before the button is "CO" (and, at 7+ players, the
        one before that is "HJ"). Seats with no conventional name fall back to
        ``"P<player_idx + 1>"``.
    """
    if num_players < 2:
        return "Unknown"
    offset = (player_idx - button_idx) % num_players

    if num_players == 2:
        return "BTN/SB" if offset == 0 else "BB"
    if offset < 3:
        return ("BTN", "SB", "BB")[offset]

    # 0-based index among the seats that sit between the BB and the button.
    seat = offset - 3
    small_tables = {4: ("CO",), 5: ("MP", "CO"), 6: ("UTG", "MP", "CO")}
    if num_players in small_tables:
        return small_tables[num_players][seat]

    if num_players >= 7:
        early = ("UTG", "UTG+1", "MP", "MP+1")
        late = ("HJ", "CO")
        first_late_seat = (num_players - 3) - len(late)
        # Late positions are anchored to the button so the last seat is always
        # CO, matching the smaller tables above (previously a 7- or 8-player
        # table ended on "MP+1" / "HJ").
        if seat >= first_late_seat:
            return late[seat - first_late_seat]
        if seat < len(early):
            return early[seat]

    return f"P{player_idx + 1}"


# ============= Action Parsing =============

@dataclass
class ParsedAction:
    """A poker action extracted from model output.

    ``action_type`` is a lowercase action name such as "fold", "check",
    "call", "raise" or "all_in"; ``amount`` is the attached size, or ``None``
    when the action carries no amount.
    """
    action_type: str
    amount: Optional[int] = None

    def __str__(self):
        """Return a title-cased label, followed by the amount when one is set."""
        label = self.action_type.title()
        # Compare against None rather than truthiness so an explicit amount of
        # 0 (e.g. an invalid "cbr 0") stays visible instead of being hidden.
        if self.amount is not None:
            return f"{label} {self.amount}"
        return label


@dataclass
class ParseResult:
    """Outcome of parsing one model response: the action plus how it was found."""
    action: ParsedAction
    method: str  # "tag" | "regex_*" | "default"
    raw_match: str  # text that was scanned (cut to 100 chars when method is "default")
    error: Optional[str] = None  # set by the parser only when no action pattern matched


class ActionParser:
    """Extracts a poker action from free-form model output.

    If the text contains an ``<action>...</action>`` tag, only the tag body is
    scanned; otherwise the whole text is. Patterns are tried in a fixed order:
    all-in, fold, check/call, then bet/raise. Raise amounts are read as whole
    numbers only.
    """

    RE_ACTION_TAG = re.compile(r"<action>\s*([^<]+?)\s*</action>", re.IGNORECASE)
    RE_FOLD = re.compile(r"\b(f|fold)\b", re.IGNORECASE)
    RE_CC = re.compile(r"\b(cc|call|check)\b", re.IGNORECASE)
    RE_CBR = re.compile(r"\b(?:cbr|bet|raise)(?:\s+(?:to\s+)?(\d+))?\b", re.IGNORECASE)
    RE_ALL_IN = re.compile(r"\b(all[\-\s]?in|shove)\b", re.IGNORECASE)

    def parse(self, text: str, can_check: bool = True, stack: int = 0) -> ParsedAction:
        """Return just the action from ``parse_with_metadata``."""
        outcome = self.parse_with_metadata(text, can_check, stack)
        return outcome.action

    def parse_with_metadata(self, text: str, can_check: bool = True, stack: int = 0) -> ParseResult:
        """Parse ``text`` and report which strategy produced the action.

        Args:
            text: Raw model output.
            can_check: Whether checking is legal; decides check vs. call for
                passive actions and check vs. fold for the default.
            stack: Amount used for an all-in and for a raise with no number.

        Returns:
            A ``ParseResult`` whose ``method`` is "tag" when an action tag was
            used, a "regex_*" name when the raw text matched, or "default"
            (with an error message) when nothing matched.
        """
        tagged = self.RE_ACTION_TAG.search(text)
        if tagged is None:
            from_tag, content = False, text
        else:
            from_tag, content = True, tagged.group(1).strip()

        def method_for(fallback: str) -> str:
            return "tag" if from_tag else fallback

        if self.RE_ALL_IN.search(content):
            return ParseResult(ParsedAction("all_in", stack), method_for("regex_allin"), content)

        if self.RE_FOLD.search(content):
            return ParseResult(ParsedAction("fold"), method_for("regex_fold"), content)

        if self.RE_CC.search(content):
            passive = "check" if can_check else "call"
            return ParseResult(ParsedAction(passive), method_for("regex_call"), content)

        raise_match = self.RE_CBR.search(content)
        if raise_match is not None:
            amount_text = raise_match.group(1)
            # A raise without a number is treated as a raise of the whole stack.
            amount = int(amount_text) if amount_text else stack
            return ParseResult(ParsedAction("raise", amount), method_for("regex_raise"), content)

        fallback_action = ParsedAction("check" if can_check else "fold")
        return ParseResult(fallback_action, "default", content[:100], "No valid action pattern found")


# ============= Action Record =============

@dataclass
class ActionRecord:
    """One decision taken by a player, together with the context it was made in."""
    hand_id: int
    street: str  # compared against "preflop" by the players; other values are set by the caller
    hole_cards: Tuple[str, str]
    board: List[str]
    pot: int
    to_call: int  # 0 means the player was able to check
    stack: int
    position: str
    action: ParsedAction
    thinking: str  # reasoning text, stored truncated by the players
    response: str  # final answer text, stored truncated by the players
    latency_ms: float
    tokens_generated: int
    parse_method: str = "unknown"  # "tag", "regex_*", "default", or "error" when the player hit an exception
    parse_error: Optional[str] = None


# ============= PromptBuilder (pokergpt format) =============

class PromptBuilder:
    """Builds prompts in pokergpt format for LLM poker players.

    Chip amounts passed in are divided by ``big_blind`` and rendered in BB.
    The builder also keeps a per-hand log of deals and actions
    (``action_history``) that is echoed into every prompt under
    "Actions so far:".
    """

    def __init__(self, big_blind: int = 100):
        """Create a builder that converts chip amounts using ``big_blind``."""
        self.big_blind = big_blind
        self.action_history: List[str] = []

    def record_deal(self, player_label: str, is_hero: bool = False, blind_note: str = ""):
        """Record a hole card deal.

        ``blind_note``, when given, is appended in parentheses. The hero line
        is phrased in the second person ("were dealt your hole cards").
        """
        suffix = f" ({blind_note})" if blind_note else ""
        if is_hero:
            self.action_history.append(f"{player_label} were dealt your hole cards{suffix}.")
        else:
            self.action_history.append(f"{player_label} was dealt hole cards{suffix}.")

    def record_board(self, board_cards: List[str]):
        """Record a board deal, naming the street from the number of cards."""
        street = {3: "Flop", 4: "Turn", 5: "River"}.get(len(board_cards), "Board")
        pretty = " ".join(pretty_card(c) for c in board_cards)
        self.action_history.append(f"{street} dealt: {pretty}")

    def record_action(self, player_label: str, action: str, amount_bb: Optional[float] = None):
        """Record a player action; ``amount_bb`` is already in big blinds."""
        if amount_bb is not None:
            self.action_history.append(f"{player_label} {action} {amount_bb:.1f} BB.")
        else:
            self.action_history.append(f"{player_label} {action}.")

    def reset_hand(self):
        """Reset action history for a new hand."""
        self.action_history = []

    def get_player_label(self, player_idx: int, hero_idx: int, positions: List[str]) -> str:
        """Get label for a player.

        Returns the position name (or ``P<n>`` when ``positions`` is too
        short), wrapped as ``You (<pos>)`` for the hero.
        """
        pos = positions[player_idx] if player_idx < len(positions) else f"P{player_idx + 1}"
        if player_idx == hero_idx:
            return f"You ({pos})"
        return pos

    def build_prompt(
        self,
        hero_idx: int,
        hero_cards: Tuple[str, str],
        board: List[str],
        stacks: List[int],
        bets: List[int],
        pot: int,
        to_call: int,
        min_raise: int,
        button_idx: int,
        num_players: int,
        street: str = "preflop",
    ) -> str:
        """Build a prompt in pokergpt format.

        Args:
            hero_idx: Seat index of the player the prompt is written for.
            hero_cards: The hero's two hole cards.
            board: Community cards; shown only when ``street`` is not
                "preflop" and the list is non-empty.
            stacks: Chip stack per seat.
            bets: Current bet per seat; only positive bets are listed.
            pot: Pot size in chips.
            to_call: Chips the hero must add to call; 0 offers Check/Bet
                instead of Fold/Call/Raise.
            min_raise: Minimum raise in chips, shown on the Raise line.
            button_idx: Seat index of the dealer button.
            num_players: Number of players at the table.
            street: Current street name; "preflop" adds the hand-strength line.

        Returns:
            The prompt text, lines joined with newlines.
        """
        bb = self.big_blind
        positions = [get_position_name(i, num_players, button_idx) for i in range(num_players)]
        hero_pos = positions[hero_idx]

        lines = [
            "You are an expert poker player and you are playing NT poker.",
            f"There are {num_players} players at the table.",
            f"You are in the {hero_pos} position.",
            "",
            "Stacks:",
        ]

        for i, stack in enumerate(stacks):
            label = self.get_player_label(i, hero_idx, positions)
            lines.append(f"- {label}: {stack / bb:.1f} BB")

        if self.action_history:
            lines.extend(["", "Actions so far:"])
            lines.extend(f"- {action}" for action in self.action_history)

        c1, c2 = hero_cards
        lines.extend([
            "",
            f"Your hole cards are: {pretty_card(c1)} {pretty_card(c2)}",
        ])

        if street == "preflop":
            strength = score_hole_cards(c1, c2)
            lines.append(f"Preflop hand strength score out of 128 (128 is pair Aces): {strength}")
        elif board:
            pretty_board = " ".join(pretty_card(c) for c in board)
            lines.append(f"The current board is: {pretty_board}")

        if any(b > 0 for b in bets):
            lines.extend(["", "The current bets are:"])
            for i, bet in enumerate(bets):
                if bet > 0:
                    label = self.get_player_label(i, hero_idx, positions)
                    lines.append(f"- {label}: {bet / bb:.1f} BB")

        lines.extend([
            "",
            f"The current pot size is: {pot / bb:.1f} BB",
            "It is now your turn to act.",
            "Minimum bet: 1 BB.",
            "",
            "Available actions:",
        ])

        if to_call > 0:
            lines.append("- Fold")
            # One decimal, like every other amount in the prompt: with ".0f" a
            # small-blind completion of 0.5 BB was shown as "Call 0 BB" and a
            # 2.5 BB minimum raise as "2".
            lines.append(f"- Call {to_call / bb:.1f} BB")
            lines.append(f"- Raise (minimum: {min_raise / bb:.1f} BB)")
        else:
            lines.append("- Check")
            lines.append("- Bet (minimum: 1 BB)")

        return "\n".join(lines)


print("Core classes loaded!")

In [None]:
# ============= Unit Tests: Action Parsing =============
print("Testing ActionParser...")

parser = ActionParser()

# Valid tag formats
assert parser.parse("<action>f</action>").action_type == "fold"
assert parser.parse("<action>cc</action>", can_check=True).action_type == "check"
assert parser.parse("<action>cc</action>", can_check=False).action_type == "call"
assert parser.parse("<action>cbr 500</action>").action_type == "raise"
assert parser.parse("<action>cbr 500</action>").amount == 500

# Regex fallbacks (no tags)
assert parser.parse("I fold").action_type == "fold"
# "call"/"check"/"cc" resolve by context: with the default can_check=True the
# parser returns "check", so a call must be requested with can_check=False.
assert parser.parse("call", can_check=False).action_type == "call"
assert parser.parse("call", can_check=True).action_type == "check"
assert parser.parse("raise to 300").action_type == "raise"
assert parser.parse("all in", stack=1000).action_type == "all_in"

# Edge cases
assert parser.parse("", can_check=True).action_type == "check"  # Empty defaults to check
assert parser.parse("gibberish text", can_check=False).action_type == "fold"  # Defaults to fold

print("✓ ActionParser tests passed!")

In [None]:
# ============= Unit Tests: PromptBuilder =============
print("Testing PromptBuilder...")

pb = PromptBuilder(big_blind=100)
prompt = pb.build_prompt(
    hero_idx=0, hero_cards=("As", "Kh"), board=[],
    stacks=[10000, 10000], bets=[50, 100], pot=150,
    to_call=50, min_raise=200, button_idx=0,
    num_players=2, street="preflop"
)

# Check key elements in prompt
assert "expert poker player" in prompt.lower(), "Missing intro text"
assert "There are 2 players at the table." in prompt, "Missing player count"
assert "You are in the BTN/SB position." in prompt, "Missing hero position"
assert "Preflop hand strength score" in prompt, "Missing preflop strength line"

# Check card formatting (suit letters must be rendered as symbols)
assert "Your hole cards are: A♠ K♥" in prompt, "Missing or unformatted hole cards"

# Check BB units are used (stacks of 10000 chips at BB=100 are 100.0 BB)
assert "100.0 BB" in prompt, "Missing BB units"

# Facing a bet: fold must be offered, check must not
assert "- Fold" in prompt, "Missing fold option when facing a bet"
assert "- Check" not in prompt, "Check offered while facing a bet"

# Test with board cards (postflop)
pb.reset_hand()
prompt_flop = pb.build_prompt(
    hero_idx=0, hero_cards=("As", "Kh"), board=["Ah", "7c", "2d"],
    stacks=[9500, 9500], bets=[0, 0], pot=500,
    to_call=0, min_raise=100, button_idx=0,
    num_players=2, street="flop"
)
assert "The current board is: A♥ 7♣ 2♦" in prompt_flop, "Missing board in flop prompt"
assert "Preflop hand strength score" not in prompt_flop, "Preflop score shown postflop"
assert "- Check" in prompt_flop, "Missing check option when nothing to call"
assert "- Fold" not in prompt_flop, "Fold offered when nothing to call"

print("✓ PromptBuilder tests passed!")

In [None]:
# ============= Observability =============

@dataclass
class ModelObservability:
    """Running counters and latency samples for one model.

    Holds parse-method counts, executed-action counts, per-action latencies
    and a token total, and derives summary statistics from them.
    """
    model_name: str
    total_actions: int = 0
    valid_tag_parses: int = 0
    regex_fallback_parses: int = 0
    default_fallback_parses: int = 0
    action_execution_failures: int = 0
    empty_responses: int = 0
    fold_count: int = 0
    check_count: int = 0
    call_count: int = 0
    raise_count: int = 0
    all_in_count: int = 0
    latencies: List[float] = field(default_factory=list)
    total_tokens: int = 0

    @property
    def parse_error_rate(self) -> float:
        """Share of actions not parsed from a tag (regex + default fallbacks); 0.0 with no actions."""
        if self.total_actions == 0:
            return 0.0
        fallbacks = self.regex_fallback_parses + self.default_fallback_parses
        return fallbacks / self.total_actions

    @property
    def avg_latency_ms(self) -> float:
        """Mean of the recorded latencies, or 0.0 when none were recorded."""
        if not self.latencies:
            return 0.0
        return sum(self.latencies) / len(self.latencies)

    @property
    def p99_latency_ms(self) -> float:
        """Latency at index ``int(n * 0.99)`` of the sorted samples, or 0.0 when empty."""
        if not self.latencies:
            return 0.0
        ordered = sorted(self.latencies)
        rank = int(len(ordered) * 0.99)
        return ordered[rank]


class ObservabilityCollector:
    """Collects per-model action traces and aggregate metrics, and writes them to disk.

    Traces go to ``<output_dir>/observability/traces/`` as JSON Lines (one
    file per model and matchup); metrics go to
    ``<output_dir>/observability/model_metrics.json``.
    """

    # Executed action name -> ModelObservability counter attribute to bump.
    _ACTION_COUNTERS = {
        "fold": "fold_count",
        "check": "check_count",
        "call": "call_count",
        "raise": "raise_count",
        "all_in": "all_in_count",
    }

    def __init__(self, output_dir: str):
        """Create an empty collector rooted at ``output_dir``."""
        self.output_dir = Path(output_dir)
        self.traces: Dict[str, List[dict]] = {}
        self.metrics: Dict[str, ModelObservability] = {}

    def record_action(self, model_name: str, record: ActionRecord, executed_action: str, fallback_used: bool):
        """Store a trace for one action and update the model's counters.

        Args:
            model_name: Key under which traces and metrics are grouped.
            record: The player's record of the decision.
            executed_action: Action actually applied to the game
                (case-insensitive); unknown names are not counted.
            fallback_used: True when the parsed action could not be executed;
                counted as an action execution failure.
        """
        # Store trace
        trace = {
            "timestamp": datetime.now().isoformat(),
            "hand_id": record.hand_id,
            "street": record.street,
            "hole_cards": list(record.hole_cards),
            "board": record.board,
            "pot": record.pot,
            "to_call": record.to_call,
            "stack": record.stack,
            "position": record.position,
            "raw_response": record.response,
            "thinking": record.thinking,
            "parsed_action": record.action.action_type,
            "parsed_amount": record.action.amount,
            "parse_method": record.parse_method,
            "parse_error": record.parse_error,
            "executed_action": executed_action,
            "fallback_used": fallback_used,
            "latency_ms": record.latency_ms,
            "tokens": record.tokens_generated,
        }
        self.traces.setdefault(model_name, []).append(trace)

        # Update metrics
        if model_name not in self.metrics:
            self.metrics[model_name] = ModelObservability(model_name=model_name)
        m = self.metrics[model_name]

        m.total_actions += 1
        m.latencies.append(record.latency_ms)
        m.total_tokens += record.tokens_generated

        # NOTE: any other parse method (e.g. the "error" marker players set
        # after an exception) is not counted in any parse bucket.
        if record.parse_method == "tag":
            m.valid_tag_parses += 1
        elif record.parse_method.startswith("regex"):
            m.regex_fallback_parses += 1
        elif record.parse_method == "default":
            m.default_fallback_parses += 1

        if not record.response.strip():
            m.empty_responses += 1
        if fallback_used:
            m.action_execution_failures += 1

        counter = self._ACTION_COUNTERS.get(executed_action.lower())
        if counter is not None:
            setattr(m, counter, getattr(m, counter) + 1)

    def write_traces(self, matchup_id: str):
        """Write every model's traces to ``<safe model name>_<matchup_id>.jsonl``.

        Slashes and spaces in model names are replaced with underscores.
        Existing files for the same matchup are overwritten.
        """
        traces_dir = self.output_dir / "observability" / "traces"
        traces_dir.mkdir(parents=True, exist_ok=True)
        for model_name, traces in self.traces.items():
            safe_name = model_name.replace("/", "_").replace(" ", "_")
            filepath = traces_dir / f"{safe_name}_{matchup_id}.jsonl"
            with open(filepath, "w", encoding="utf-8") as f:
                f.writelines(json.dumps(trace) + "\n" for trace in traces)

    def export_metrics(self):
        """Write the aggregated metrics of all models to ``model_metrics.json``."""
        metrics_path = self.output_dir / "observability" / "model_metrics.json"
        # Create the folder here too: previously this only worked if
        # write_traces (or the caller) had already created it.
        metrics_path.parent.mkdir(parents=True, exist_ok=True)
        data = {}
        for name, m in self.metrics.items():
            data[name] = {
                "total_actions": m.total_actions,
                "valid_tag_parses": m.valid_tag_parses,
                "regex_fallback_parses": m.regex_fallback_parses,
                "default_fallback_parses": m.default_fallback_parses,
                "parse_error_rate": round(m.parse_error_rate, 4),
                "empty_responses": m.empty_responses,
                "action_execution_failures": m.action_execution_failures,
                "action_distribution": {
                    "fold": m.fold_count, "check": m.check_count,
                    "call": m.call_count, "raise": m.raise_count, "all_in": m.all_in_count,
                },
                "avg_latency_ms": round(m.avg_latency_ms, 2),
                "p99_latency_ms": round(m.p99_latency_ms, 2),
                "total_tokens": m.total_tokens,
            }
        with open(metrics_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def clear(self):
        """Drop all collected traces and metrics."""
        self.traces = {}
        self.metrics = {}


print("Observability loaded!")

In [None]:
# ============= TransformersPlayer =============

class TransformersPlayer:
    """Poker player backed by a local causal language model and tokenizer.

    Each decision builds a chat prompt (system prompt + situation), samples a
    completion, parses an action out of it and appends an ``ActionRecord`` to
    ``action_history``. If generation or parsing raises, the player falls back
    to a safe action (check when free, otherwise fold) and records the error.
    """

    SYSTEM_PROMPT = """
    You are an expert poker coach. Analyze the situation and provide your recommended action.

PHH Format Actions:
- 'f' = fold (give up the hand)
- 'cc' = check OR call (use for BOTH - never include an amount)
- 'cbr X' = bet or raise TO X big blinds (only when YOU are betting/raising)

IMPORTANT:
- To CALL any bet (regardless of size), always use just 'cc' with NO amount
- 'cbr' is ONLY for when YOU initiate a bet or raise, not for calling
- Do NOT include prefixes like 'phh', 'p1', 'p6' etc - just the action
- Invalid formats are penalized

Always output your final action inside <action></action> tags.

Valid examples:
- <action>f</action>
- <action>cc</action>
- <action>cbr 5</action>

Invalid examples (DO NOT use):
- <action>phh cc</action>
- <action>p6 cbr 5</action>
- <action>cc 29</action>
- <action>cbr 0</action>

Think step by step, then output exactly ONE action tag."""

    # Token id that ends the "thinking" part of a generation: tokens after its
    # last occurrence form the response. When it never appears, the whole
    # output is the response.
    # NOTE(review): presumably the Qwen3 end-of-think token id - confirm per model.
    THINK_END_TOKEN_ID = 151668

    def __init__(self, name: str, model: Any, tokenizer: Any, temperature: float = 0.6, max_new_tokens: int = 2096, big_blind: int = 100):
        """Store the model, tokenizer and sampling settings.

        If the tokenizer has no pad token, its EOS token is used for padding.
        """
        self.name = name
        self.model = model
        self.tokenizer = tokenizer
        self.temperature = temperature
        self.max_new_tokens = max_new_tokens
        self.big_blind = big_blind
        self.parser = ActionParser()
        self.action_history: List[ActionRecord] = []
        self._hand_id = 0
        self._street = "preflop"

        # PromptBuilder for consistent prompt format
        self.prompt_builder = PromptBuilder(big_blind=big_blind)

        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def set_hand_context(self, hand_id: int, street: str):
        """Set the hand id and street stamped on subsequent action records.

        Entering "preflop" also clears the prompt builder's action log.
        """
        self._hand_id = hand_id
        self._street = street
        # Reset prompt builder for new hand
        if street == "preflop":
            self.prompt_builder.reset_hand()

    def get_action(self, hole_cards, board, pot, to_call, stack, position, num_players) -> ParsedAction:
        """Decide an action from a self-built, single-player-view prompt.

        Chip amounts (``pot``, ``to_call``, ``stack``) are converted to big
        blinds for the prompt. Returns the parsed (or fallback) action.
        """
        start = time.perf_counter()
        prompt = self._build_simple_prompt(hole_cards, board, pot, to_call, stack, position, num_players)
        return self._decide_and_record(prompt, start, hole_cards, board, pot, to_call, stack, position)

    def get_action_with_prompt(self, prompt_text: str, hole_cards, board, pot, to_call, stack, position) -> ParsedAction:
        """Get action using a pre-built prompt (pokergpt format).

        ``prompt_text`` becomes the user message; the remaining arguments are
        only used for parsing context and for the action record.
        """
        start = time.perf_counter()

        # Format as chat with system prompt
        messages = [
            {"role": "system", "content": self.SYSTEM_PROMPT},
            {"role": "user", "content": prompt_text},
        ]
        full_prompt = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        return self._decide_and_record(full_prompt, start, hole_cards, board, pot, to_call, stack, position)

    def _decide_and_record(self, full_prompt: str, start: float, hole_cards, board, pot, to_call, stack, position) -> ParsedAction:
        """Generate, parse and record one decision for an already-templated prompt.

        ``start`` is the ``time.perf_counter()`` value taken by the caller, so
        the recorded latency also covers prompt construction.
        """
        can_check = to_call == 0
        try:
            thinking, response, tokens_gen = self._generate(full_prompt)
            result = self.parser.parse_with_metadata(response, can_check, stack)
            action = result.action
            parse_method = result.method
            parse_error = result.error
        except Exception as e:
            thinking, response, tokens_gen = "", f"ERROR: {e}", 0
            # Same safe default as ActionParser: never fold when checking is
            # free (a fold with nothing to call only gives the hand away).
            action = ParsedAction("check" if can_check else "fold")
            parse_method = "error"
            parse_error = str(e)

        latency = (time.perf_counter() - start) * 1000

        self.action_history.append(ActionRecord(
            hand_id=self._hand_id, street=self._street, hole_cards=hole_cards,
            board=list(board), pot=pot, to_call=to_call, stack=stack,
            position=position, action=action, thinking=thinking[:1000],
            response=response[:500], latency_ms=latency, tokens_generated=tokens_gen,
            parse_method=parse_method, parse_error=parse_error,
        ))
        return action

    def _build_simple_prompt(self, hole_cards, board, pot, to_call, stack, position, num_players) -> str:
        """Build a pokergpt-style prompt from the hero's view only.

        Unlike ``PromptBuilder.build_prompt`` this lists just the hero's
        stack (no opponents, bets or action history). The result is already
        wrapped in the tokenizer's chat template together with the system
        prompt.
        """
        bb = self.big_blind

        # Build pokergpt format prompt
        lines = [
            "You are an expert poker player and you are playing NT poker.",
            f"There are {num_players} players at the table.",
            f"You are in the {position} position.",
            "",
            f"Your stack: {stack / bb:.1f} BB",
        ]

        c1, c2 = hole_cards
        lines.extend([
            "",
            f"Your hole cards are: {pretty_card(c1)} {pretty_card(c2)}",
        ])

        if self._street == "preflop":
            strength = score_hole_cards(c1, c2)
            lines.append(f"Preflop hand strength score out of 128 (128 is pair Aces): {strength}")
        elif board:
            pretty_board = " ".join(pretty_card(c) for c in board)
            lines.append(f"The current board is: {pretty_board}")

        lines.extend([
            "",
            f"The current pot size is: {pot / bb:.1f} BB",
            "It is now your turn to act.",
            "Minimum bet: 1 BB.",
            "",
            "Available actions:",
        ])

        if to_call > 0:
            lines.append("- Fold")
            lines.append(f"- Call {to_call / bb:.1f} BB")
            lines.append(f"- Raise (minimum: {(to_call + bb) / bb:.1f} BB)")
        else:
            lines.append("- Check")
            lines.append("- Bet (minimum: 1 BB)")

        user_msg = "\n".join(lines)
        messages = [{"role": "system", "content": self.SYSTEM_PROMPT}, {"role": "user", "content": user_msg}]
        return self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    def _generate(self, prompt: str) -> Tuple[str, str, int]:
        """Sample a completion and split it into thinking and response text.

        Returns:
            ``(thinking, response, num_new_tokens)``. The split happens at the
            last ``THINK_END_TOKEN_ID`` in the new tokens; without one,
            ``thinking`` is empty and everything is the response.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        input_len = inputs.input_ids.shape[1]

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs, max_new_tokens=self.max_new_tokens, temperature=self.temperature,
                top_p=0.95, top_k=20, do_sample=True, pad_token_id=self.tokenizer.pad_token_id,
            )

        new_tokens = outputs[0][input_len:]
        num_tokens = len(new_tokens)

        # Explicit presence check instead of a bare except around the indexing,
        # so unrelated errors are no longer silently swallowed here.
        think_positions = (new_tokens == self.THINK_END_TOKEN_ID).nonzero(as_tuple=True)[0]
        if think_positions.numel() > 0:
            think_end_idx = think_positions[-1].item()
            thinking_tokens = new_tokens[:think_end_idx]
            response_tokens = new_tokens[think_end_idx + 1:]
        else:
            thinking_tokens = new_tokens[:0]
            response_tokens = new_tokens

        thinking = self.tokenizer.decode(thinking_tokens, skip_special_tokens=True).strip()
        response = self.tokenizer.decode(response_tokens, skip_special_tokens=True).strip()
        return thinking, response, num_tokens

    def get_stats(self) -> dict:
        """Summarise recorded actions; returns ``{}`` when nothing was recorded.

        ``vpip`` and ``pfr`` are fractions of recorded *preflop actions* (not
        hands) that were call/raise/all-in and raise/all-in respectively.
        """
        if not self.action_history:
            return {}
        total = len(self.action_history)
        preflop = [a for a in self.action_history if a.street == "preflop"]
        vpip = len([a for a in preflop if a.action.action_type in ("call", "raise", "all_in")]) / len(preflop) if preflop else 0
        pfr = len([a for a in preflop if a.action.action_type in ("raise", "all_in")]) / len(preflop) if preflop else 0
        return {
            "total_actions": total, "vpip": vpip, "pfr": pfr,
            "avg_latency_ms": sum(a.latency_ms for a in self.action_history) / total,
            "fold_pct": sum(1 for a in self.action_history if a.action.action_type == "fold") / total,
        }

    def get_last_record(self) -> Optional[ActionRecord]:
        """Return the most recent action record, or ``None`` if there is none."""
        return self.action_history[-1] if self.action_history else None

    def reset_history(self):
        """Forget all recorded actions."""
        self.action_history = []

print("TransformersPlayer loaded!")

In [None]:
# ============= OpenAIPlayer =============

try:
    from openai import OpenAI
    OPENAI_AVAILABLE = True
except ImportError:
    OPENAI_AVAILABLE = False


class OpenAIPlayer:
    """Poker player whose decisions come from an OpenAI chat-completions model.

    Provides the methods ``EvalPokerGame`` calls on a player
    (``set_hand_context``, ``get_action``, ``get_action_with_prompt``,
    ``get_stats``, ``get_last_record``). Every decision is appended to
    ``action_history`` and the token usage reported by the API is accumulated
    in ``total_input_tokens`` / ``total_output_tokens``.
    """

    SYSTEM_PROMPT = TransformersPlayer.SYSTEM_PROMPT  # Same prompt

    def __init__(self, name: str, model: str = "gpt-4", temperature: float = 0.6, max_tokens: int = 512):
        """Create the player and its API client.

        Args:
            name: Display name used in logs and metrics.
            model: Chat model identifier sent with every request.
            temperature: Sampling temperature sent with every request.
            max_tokens: Completion token limit sent with every request.

        Raises:
            ImportError: If the ``openai`` package could not be imported.
        """
        if not OPENAI_AVAILABLE:
            raise ImportError("openai package not installed")
        self.name = name
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        # No credentials are passed explicitly; presumably the client reads
        # OPENAI_API_KEY from the environment (the notebook sets it earlier).
        self.client = OpenAI()
        self.parser = ActionParser()
        self.action_history: List[ActionRecord] = []
        self._hand_id = 0
        self._street = "preflop"
        self.total_input_tokens = 0
        self.total_output_tokens = 0

    def set_hand_context(self, hand_id: int, street: str):
        """Remember the hand id and street to stamp on subsequent action records."""
        self._hand_id = hand_id
        self._street = street

    def get_action(self, hole_cards, board, pot, to_call, stack, position, num_players) -> ParsedAction:
        """Get action using simple prompt (fallback).

        Builds a plain-text description of the spot, queries the model and
        records the decision. Returns the parsed action, or a fold if the API
        call or the parsing raised.
        """
        start = time.perf_counter()
        user_msg = self._build_simple_prompt(hole_cards, board, pot, to_call, stack, position, num_players)
        return self._decide_and_record(user_msg, start, hole_cards, board, pot, to_call, stack, position)

    def get_action_with_prompt(self, prompt_text: str, hole_cards, board, pot, to_call, stack, position) -> ParsedAction:
        """Get action using a pre-built prompt (pokergpt format).

        Same contract as ``get_action`` except that ``prompt_text`` is sent to
        the model as-is.
        """
        start = time.perf_counter()
        return self._decide_and_record(prompt_text, start, hole_cards, board, pot, to_call, stack, position)

    def _decide_and_record(self, user_msg: str, start: float, hole_cards, board, pot, to_call, stack,
                           position) -> ParsedAction:
        """Query the model with ``user_msg``, parse the reply and append an ``ActionRecord``.

        Shared implementation behind ``get_action`` and ``get_action_with_prompt``.
        ``start`` is the ``time.perf_counter()`` reading taken by the caller, so
        the recorded latency also covers any prompt building done there.
        """
        try:
            response_text, _, tokens_out = self._call_api(user_msg)
            can_check = to_call == 0
            result = self.parser.parse_with_metadata(response_text, can_check, stack)
            action = result.action
            parse_method = result.method
            parse_error = result.error
        except Exception as e:
            # Deliberate best-effort boundary: an API or parsing failure must
            # not abort a long session, so the decision degrades to a fold and
            # the error text is kept in the record for later inspection.
            response_text = f"ERROR: {e}"
            tokens_out = 0
            action = ParsedAction("fold")
            parse_method = "error"
            parse_error = str(e)

        latency = (time.perf_counter() - start) * 1000

        self.action_history.append(ActionRecord(
            hand_id=self._hand_id, street=self._street, hole_cards=hole_cards,
            board=list(board), pot=pot, to_call=to_call, stack=stack,
            position=position, action=action, thinking="",
            response=response_text[:500],  # keep stored responses bounded
            latency_ms=latency, tokens_generated=tokens_out,
            parse_method=parse_method, parse_error=parse_error,
        ))
        return action

    def _build_simple_prompt(self, hole_cards, board, pot, to_call, stack, position, num_players) -> str:
        """Build simple prompt (fallback).

        ``board`` is joined with spaces, or shown as ``None`` when empty;
        ``hole_cards`` must have at least two entries.
        """
        board_str = " ".join(board) if board else "None"
        return f"""Game: {num_players}-handed No-Limit Hold'em
Position: {position}
Stack: {stack}
Hole Cards: {hole_cards[0]} {hole_cards[1]}
Board: {board_str}
Pot: {pot}
To Call: {to_call}

What is your action?"""

    def _call_api(self, user_msg: str) -> Tuple[str, int, int]:
        """Send one system+user exchange and return ``(content, tokens_in, tokens_out)``.

        A missing message content becomes ``""`` and a missing ``usage`` block
        counts as zero tokens. The running token totals are updated as a side
        effect.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )
        content = response.choices[0].message.content or ""
        tokens_in = response.usage.prompt_tokens if response.usage else 0
        tokens_out = response.usage.completion_tokens if response.usage else 0
        self.total_input_tokens += tokens_in
        self.total_output_tokens += tokens_out
        return content, tokens_in, tokens_out

    def get_stats(self) -> dict:
        """Summarise recorded decisions and token usage.

        Returns ``{}`` when nothing has been recorded. ``vpip`` and ``pfr`` are
        shares of preflop *records* (not hands) and are 0 when there are no
        preflop records.
        """
        if not self.action_history:
            return {}
        total = len(self.action_history)
        preflop = [a for a in self.action_history if a.street == "preflop"]
        vpip = len([a for a in preflop if a.action.action_type in ("call", "raise", "all_in")]) / len(preflop) if preflop else 0
        pfr = len([a for a in preflop if a.action.action_type in ("raise", "all_in")]) / len(preflop) if preflop else 0
        return {
            "total_actions": total, "vpip": vpip, "pfr": pfr,
            "avg_latency_ms": sum(a.latency_ms for a in self.action_history) / total,
            "fold_pct": sum(1 for a in self.action_history if a.action.action_type == "fold") / total,
            "total_input_tokens": self.total_input_tokens,
            "total_output_tokens": self.total_output_tokens,
        }

    def get_last_record(self) -> Optional[ActionRecord]:
        """Return the most recent action record, or ``None`` if there is none."""
        return self.action_history[-1] if self.action_history else None

    def get_estimated_cost(self) -> float:
        """Estimate spend from the accumulated token counts.

        Models whose name contains ``turbo`` or ``4o`` are charged 10 / 30 per
        million input / output tokens, everything else 30 / 60.

        NOTE(review): the rates are hard-coded and presumably USD per million
        tokens; confirm them against current pricing for the model in use.
        """
        model_name = self.model.lower()
        if "turbo" in model_name or "4o" in model_name:
            return self.total_input_tokens * 10 / 1e6 + self.total_output_tokens * 30 / 1e6
        return self.total_input_tokens * 30 / 1e6 + self.total_output_tokens * 60 / 1e6

    def reset_history(self):
        """Forget recorded actions and zero the token counters."""
        self.action_history = []
        self.total_input_tokens = 0
        self.total_output_tokens = 0


# Cell-load marker; also reports whether the optional openai import succeeded.
print(f"OpenAIPlayer loaded! (available: {OPENAI_AVAILABLE})")

In [None]:
# ============= Hand Result & Metrics =============

@dataclass
class HandResult:
    """Outcome of one hand, as passed to ``MetricsCollector.log_hand``.

    ``MetricsCollector.finalize_session`` finds a player's chip delta by that
    player's index in ``player_names``, so ``chip_deltas`` must be parallel to
    ``player_names``. The field notes below describe what
    ``EvalPokerGame._play_hand`` stores in each field.
    """
    hand_id: int  # EvalPokerGame passes its running hand counter
    player_names: List[str]  # one name per seat
    starting_stacks: List[int]  # stacks before the hand, per seat
    ending_stacks: List[int]  # stacks after the hand, per seat
    chip_deltas: List[int]  # ending minus starting stack, per seat
    hole_cards: Dict[str, Tuple[str, str]]  # player name -> two card strings
    board: List[str]  # community cards as strings (may be empty)
    winner_names: List[str]  # players whose chip delta was positive
    pot_size: int  # EvalPokerGame passes the total chips lost by the losing seats


class MetricsCollector:
    """Collects ``HandResult`` objects for a session and derives per-player summaries."""

    def __init__(self, session_id: str = None):
        """Start a session; without an id one is generated as ``session_<unix seconds>``."""
        if not session_id:
            session_id = f"session_{int(time.time())}"
        self.session_id = session_id
        self.hand_results: List[HandResult] = []
        self.session_start = time.time()
        self.player_summaries = {}

    def log_hand(self, result: HandResult):
        """Store the result of one finished hand."""
        self.hand_results.append(result)

    def finalize_session(self, player_stats: Dict[str, dict], cumulative_deltas: Dict[str, int] = None):
        """Finalize session and compute BB/100 metrics.

        Fills ``player_summaries`` with one entry per player name seen in the
        logged hands, then sets ``duration`` and ``total_hands``. Big blinds
        are measured with the module-level ``BIG_BLIND`` constant.

        Args:
            player_stats: Stats from each player; merged into that player's
                summary last, so its keys win on a name clash.
            cumulative_deltas: Total chip deltas across all hands (for
                reset-stack mode). When a player has an entry here it replaces
                the delta summed from the logged hands.
        """
        # Measured first so summary computation time is not counted.
        duration = time.time() - self.session_start
        total_hands = len(self.hand_results)

        seen_names = {name for hr in self.hand_results for name in hr.player_names}

        for name in seen_names:
            appearances = [hr for hr in self.hand_results if name in hr.player_names]
            hands_played = len(appearances)
            total_chip_delta = sum(hr.chip_deltas[hr.player_names.index(name)] for hr in appearances)
            hands_won = sum(1 for hr in appearances if name in hr.winner_names)

            # Use cumulative_deltas if provided (more accurate for reset-stack mode)
            if cumulative_deltas and name in cumulative_deltas:
                total_chip_delta = cumulative_deltas[name]

            if hands_played > 0:
                bb_per_100 = total_chip_delta / hands_played * 100 / BIG_BLIND
                win_rate = hands_won / hands_played
            else:
                bb_per_100 = 0
                win_rate = 0

            summary = {
                "hands_played": hands_played,
                "hands_won": hands_won,
                "win_rate": win_rate,
                "total_chip_delta": total_chip_delta,
                "bb_per_100": bb_per_100,
                "mbb_per_hand": bb_per_100 * 10,  # milli-BB per hand
            }
            summary.update(player_stats.get(name, {}))
            self.player_summaries[name] = summary

        self.duration = duration
        self.total_hands = total_hands


# Cell-load marker: printed once HandResult and MetricsCollector above are defined.
print("Metrics loaded with BB/100 and mBB/hand!")

In [None]:
# ============= Hand Logger =============

SUIT_SYMBOLS_LOG = {"c": "♣", "d": "♦", "h": "♥", "s": "♠"}


class HandLogger:
    """Logs sampled poker hands to a file in a pretty format.

    Only every ``sample_rate``-th hand is recorded. A sampled hand is built up
    in memory through ``start_hand`` / ``start_street`` / ``log_action`` /
    ``end_street`` and written as one box-drawing block by ``end_hand``. For
    hands that are not sampled all of those calls are no-ops.

    Every box row is padded by display width (double-width characters such as
    emoji count as two columns) so the right-hand border lines up.
    """

    _BOX_WIDTH = 58  # inner width of every box, excluding the two border characters

    def __init__(self, log_dir: str = "logs", sample_rate: int = 100):
        """Create ``log_dir`` if needed and pick a timestamped session file path.

        The file itself is first written by ``log_session_start`` (truncating)
        or ``_write_hand`` (appending).
        """
        self.log_dir = log_dir
        self.sample_rate = sample_rate
        self.session_file: Optional[str] = None
        self._current_hand: Optional[Dict] = None

        os.makedirs(log_dir, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.session_file = os.path.join(log_dir, f"poker_session_{timestamp}.log")

    def should_log(self, hand_num: int) -> bool:
        """Return True when ``hand_num`` is a multiple of ``sample_rate``.

        A ``sample_rate`` of 0 disables hand logging instead of raising
        ``ZeroDivisionError``.
        """
        if not self.sample_rate:
            return False
        return hand_num % self.sample_rate == 0

    def start_hand(self, hand_num, player_names, stacks, hole_cards, button_pos, sb_pos, bb_pos, blinds):
        """Begin recording a hand, or mark it as skipped if it is not sampled.

        ``stacks`` is copied; the other arguments are stored by reference.
        """
        if not self.should_log(hand_num):
            self._current_hand = None
            return
        self._current_hand = {
            "hand_num": hand_num, "timestamp": datetime.now().isoformat(),
            "player_names": player_names, "stacks": stacks.copy(),
            "hole_cards": hole_cards, "button_pos": button_pos,
            "sb_pos": sb_pos, "bb_pos": bb_pos, "blinds": blinds,
            "streets": [], "current_street": None, "board": [],
            "final_stacks": None, "winners": [],
        }

    def start_street(self, street_name: str, board: List[str]):
        """Open a new street and remember the board as it stands on that street."""
        if self._current_hand is None:
            return
        self._current_hand["current_street"] = {"name": street_name, "board": [str(c) for c in board], "actions": []}
        self._current_hand["board"] = [str(c) for c in board]

    def log_action(self, player_idx: int, player_name: str, action_str: str):
        """Append an action to the currently open street (no-op if none is open)."""
        if self._current_hand is None or self._current_hand["current_street"] is None:
            return
        self._current_hand["current_street"]["actions"].append({
            "player_idx": player_idx, "player_name": player_name, "action": action_str,
        })

    def end_street(self):
        """Close the open street and move it to the hand's list of finished streets."""
        if self._current_hand is None or self._current_hand["current_street"] is None:
            return
        self._current_hand["streets"].append(self._current_hand["current_street"])
        self._current_hand["current_street"] = None

    def end_hand(self, final_stacks: List[int], winners: List[int], chips_won: int):
        """Finish the hand, write it to the session file and clear the in-memory state.

        Args:
            final_stacks: Stack per seat after the hand.
            winners: Seat indices of the winners.
            chips_won: Amount shown next to the winners' names.
        """
        if self._current_hand is None:
            return
        if self._current_hand["current_street"] is not None:
            self.end_street()
        self._current_hand["final_stacks"] = final_stacks
        self._current_hand["winners"] = winners
        self._current_hand["chips_won"] = chips_won
        self._write_hand()
        self._current_hand = None

    def _format_card(self, card: str) -> str:
        """Render a card as rank plus suit symbol, e.g. ``As`` -> ``A♠``.

        If the text contains parentheses, only what is inside the last pair is
        used. Unknown suit letters are kept as-is.
        """
        card_str = str(card)
        if "(" in card_str and ")" in card_str:
            start = card_str.rfind("(") + 1
            end = card_str.rfind(")")
            card_str = card_str[start:end]
        if len(card_str) >= 2:
            rank = card_str[:-1].upper()
            suit = card_str[-1].lower()
            return f"{rank}{SUIT_SYMBOLS_LOG.get(suit, suit)}"
        return card_str

    def _format_cards(self, cards: List) -> str:
        """Render a list of cards as ``[A♠ K♠]``; an empty list becomes ``[ ]``."""
        if not cards:
            return "[ ]"
        return "[" + " ".join(self._format_card(c) for c in cards) + "]"

    @staticmethod
    def _display_width(text: str) -> int:
        """Return the number of terminal columns ``text`` occupies.

        Characters whose East Asian Width is Wide or Fullwidth (this includes
        the emoji used in the log) count as two columns, everything else as one.
        """
        import unicodedata  # local import: only needed for log formatting

        return sum(2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1 for ch in text)

    @staticmethod
    def _position_tag(seat: int, button_pos: int, sb_pos: int, bb_pos: int) -> str:
        """Return e.g. `` [BTN/SB]`` listing every role held by ``seat`` (or ``""``).

        Roles are combined because heads-up the button seat also posts a blind.
        """
        roles = [label for label, pos in (("BTN", button_pos), ("SB", sb_pos), ("BB", bb_pos)) if seat == pos]
        return f" [{'/'.join(roles)}]" if roles else ""

    def _pad_line(self, content: str, width: int = 58, border: str = "║") -> str:
        """Wrap ``content`` in ``border`` characters, right-padded to ``width`` display columns.

        Content wider than ``width`` is emitted unpadded rather than truncated.
        """
        padding = max(0, width - self._display_width(content))
        return f"{border}{content}" + " " * padding + border

    def _write_hand(self):
        """Append the current hand to the session file as a box-drawing block."""
        if self._current_hand is None:
            return
        h = self._current_hand
        heavy_rule = "═" * self._BOX_WIDTH
        light_rule = "─" * self._BOX_WIDTH

        lines = ["", "╔" + heavy_rule + "╗"]
        lines.append(self._pad_line(f"  🎴 HAND #{h['hand_num']:>4}  │  {h['timestamp'][:19]}"))
        lines.append("╠" + heavy_rule + "╣")
        lines.append(self._pad_line("  PLAYERS"))
        lines.append("╟" + light_rule + "╢")

        for i, name in enumerate(h["player_names"]):
            pos_tag = self._position_tag(i, h["button_pos"], h["sb_pos"], h["bb_pos"])
            hole = self._format_cards(h["hole_cards"][i]) if h["hole_cards"][i] else "[?? ??]"
            stack_str = f"${h['stacks'][i]:,}"
            lines.append(self._pad_line(f"  {name[:12]:<12} {hole:<12} {stack_str:>10}{pos_tag:<8}"))

        lines.append("╠" + heavy_rule + "╣")
        sb, bb = h["blinds"]
        lines.append(self._pad_line(f"  Blinds: ${sb}/${bb}"))
        lines.append("╠" + heavy_rule + "╣")

        for street in h["streets"]:
            board_str = self._format_cards(street["board"]) if street["board"] else ""
            lines.append(self._pad_line(f"  ▶ {street['name'].upper()} {board_str}"))
            lines.append("╟" + light_rule + "╢")
            for action in street["actions"]:
                lines.append(self._pad_line(f"    {action['player_name'][:12]:<12}: {action['action']}"))
            if not street["actions"]:
                lines.append(self._pad_line("    (no actions)"))
            lines.append("╟" + light_rule + "╢")

        if h["board"]:
            lines.append(self._pad_line(f"  Final Board: {self._format_cards(h['board'])}"))
            lines.append("╠" + heavy_rule + "╣")

        lines.append(self._pad_line("  🏆 RESULTS"))
        lines.append("╟" + light_rule + "╢")
        if h["winners"]:
            winner_names = [h["player_names"][w] for w in h["winners"]]
            lines.append(self._pad_line(f"  Winner: {', '.join(winner_names)} (+${h['chips_won']:,})"))
        lines.append("╟" + light_rule + "╢")
        lines.append(self._pad_line("  Final Stacks:"))
        for i, name in enumerate(h["player_names"]):
            if h["final_stacks"]:
                diff = h["final_stacks"][i] - h["stacks"][i]
                diff_str = f"+{diff}" if diff > 0 else str(diff)
                lines.append(self._pad_line(f"    {name[:12]:<12}: ${h['final_stacks'][i]:,} ({diff_str})"))
        lines.append("╚" + heavy_rule + "╝")
        lines.append("")

        with open(self.session_file, "a", encoding="utf-8") as f:
            f.write("\n".join(lines) + "\n")

    def log_session_start(self, num_players, starting_stack, blinds, num_hands):
        """Write the session header box, truncating any existing session file."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        rule = "─" * self._BOX_WIDTH
        lines = [
            "┌" + rule + "┐",
            self._pad_line(" " * 20 + "🃏 POKER SESSION 🃏", border="│"),
            "├" + rule + "┤",
        ]
        for row in (
            f"  Started: {timestamp}",
            f"  Players: {num_players}",
            f"  Starting Stack: ${starting_stack:,}",
            f"  Blinds: ${blinds[0]}/${blinds[1]}",
            f"  Planned Hands: {num_hands}",
            f"  Sample Rate: every {self.sample_rate} hands",
        ):
            lines.append(self._pad_line(row, border="│"))
        lines.append("└" + rule + "┘")
        with open(self.session_file, "w", encoding="utf-8") as f:
            f.write("\n".join(lines) + "\n")

    def log_session_end(self, hands_played, final_stacks, player_names, starting_stack):
        """Append the session summary box and print where the log was saved.

        Each player's result is ``final_stacks[i] - starting_stack``.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        rule = "─" * self._BOX_WIDTH
        lines = [
            "",
            "┌" + rule + "┐",
            self._pad_line(" " * 18 + "📊 SESSION SUMMARY 📊", border="│"),
            "├" + rule + "┤",
            self._pad_line(f"  Ended: {timestamp}", border="│"),
            self._pad_line(f"  Hands Played: {hands_played}", border="│"),
            "├" + rule + "┤",
            self._pad_line("  Final Results:", border="│"),
        ]
        for i, name in enumerate(player_names):
            diff = final_stacks[i] - starting_stack
            diff_str = f"+{diff}" if diff > 0 else str(diff)
            emoji = "🏆" if diff > 0 else "📉" if diff < 0 else "➖"
            lines.append(self._pad_line(f"    {emoji} {name[:12]:<12}: ${final_stacks[i]:,} ({diff_str})", border="│"))
        lines.append("└" + rule + "┘")
        with open(self.session_file, "a", encoding="utf-8") as f:
            f.write("\n".join(lines) + "\n")
        print(f"\n📝 Hand log saved to: {self.session_file}")


# ============= Eval Game with PromptBuilder and Hand Logging =============

class EvalPokerGame:
    """Plays an evaluation session of No-Limit Hold'em between players and collects metrics.

    Every hand is played on a fresh PokerKit ``NoLimitTexasHoldem`` state.
    Per-hand chip deltas are added to ``cumulative_deltas`` so total profit is
    known even when stacks are reset to ``starting_stack`` after each hand.

    Players must provide ``name``, ``set_hand_context``, ``get_action``,
    ``get_stats`` and ``get_last_record``; ``get_action_with_prompt`` is used
    when present and ``use_pokergpt_prompt`` is true.

    NOTE(review): ``self.button`` rotates every hand but is not passed to
    ``create_state``, and heads-up the ``sb_pos``/``bb_pos`` formulas put the
    big blind on the button seat. The position labels shown to players and
    written to the log come from ``self.button``; confirm they match the seats
    that actually post blinds inside PokerKit.
    """

    def __init__(self, players, starting_stack=10000, small_blind=50, big_blind=100,
                 metrics=None, observability=None, verbose=False, progress_callback=None,
                 use_pokergpt_prompt=True, reset_stacks_each_hand=True, log_sample_rate=100):
        """Set up the table.

        Args:
            players: Player objects, in seat order.
            starting_stack: Chips each seat starts with (and is reset to).
            small_blind: Small blind size in chips.
            big_blind: Big blind size in chips; also passed as the minimum bet.
            metrics: ``MetricsCollector`` to log into; a new one is created if omitted.
            observability: Optional object whose ``record_action`` is called per action.
            verbose: Print each action and engine errors.
            progress_callback: Optional ``callback(hands_done, num_hands)``.
            use_pokergpt_prompt: Build prompts with ``PromptBuilder`` when the
                player supports ``get_action_with_prompt``.
            reset_stacks_each_hand: Restore every stack after each hand.
            log_sample_rate: Every N-th hand is written to the hand log.
        """
        self.players = players
        self.num_players = len(players)
        self.starting_stack = starting_stack
        self.small_blind = small_blind
        self.big_blind = big_blind
        self.stacks = [starting_stack] * self.num_players
        self.button = 0
        self.hand_num = 0
        self.metrics = metrics or MetricsCollector()
        self.observability = observability
        self.verbose = verbose
        self.progress_callback = progress_callback
        self.use_pokergpt_prompt = use_pokergpt_prompt
        self.reset_stacks_each_hand = reset_stacks_each_hand
        self.cumulative_deltas = [0] * self.num_players  # Track total winnings across all hands

        # PromptBuilder for pokergpt-style prompts
        self.prompt_builder = PromptBuilder(big_blind=big_blind)

        # Hand logger (samples every Nth hand)
        self.logger = HandLogger(log_dir=f"{OUTPUT_DIR}/logs", sample_rate=log_sample_rate)

    def play_session(self, num_hands: int) -> MetricsCollector:
        """Play ``num_hands`` hands, finalize the metrics and return the collector.

        There is no early termination: all hands are always played so that
        BB/100 is computed over the full sample.
        """
        # Log session start
        self.logger.log_session_start(
            num_players=self.num_players,
            starting_stack=self.starting_stack,
            blinds=(self.small_blind, self.big_blind),
            num_hands=num_hands,
        )

        for hand_idx in range(num_hands):
            self._play_hand()
            if self.progress_callback:
                self.progress_callback(hand_idx + 1, num_hands)

        self.metrics.finalize_session(
            {p.name: p.get_stats() for p in self.players},
            cumulative_deltas={p.name: self.cumulative_deltas[i] for i, p in enumerate(self.players)},
        )

        # Log session end. In reset-stacks mode the live stacks are back at
        # starting_stack, so the log shows starting_stack plus cumulative P/L.
        final_stacks_for_log = [self.starting_stack + delta for delta in self.cumulative_deltas]
        self.logger.log_session_end(
            hands_played=self.hand_num,
            final_stacks=final_stacks_for_log,
            player_names=[p.name for p in self.players],
            starting_stack=self.starting_stack,
        )

        return self.metrics

    def _play_hand(self):
        """Play one hand end-to-end and record its result.

        If the PokerKit state cannot be created the hand is skipped: the hand
        counter has already advanced but nothing is logged to the metrics.
        """
        self.hand_num += 1
        self.button = (self.button + 1) % self.num_players
        for p in self.players:
            p.set_hand_context(self.hand_num, "preflop")

        sb_pos = (self.button + 1) % self.num_players
        bb_pos = (self.button + 2) % self.num_players
        if self.stacks[sb_pos] <= 0 or self.stacks[bb_pos] <= 0:
            # Reset stacks if someone is bust (shouldn't happen with reset mode)
            self.stacks = [self.starting_stack] * self.num_players

        starting_stacks = self.stacks.copy()

        # Reset prompt builder for new hand
        self.prompt_builder.reset_hand()

        try:
            state = NoLimitTexasHoldem.create_state(
                automations=(Automation.ANTE_POSTING, Automation.BET_COLLECTION, Automation.BLIND_OR_STRADDLE_POSTING,
                             Automation.CARD_BURNING, Automation.HOLE_DEALING, Automation.HOLE_CARDS_SHOWING_OR_MUCKING,
                             Automation.HAND_KILLING, Automation.CHIPS_PUSHING, Automation.CHIPS_PULLING),
                ante_trimming_status=True, raw_antes={-1: 0},
                raw_blinds_or_straddles=(self.small_blind, self.big_blind),
                min_bet=self.big_blind, raw_starting_stacks=self.stacks.copy(), player_count=self.num_players,
            )
        except Exception as e:
            if self.verbose:
                print(f"Error: {e}")
            return

        hole_cards = [(str(state.hole_cards[i][0]), str(state.hole_cards[i][1]))
                      if state.hole_cards[i] and len(state.hole_cards[i]) >= 2 else ("??", "??")
                      for i in range(self.num_players)]
        # Board dealing is not automated: draw board cards from a shuffled copy
        # of the cards the state still considers dealable.
        deck = list(state.get_dealable_cards())
        random.shuffle(deck)
        board = []

        # Get positions for this hand
        positions = [get_position_name(i, self.num_players, self.button) for i in range(self.num_players)]

        # Record initial deals in prompt builder
        for i in range(self.num_players):
            blind_note = ""
            if i == sb_pos:
                blind_note = f"Small Blind {self.small_blind / self.big_blind:.1f} BB"
            elif i == bb_pos:
                blind_note = f"Big Blind {self.big_blind / self.big_blind:.1f} BB"
            # For now, record deals without hero designation (hero changes per action)
            self.prompt_builder.action_history.append(
                f"{positions[i]} was dealt hole cards" + (f" ({blind_note})" if blind_note else "") + "."
            )

        # Log hand start
        self.logger.start_hand(
            hand_num=self.hand_num,
            player_names=[p.name for p in self.players],
            stacks=self.stacks,
            hole_cards=hole_cards,
            button_pos=self.button,
            sb_pos=sb_pos,
            bb_pos=bb_pos,
            blinds=(self.small_blind, self.big_blind),
        )

        for street_idx, street in enumerate(["preflop", "flop", "turn", "river"]):
            if state.status is False:
                break
            for p in self.players:
                p.set_hand_context(self.hand_num, street)

            # End previous street in logger
            if street_idx > 0:
                self.logger.end_street()

            if street == "flop":
                board = [deck.pop(), deck.pop(), deck.pop()]
                self._deal_board_cards(state, board)
                self.prompt_builder.record_board([str(c) for c in board])
            elif street in ("turn", "river"):
                board.append(deck.pop())
                self._deal_board_cards(state, board[-1:])
                self.prompt_builder.record_board([str(c) for c in board])

            # Log street start
            self.logger.start_street(street, board)

            board_strs = [str(c) for c in board]
            while state.actor_index is not None:
                actor = state.actor_index
                player = self.players[actor]
                pot = state.total_pot_amount if hasattr(state, 'total_pot_amount') else 0
                current_bet = max(state.bets) if state.bets else 0
                player_bet = state.bets[actor] if state.bets else 0
                to_call = current_bet - player_bet
                stack = state.stacks[actor]
                position = positions[actor]

                # Build pokergpt-style prompt
                if self.use_pokergpt_prompt and hasattr(player, 'get_action_with_prompt'):
                    # Get min raise (current bet + big blind as simplified rule)
                    min_raise = current_bet + self.big_blind if current_bet > 0 else self.big_blind

                    prompt_text = self.prompt_builder.build_prompt(
                        hero_idx=actor,
                        hero_cards=hole_cards[actor],
                        board=board_strs,
                        stacks=list(state.stacks),
                        bets=list(state.bets) if state.bets else [0] * self.num_players,
                        pot=pot,
                        to_call=to_call,
                        min_raise=min_raise,
                        button_idx=self.button,
                        num_players=self.num_players,
                        street=street,
                    )
                    action = player.get_action_with_prompt(
                        prompt_text, hole_cards[actor], board_strs, pot, to_call, stack, position
                    )
                else:
                    action = player.get_action(hole_cards[actor], board_strs, pot, to_call, stack, position, self.num_players)

                if self.verbose:
                    print(f"  H{self.hand_num} {street} {player.name}: {action}")

                executed, fallback = self._execute_action(state, action)

                # Log action (the action as requested by the player)
                self.logger.log_action(actor, player.name, str(action))

                # Record action in prompt builder for subsequent players
                self._record_in_prompt_history(position, executed, action, to_call, stack)

                # Record observability
                if self.observability:
                    record = player.get_last_record()
                    if record:
                        self.observability.record_action(player.name, record, executed, fallback)

                if executed == "error":
                    # Neither the requested action nor any fallback was
                    # accepted, so the state did not advance. Leave the street
                    # instead of asking the same player again forever.
                    break

        if hasattr(state, 'stacks'):
            for i in range(self.num_players):
                self.stacks[i] = state.stacks[i]

        chip_deltas = [self.stacks[i] - starting_stacks[i] for i in range(self.num_players)]
        winners = [i for i, d in enumerate(chip_deltas) if d > 0]
        max_gain = max(chip_deltas) if chip_deltas else 0
        winner_names = [self.players[i].name for i in winners]

        # Accumulate deltas for BB/100 calculation
        for i in range(self.num_players):
            self.cumulative_deltas[i] += chip_deltas[i]

        # Log hand end
        self.logger.end_street()
        self.logger.end_hand(final_stacks=self.stacks, winners=winners, chips_won=max_gain)

        self.metrics.log_hand(HandResult(
            hand_id=self.hand_num, player_names=[p.name for p in self.players],
            starting_stacks=starting_stacks, ending_stacks=self.stacks.copy(),
            chip_deltas=chip_deltas, hole_cards={p.name: hole_cards[i] for i, p in enumerate(self.players)},
            board=[str(c) for c in board], winner_names=winner_names, pot_size=sum(abs(d) for d in chip_deltas if d < 0),
        ))

        # Reset stacks for next hand (no elimination)
        if self.reset_stacks_each_hand:
            self.stacks = [self.starting_stack] * self.num_players

    def _deal_board_cards(self, state, cards):
        """Deal each card in ``cards`` to the board, one ``deal_board`` call per card.

        Best-effort: a rejected card does not abort the hand. The failure is
        reported only in verbose mode.
        """
        for card in cards:
            try:
                state.deal_board(card)
            except Exception as e:
                if self.verbose:
                    print(f"  H{self.hand_num} board deal failed for {card}: {e}")

    def _record_in_prompt_history(self, player_label, executed, action, to_call, stack):
        """Append the executed action to the prompt builder's history.

        Amounts are converted to big blinds. ``to_call`` and ``stack`` are the
        values observed before the action was executed. An ``executed`` value
        of ``"error"`` records nothing.
        """
        if executed == "fold":
            self.prompt_builder.record_action(player_label, "folded")
        elif executed == "check":
            self.prompt_builder.record_action(player_label, "checked")
        elif executed == "call":
            self.prompt_builder.record_action(player_label, "called", to_call / self.big_blind)
        elif executed == "raise":
            raise_amount = action.amount if action.amount else stack
            self.prompt_builder.record_action(player_label, "bet/raised to", raise_amount / self.big_blind)
        elif executed == "all_in":
            self.prompt_builder.record_action(player_label, "went all-in", stack / self.big_blind)

    def _execute_action(self, state, action: ParsedAction) -> Tuple[str, bool]:
        """Execute action, return (executed_action_name, used_fallback)

        The name is one of ``"fold"``, ``"check"``, ``"call"``, ``"raise"``,
        ``"all_in"`` or ``"error"``. For a check/call the name reflects what
        the engine actually did (``"call"`` if the actor was facing a bet,
        ``"check"`` otherwise), not the label the player asked for.

        If the requested action is rejected by the engine, falls back to
        check/call, then fold; ``"error"`` means nothing was accepted.
        """
        # Decide before acting whether a check_or_call will put chips in.
        actor = state.actor_index
        bets = state.bets
        facing_bet = bool(bets) and actor is not None and max(bets) > bets[actor]
        passive_name = "call" if facing_bet else "check"

        try:
            if action.action_type == "fold":
                state.fold()
                return "fold", False
            elif action.action_type in ("check", "call"):
                state.check_or_call()
                return passive_name, False
            elif action.action_type in ("raise", "bet"):
                state.complete_bet_or_raise_to(action.amount)
                return "raise", False
            elif action.action_type == "all_in":
                state.complete_bet_or_raise_to(state.stacks[actor] + state.bets[actor])
                return "all_in", False
        except Exception:
            # Illegal or malformed action: fall through to the fallback chain.
            # KeyboardInterrupt is intentionally not caught so a long session
            # can still be interrupted.
            pass

        # Fallback
        try:
            state.check_or_call()
            return passive_name, True
        except Exception:
            try:
                state.fold()
                return "fold", True
            except Exception:
                return "error", True


# Cell-load marker: printed once HandLogger and EvalPokerGame above are defined.
print("EvalPokerGame loaded with BB/100 tracking (no elimination) and hand logging!")

In [None]:
# ============= Unit Tests: Game Mechanics =============
# Announce the mock-player test that the rest of this cell runs.
print("Testing EvalPokerGame with mock players...")

class MockPlayer:
    """Scripted stand-in for an LLM player, used to test game mechanics.

    Replays ``action_sequence`` in order, wrapping around at the end, and
    ignores whatever game state it is shown.
    """

    def __init__(self, name, action_sequence):
        self.name = name
        self.actions = action_sequence
        self.idx = 0  # number of actions handed out so far
        self.action_history = []

    def set_hand_context(self, hand_id, street):
        """Accept the hand context and ignore it."""
        return None

    def get_action(self, *args, **kwargs):
        """Return the next scripted action, cycling through the sequence."""
        slot = self.idx % len(self.actions)
        self.idx += 1
        return self.actions[slot]

    def get_action_with_prompt(self, *args, **kwargs):
        """Same as ``get_action``; the prompt is ignored."""
        return self.get_action(*args, **kwargs)

    def get_stats(self):
        """Report only how many actions have been handed out."""
        return dict(total_actions=self.idx)

    def get_last_record(self):
        """No action records are kept, so there is never a last record."""
        return None

# Two players: one always answers "fold", one always answers "call".
# Assumption made by this test (not verifiable from this cell; confirm against
# PokerKit): heads-up, seat idx 0 is the BB and idx 1 is the SB, which acts
# first preflop. Under that assumption Folder (idx 1) folds first every hand
# and Caller (idx 0) collects the blind.
folder = MockPlayer("Folder", [ParsedAction("fold")])
caller = MockPlayer("Caller", [ParsedAction("call")])

game = EvalPokerGame(
    players=[caller, folder],  # seat order: Caller at idx 0, Folder at idx 1
    starting_stack=1000,
    small_blind=5,
    big_blind=10,
    reset_stacks_each_hand=True,
    log_sample_rate=10000,  # Only every 10000th hand is sampled, so none of these 5 hands is logged
)

# play_session returns the game's MetricsCollector after finalizing it.
result = game.play_session(5)

# Verify game completed
assert result.total_hands == 5, f"Expected 5 hands, got {result.total_hands}"
assert "Folder" in result.player_summaries, "Missing Folder in summaries"
assert "Caller" in result.player_summaries, "Missing Caller in summaries"

# Folder should lose overall and Caller should win overall
assert result.player_summaries["Caller"]["total_chip_delta"] > 0, "Caller should be winning"
assert result.player_summaries["Folder"]["total_chip_delta"] < 0, "Folder should be losing"

print(f"  Folder: {result.player_summaries['Folder']['total_chip_delta']:+} chips")
print(f"  Caller: {result.player_summaries['Caller']['total_chip_delta']:+} chips")
print("✓ EvalPokerGame mock test passed!")


In [None]:
# ============= Unit Tests: HandLogger =============
print("Testing HandLogger...")

import tempfile

# Everything is written inside a temporary directory that is removed afterwards.
with tempfile.TemporaryDirectory() as tmpdir:
    logger = HandLogger(log_dir=tmpdir, sample_rate=5)

    # Test sampling logic - should NOT log hands 1,2,3,4
    assert not logger.should_log(1), "Should not log hand 1"
    assert not logger.should_log(2), "Should not log hand 2"
    assert not logger.should_log(4), "Should not log hand 4"

    # Should log hands 5, 10, 15...
    assert logger.should_log(5), "Should log hand 5"
    assert logger.should_log(10), "Should log hand 10"
    assert logger.should_log(100), "Should log hand 100"

    # Test session file creation
    assert logger.session_file is not None, "Session file not created"
    assert tmpdir in logger.session_file, "Session file in wrong directory"

    # Test hand logging workflow (hand 5 is a sampled hand at sample_rate=5)
    logger.log_session_start(2, 10000, (50, 100), 100)
    logger.start_hand(5, ["P1", "P2"], [10000, 10000], [("As", "Ks"), ("Jd", "Td")], 0, 0, 1, (50, 100))
    logger.start_street("preflop", [])
    logger.log_action(0, "P1", "raises to 300")
    logger.log_action(1, "P2", "calls 300")
    logger.end_street()
    logger.end_hand([10200, 9800], [0], 200)

    # Verify log file was written. HandLogger writes UTF-8 (box-drawing
    # characters and emoji), so read it back as UTF-8 explicitly instead of
    # relying on the platform's default encoding.
    with open(logger.session_file, "r", encoding="utf-8") as f:
        content = f.read()
    assert "HAND #" in content, "Missing hand header"
    assert "P1" in content, "Missing player name"
    assert "raises" in content or "300" in content, "Missing action"

print("✓ HandLogger tests passed!")

In [None]:
# ============= Unit Tests: Metrics Calculation =============
print("Testing MetricsCollector BB/100 calculation...")

mc = MetricsCollector("test_session")

# Log 10 hands where Winner wins 100 chips each hand
# (every hand is identical apart from its hand_id)
for i in range(10):
    mc.log_hand(HandResult(
        hand_id=i,
        player_names=["Winner", "Loser"],
        starting_stacks=[1000, 1000],
        ending_stacks=[1100, 900],
        chip_deltas=[100, -100],
        hole_cards={"Winner": ("As", "Ks"), "Loser": ("2c", "7d")},
        board=["Ah", "Kd", "7c", "2s", "9h"],
        winner_names=["Winner"],
        pot_size=200,
    ))

# Finalize with cumulative deltas (simulating reset-stack mode)
mc.finalize_session(
    {"Winner": {}, "Loser": {}},
    cumulative_deltas={"Winner": 1000, "Loser": -1000}  # 10 hands * 100 chips
)

# BB/100 = (total_delta / hands * 100) / BB
# = (1000 / 10 * 100) / 100 = 100 BB/100
# NOTE(review): this expects MetricsCollector to use a big blind of 100; the
# value is not passed in this cell - confirm where it comes from.
expected_bb100 = 100.0
actual_bb100 = mc.player_summaries["Winner"]["bb_per_100"]
assert abs(actual_bb100 - expected_bb100) < 0.01, f"Expected BB/100={expected_bb100}, got {actual_bb100}"

# The remaining summary fields are compared exactly
assert mc.player_summaries["Loser"]["bb_per_100"] == -100.0, "Loser BB/100 should be -100"
assert mc.player_summaries["Winner"]["win_rate"] == 1.0, "Winner should have 100% win rate"
assert mc.player_summaries["Loser"]["win_rate"] == 0.0, "Loser should have 0% win rate"
assert mc.player_summaries["Winner"]["hands_won"] == 10, "Winner should have won 10 hands"

print(f"  Winner: BB/100 = {actual_bb100:+.1f}, Win Rate = {mc.player_summaries['Winner']['win_rate']*100:.0f}%")
print(f"  Loser:  BB/100 = {mc.player_summaries['Loser']['bb_per_100']:+.1f}")
print("✓ MetricsCollector tests passed!")

## 5. Model Loading

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_transformers_model(name: str, model_id: str):
    """Load a HuggingFace causal LM in FP16 and wrap it as a player.

    Args:
        name: Player name passed through to TransformersPlayer.
        model_id: Model identifier handed to ``from_pretrained``.

    Returns:
        A TransformersPlayer built from the loaded model and tokenizer.
    """
    print(f"Loading {name}: {model_id} (FP16 - full weight)...")

    # Model options are collected up front; the tokenizer is still loaded first.
    model_options = dict(
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch.float16,
    )
    tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    net = AutoModelForCausalLM.from_pretrained(model_id, **model_options)

    # Report currently allocated CUDA memory in GiB
    vram_gb = torch.cuda.memory_allocated() / 1024**3
    print(f"  Loaded. VRAM: {vram_gb:.1f}GB")

    return TransformersPlayer(name, net, tok)


def unload_model(player):
    """Drop a player's model and tokenizer and release cached GPU memory.

    Args:
        player: Any player object. Its ``model`` and ``tokenizer`` attributes
            are removed if present; players without them (for example
            API-backed players) are left untouched.

    The function then runs a garbage-collection pass, asks PyTorch to release
    its cached CUDA blocks, and prints the currently allocated VRAM.
    """
    # Remove each heavyweight attribute independently so a player that has a
    # model but no tokenizer (or the reverse) does not raise AttributeError.
    for attr in ("model", "tokenizer"):
        if hasattr(player, attr):
            delattr(player, attr)
    # Deleting the local name `player` would not free anything: the caller
    # still holds its own reference, so that statement is intentionally absent.
    gc.collect()
    torch.cuda.empty_cache()
    print(f"  Model unloaded. VRAM: {torch.cuda.memory_allocated() / 1024**3:.1f}GB")


print("Model loading functions ready!")

## 6. Run Tournament

In [None]:
from tqdm.notebook import tqdm

# Seed Python's `random` module from the configured SEED
random.seed(SEED)

# Tournament state
matchup_results = []  # one result dict per completed matchup, appended by the gauntlet cell
champion = None  # set once the gauntlet cell has finished
observability = ObservabilityCollector(OUTPUT_DIR)  # shared by every matchup


def _load_player(name: str):
    """Build the player configured under MODELS[name] (OpenAI API or local model)."""
    config = MODELS[name]
    if config["type"] == "openai":
        return OpenAIPlayer(name, model=config.get("model", "gpt-4"))
    return load_transformers_model(name, config["model_id"])


def run_matchup(p1_name: str, p2_name: str, round_name: str) -> Tuple[str, dict]:
    """Run a single matchup. Returns (winner_name, result_dict).

    Args:
        p1_name: Key into MODELS for the first player.
        p2_name: Key into MODELS for the second player.
        round_name: Label used in console output, the metrics session name
            and the trace file name.

    Returns:
        A tuple ``(winner_name, result_dict)``. The winner is the player with
        the larger total chip delta; on an exact tie the player with more
        hands won wins, and player 1 wins if that is tied as well.
        ``result_dict`` holds the round label, both player names, their chip
        deltas and BB/100, the winner and the number of hands played.

    Locally loaded models are always unloaded before the function exits, even
    when loading or playing raises, so a failed matchup does not keep VRAM
    occupied for the rest of the notebook session.
    """
    print(f"\n{'='*60}")
    print(f"{round_name}: {p1_name} vs {p2_name}")
    print(f"{'='*60}")

    # Locally loaded players are tracked as soon as they exist so the
    # ``finally`` below can free them even if a later step fails.
    local_players = []
    try:
        # Load players
        players = []
        for name in (p1_name, p2_name):
            player = _load_player(name)
            players.append(player)
            # Mirror the load rule: everything that is not an OpenAI player
            # was loaded onto this machine and must be unloaded afterwards.
            if MODELS[name]["type"] != "openai":
                local_players.append(player)
        p1, p2 = players

        # Create game
        metrics = MetricsCollector(f"{round_name}_{p1_name}_vs_{p2_name}")
        pbar = tqdm(total=HANDS_PER_MATCHUP, desc=f"{p1_name} vs {p2_name}")

        def update_progress(current, total):
            # The bar position is set absolutely from the game's own counter.
            pbar.n = current
            pbar.refresh()

        try:
            game = EvalPokerGame(
                players=[p1, p2],
                starting_stack=STARTING_STACK,
                small_blind=SMALL_BLIND,
                big_blind=BIG_BLIND,
                metrics=metrics,
                observability=observability,
                verbose=VERBOSE,
                progress_callback=update_progress,
            )

            # Run matchup
            result = game.play_session(HANDS_PER_MATCHUP)
        finally:
            # Close the bar on failure too, so no stale widget is left behind.
            pbar.close()

        # Write traces
        observability.write_traces(round_name.replace(" ", "_"))

        # Determine winner
        summaries = result.player_summaries
        p1_delta = summaries[p1_name]["total_chip_delta"]
        p2_delta = summaries[p2_name]["total_chip_delta"]

        if p1_delta > p2_delta:
            winner = p1_name
        elif p2_delta > p1_delta:
            winner = p2_name
        else:
            # Tiebreaker: hands won (player 1 wins a double tie)
            p1_wins = summaries[p1_name]["hands_won"]
            p2_wins = summaries[p2_name]["hands_won"]
            winner = p1_name if p1_wins >= p2_wins else p2_name

        # Print result
        print(f"\n{round_name} Result:")
        print(f"  {p1_name}: {p1_delta:+} chips (BB/100: {summaries[p1_name]['bb_per_100']:+.2f})")
        print(f"  {p2_name}: {p2_delta:+} chips (BB/100: {summaries[p2_name]['bb_per_100']:+.2f})")
        print(f"  WINNER: {winner}")

        result_dict = {
            "round": round_name,
            "player1": p1_name,
            "player2": p2_name,
            "player1_chips": p1_delta,
            "player2_chips": p2_delta,
            "player1_bb100": summaries[p1_name]["bb_per_100"],
            "player2_bb100": summaries[p2_name]["bb_per_100"],
            "winner": winner,
            "hands_played": result.total_hands,
        }

        return winner, result_dict
    finally:
        # Unload models to free VRAM
        for player in local_players:
            unload_model(player)


print("Tournament ready to start!")

In [None]:
# ============= Pre-flight Checks =============
print("Pre-flight checks...")

# The three models used in rounds 1 and 2 each need a config entry with an
# identifier ("model_id" or "model") and a "type"
required_models = ["Qwen3-SFT", "Qwen3-GRPO", "Llama3-SFT"]
for name in required_models:
    assert name in MODELS, f"Missing model config: {name}"
    assert any(key in MODELS[name] for key in ("model_id", "model")), f"Missing model_id/model for {name}"
    assert "type" in MODELS[name], f"Missing type for {name}"
print("  ✓ Model configs validated: " + ", ".join(required_models))

# The gauntlet needs two rounds at minimum
assert 2 <= len(GAUNTLET), f"Need at least 2 rounds in gauntlet, got {len(GAUNTLET)}"
print(f"  ✓ Gauntlet has {len(GAUNTLET)} rounds")

# Results are written below OUTPUT_DIR, so it has to exist already
assert os.path.exists(OUTPUT_DIR), f"Output dir missing: {OUTPUT_DIR}"
print(f"  ✓ Output directory exists: {OUTPUT_DIR}")

# Blind sanity: big blind above small blind, stack of at least 10 big blinds
assert BIG_BLIND > SMALL_BLIND, f"Small blind ({SMALL_BLIND}) must be less than big blind ({BIG_BLIND})"
assert BIG_BLIND * 10 <= STARTING_STACK, f"Starting stack ({STARTING_STACK}) too small (need at least {BIG_BLIND * 10})"
print(f"  ✓ Blinds validated: ${SMALL_BLIND}/${BIG_BLIND}, stack: ${STARTING_STACK}")

# A matchup must contain at least one hand
assert 0 < HANDS_PER_MATCHUP, "HANDS_PER_MATCHUP must be positive"
print(f"  ✓ Hands per matchup: {HANDS_PER_MATCHUP}")

print("\n✓ Pre-flight checks passed! Ready to run tournament.")

In [None]:
# Run the gauntlet tournament
print("\n" + "="*60)
print("POKER LLM TOURNAMENT")
print("="*60)
print(f"Format: Gauntlet ({HANDS_PER_MATCHUP} hands per matchup)")
# NOTE(review): the banner says BB/100, but run_matchup picks the winner by
# total chip delta (with hands won as the tiebreaker).
print(f"Winner: BB/100 (total chip profit)\n")

# Round 1: Qwen3-SFT vs Qwen3-GRPO
r1_winner, r1_result = run_matchup("Qwen3-SFT", "Qwen3-GRPO", "Round 1")
matchup_results.append(r1_result)

# Round 2: Winner R1 vs Llama3-SFT
r2_winner, r2_result = run_matchup(r1_winner, "Llama3-SFT", "Round 2")
matchup_results.append(r2_result)

# Round 3: Only if your model beat Llama3
# Three outcomes: play GPT-4 (a Qwen model won and an API key is set),
# Llama3-SFT is champion, or the Round 2 winner is champion because no key is set.
if r2_winner != "Llama3-SFT" and os.environ.get("OPENAI_API_KEY"):
    print(f"\n{r2_winner} beat Llama3-SFT! Proceeding to GPT-4 matchup...")
    r3_winner, r3_result = run_matchup(r2_winner, "GPT-4", "Round 3")
    matchup_results.append(r3_result)
    champion = r3_winner
elif r2_winner == "Llama3-SFT":
    print(f"\nLlama3-SFT won Round 2. Skipping GPT-4 matchup (cost savings).")
    champion = "Llama3-SFT"
else:
    print(f"\nNo OpenAI API key. Skipping GPT-4 matchup.")
    champion = r2_winner

print("\n" + "="*60)
print(f"TOURNAMENT CHAMPION: {champion}")
print("="*60)

## 7. Observability Dashboard

In [None]:
import pandas as pd

# Export metrics
observability.export_metrics()

# Error Rate Summary Table
print("\n" + "=" * 70)
print("OBSERVABILITY: ERROR RATES")
print("=" * 70)

# One table row per model, in the order the collector stores them
error_rows = [
    {
        "Model": name,
        "Actions": m.total_actions,
        "Valid Parse": m.valid_tag_parses,
        "Regex Fallback": m.regex_fallback_parses,
        "Default Fallback": m.default_fallback_parses,
        "Error Rate": f"{m.parse_error_rate:.1%}",
        "Exec Failures": m.action_execution_failures,
    }
    for name, m in observability.metrics.items()
]

df_errors = pd.DataFrame(error_rows)
print(df_errors.to_string(index=False))

# Save to CSV
df_errors.to_csv(f"{OUTPUT_DIR}/observability/error_summary.csv", index=False)

In [None]:
import matplotlib.pyplot as plt

# Observability charts: parse error rate, action mix and latency per model
fig, axes = plt.subplots(1, 3, figsize=(14, 4))

# 1. Parse Error Rate
ax = axes[0]
models = list(observability.metrics.keys())
error_rates = [m.parse_error_rate * 100 for m in observability.metrics.values()]
# Traffic-light colouring: below 5% green, below 15% orange, otherwise red
colors = ["green" if r < 5 else "orange" if r < 15 else "red" for r in error_rates]
ax.bar(models, error_rates, color=colors)
ax.set_title("Parse Error Rate", fontsize=14)
ax.set_ylabel("Error Rate (%)")
# Fall back to a 0-10% axis when there is no data OR every rate is zero;
# scaling a zero maximum would otherwise request identical limits (0, 0).
ax.set_ylim(0, max(error_rates, default=0) * 1.2 or 10)

# 2. Action Distribution
ax = axes[1]
action_data = {}
for name, m in observability.metrics.items():
    # Guard against division by zero for a model that never acted
    total = m.total_actions or 1
    action_data[name] = {
        "Fold": m.fold_count / total * 100,
        "Check": m.check_count / total * 100,
        "Call": m.call_count / total * 100,
        "Raise": m.raise_count / total * 100,
        "All-in": m.all_in_count / total * 100,
    }

# Transpose so each model is one stacked bar and each action one segment
df_actions = pd.DataFrame(action_data).T
df_actions.plot(kind="bar", stacked=True, ax=ax, colormap="Set3")
ax.set_title("Action Distribution", fontsize=14)
ax.set_ylabel("%")
ax.legend(loc="upper right", fontsize=8)
# Restyle the existing tick labels in place (same approach as the results charts)
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

# 3. Latency
ax = axes[2]
latency_data = {name: m.latencies for name, m in observability.metrics.items()}
# Pass concrete lists rather than dict views so matplotlib gets real sequences
ax.boxplot(list(latency_data.values()), labels=list(latency_data.keys()))
ax.set_title("Latency Distribution", fontsize=14)
ax.set_ylabel("Latency (ms)")
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

plt.tight_layout()
plt.savefig(f"{OUTPUT_DIR}/charts/observability.png", dpi=150, bbox_inches="tight")
plt.show()

## 8. Tournament Results

In [None]:
# Matchup Results Table
print("\n" + "="*70)
print("TOURNAMENT RESULTS")
print("="*70)

# One row per matchup; only a subset of columns is printed, but the CSV below
# keeps every field of the result dicts
df_matchups = pd.DataFrame(matchup_results)
print(df_matchups[["round", "player1", "player2", "player1_chips", "player2_chips", "winner"]].to_string(index=False))

# Save
df_matchups.to_csv(f"{OUTPUT_DIR}/matchups.csv", index=False)

In [None]:
# Visualization: chip results per matchup and average BB/100 per model
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Chip progression per matchup: two bars per round, green for a gain, red otherwise
ax = axes[0]
for r in matchup_results:
    y = [r["player1_chips"], r["player2_chips"]]
    colors = ["green" if c > 0 else "red" for c in y]
    ax.bar([f"{r['player1']}\n({r['round']})", f"{r['player2']}\n({r['round']})"],
           y, color=colors, alpha=0.7)

ax.axhline(y=0, color="black", linestyle="-", linewidth=0.5)
ax.set_title("Chip Results by Matchup", fontsize=14)
ax.set_ylabel("Chip Delta")
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", fontsize=9)

# BB/100 comparison
ax = axes[1]
# Collect every BB/100 a model posted across the rounds it played
player_bb100 = {}
for r in matchup_results:
    player_bb100.setdefault(r["player1"], []).append(r["player1_bb100"])
    player_bb100.setdefault(r["player2"], []).append(r["player2_bb100"])

# Unweighted mean over the matchups each model took part in
avg_bb100 = {p: sum(v)/len(v) for p, v in player_bb100.items()}
colors = ["green" if v > 0 else "red" for v in avg_bb100.values()]
ax.bar(list(avg_bb100.keys()), list(avg_bb100.values()), color=colors)
ax.axhline(y=0, color="black", linestyle="-", linewidth=0.5)
ax.set_title("Average BB/100 by Model", fontsize=14)
ax.set_ylabel("BB/100")
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

plt.tight_layout()
plt.savefig(f"{OUTPUT_DIR}/charts/tournament_results.png", dpi=150, bbox_inches="tight")
plt.show()

## 9. Blog-Ready Summary

In [None]:
# Generate BLOG_SUMMARY.md
# The markdown is assembled piece by piece: header and bracket table, key
# statistics, per-model performance table, then notable findings.
blog_md = f"""# Poker LLM Tournament Results

## Champion: {champion}

### Tournament Bracket
| Round | Matchup | Winner | Chip Differential |
|-------|---------|--------|-------------------|
"""

# NOTE(review): the "Chip Differential" column is filled with the winner's own
# chip delta, not the gap between the two players.
for r in matchup_results:
    winner_chips = r["player1_chips"] if r["winner"] == r["player1"] else r["player2_chips"]
    blog_md += f"| {r['round']} | {r['player1']} vs {r['player2']} | {r['winner']} | {winner_chips:+} |\n"

blog_md += f"""
### Key Statistics
- **Total hands played**: {sum(r['hands_played'] for r in matchup_results)}
- **Matchups completed**: {len(matchup_results)}
"""

# Add model comparison
# (avg_bb100 is computed by the tournament results visualization cell, which
# must have been run first)
blog_md += "\n### Model Performance\n"
blog_md += "| Model | Avg BB/100 | Error Rate |\n"
blog_md += "|-------|------------|------------|\n"
for name, bb in avg_bb100.items():
    # Models without collected observability metrics show "N/A"
    err = observability.metrics.get(name)
    err_rate = f"{err.parse_error_rate:.1%}" if err else "N/A"
    blog_md += f"| {name} | {bb:+.2f} | {err_rate} |\n"

blog_md += f"""
### Notable Findings
- Champion **{champion}** emerged victorious after {len(matchup_results)} rounds
"""

# The else branch is also taken when there is no "Round 2" entry at all
if "Llama3-SFT" in [r["winner"] for r in matchup_results if r["round"] == "Round 2"]:
    blog_md += "- The PokerBench paper model (Llama3-SFT) proved superior to custom fine-tunes\n"
else:
    blog_md += f"- Custom fine-tuned model beat the PokerBench benchmark (Llama3-SFT)\n"

# Write file
with open(f"{OUTPUT_DIR}/BLOG_SUMMARY.md", "w") as f:
    f.write(blog_md)

print(blog_md)
print(f"\nSaved to: {OUTPUT_DIR}/BLOG_SUMMARY.md")

In [None]:
# Quotable Stats (for easy copy-paste)
print("\n" + "=" * 60)
print("QUOTABLE STATS (copy-paste ready)")
print("=" * 60)

print(f"Champion: {champion}")

# Margin of the last matchup played, in chips and in whole big blinds
if matchup_results:
    final = matchup_results[-1]
    margin = abs(final["player1_chips"] - final["player2_chips"])
    print(f"Final margin: {margin:,} chips ({margin // BIG_BLIND} BB)")

# Best local model (only the two Qwen fine-tunes that actually played count)
local_models = ["Qwen3-SFT", "Qwen3-GRPO"]
local_bb = {m: avg_bb100[m] for m in local_models if m in avg_bb100}
if local_bb:
    best_local = max(local_bb, key=lambda m: local_bb[m])
    print(f"Best local model: {best_local} (BB/100: {local_bb[best_local]:+.2f})")

# Error rates, lowest first
print("\nError rates:")
for name, m in sorted(observability.metrics.items(), key=lambda item: item[1].parse_error_rate):
    print(f"  {name}: {m.parse_error_rate:.1%}")

## 10. Export to Google Drive

In [None]:
import shutil

# Save tournament.json
# Champion, run configuration and every matchup result in one document
tournament_data = {
    "champion": champion,
    "config": {
        "hands_per_matchup": HANDS_PER_MATCHUP,
        "starting_stack": STARTING_STACK,
        "blinds": f"{SMALL_BLIND}/{BIG_BLIND}",
        "gpu": GPU_NAME,
    },
    "matchups": matchup_results,
}

# NOTE(review): `json` is not imported in this cell; it must already be in
# scope from an earlier cell.
with open(f"{OUTPUT_DIR}/tournament.json", "w") as f:
    json.dump(tournament_data, f, indent=2)

# Copy to Drive
# dirs_exist_ok=True lets a re-run copy into an already existing results folder
drive_path = "/content/drive/MyDrive/poker_tournament_results"
shutil.copytree(OUTPUT_DIR, drive_path, dirs_exist_ok=True)

print(f"Results exported to Google Drive: {drive_path}/")
print(f"\nFiles:")
# Lists the top-level entries of the local output directory
for f in os.listdir(OUTPUT_DIR):
    print(f"  - {f}")