# Poker LLM Tournament

Gauntlet-style tournament comparing poker LLMs.

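**Quick Start:** Run all cells. Results are saved to `/content/tournament_results/`. By default the notebook runs in dry-run mode (10 hands per matchup); set `DRY_RUN = False` in the Configuration section for full 1000-hand matchups.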

## 1. Setup

In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): none of the cells visible in this notebook read from or write
# to /content/drive (results go to /content/tournament_results) - confirm the
# mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

# Install the runtime dependencies quietly (-q).
!pip install -q transformers accelerate bitsandbytes torch pokerkit tqdm pandas matplotlib openai
# Clone the project repo. stderr is discarded and `|| true` keeps the cell
# going when the clone fails (e.g. the target directory is left over from an
# earlier run), so a failed clone is silent.
!git clone https://github.com/yilenpan/player_poker_bot.git /content/player_poker_bot 2>/dev/null || true
# Editable install of the cloned repo.
# NOTE(review): presumably this is what makes `src.eval` importable in the
# imports cell - confirm.
!pip install -q -e /content/player_poker_bot

In [None]:
# Set OpenAI API key from Colab secrets
from google.colab import userdata
import os

# Copy the OpenAI key from Colab secrets into the environment. This is
# deliberately best-effort: without a key the notebook still runs, and the
# gauntlet cell skips the OpenAI matchup.
try:
    os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
except Exception as exc:
    # `Exception` rather than a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate; the reason is printed to tell a missing
    # secret apart from a denied-access or other failure.
    print("No OpenAI API key found - GPT-4 matchups will be skipped")
    print(f"  Reason: {type(exc).__name__}: {exc}")
else:
    print("OpenAI API key loaded")

In [None]:
import os
import gc
import random
import json
from typing import Tuple

import torch
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

from src.eval import (
    HardwareConfig,
    TransformersPlayer,
    OpenAIPlayer,
    MetricsCollector,
    EvalPokerGame,
    ObservabilityCollector,
)

print("Imports loaded!")

## 2. Configuration

In [None]:
# Hardware detection
# `hw` is kept at module level: load_transformers_model() later calls
# hw.get_bnb_config() to decide whether to pass a quantization config.
hw = HardwareConfig.detect()
# Report the detected GPU name, its VRAM in whole GB, and the chosen quantization.
print(f"GPU: {hw.gpu_name} ({hw.vram_gb:.0f}GB), Quantization: {hw.quantization.value}")

# --- Tournament settings -------------------------------------------------
# DRY_RUN keeps each matchup short for a smoke test; flip it to False for
# the full 1000-hand matchups.
DRY_RUN = True
HANDS_PER_MATCHUP = 1000 if not DRY_RUN else 10

# Table stakes and the seed used by the tournament cell.
STARTING_STACK = 10000
BIG_BLIND = 100
SMALL_BLIND = 50
SEED = 42

# --- Models ----------------------------------------------------------------
# Display name -> spec. Transformers entries carry a "model_id"; the OpenAI
# entry carries a "model" name instead.
MODELS = {
    display_name: {"type": "transformers", "model_id": repo_id}
    for display_name, repo_id in (
        ("Qwen3-Base", "unsloth/Qwen3-4B-Thinking-2507"),
        ("Qwen3-SFT", "YiPz/qwen3-4b-pokergpt-o3-sft-lora"),
        ("Llama3-SFT", "YiPz/llama3-8b-pokerbench-sft"),
    )
}
MODELS["GPT-4"] = {"type": "openai", "model": "gpt-4"}

# --- Gauntlet bracket --------------------------------------------------------
# Each entry is (player 1, player 2). A player-1 name starting with "WINNER"
# is a placeholder that the gauntlet loop replaces with the current champion.
GAUNTLET = [
    # R1: Your models
    ("Qwen3-Base", "Qwen3-SFT"),
    # R2: vs Benchmark
    ("WINNER_R1", "Llama3-SFT"),
    # R3: vs GPT-4
    ("WINNER_R2", "GPT-4"),
]

# Root directory for everything the notebook writes (metrics, JSON/CSV/markdown
# exports); the chart image goes into its "charts" subdirectory.
OUTPUT_DIR = "/content/tournament_results"
# Creates OUTPUT_DIR and OUTPUT_DIR/charts in one call; exist_ok makes the cell
# safe to re-run.
os.makedirs(f"{OUTPUT_DIR}/charts", exist_ok=True)

# Echo the effective mode so a dry run is not mistaken for a full run.
print(f"Mode: {'DRY RUN' if DRY_RUN else 'FULL'}, Hands: {HANDS_PER_MATCHUP}")

## 3. Model Loading Helpers

In [None]:
# Display name -> (model, tokenizer) for every transformers model currently
# loaded. unload_model() removes entries from it.
loaded_cache = {}

def load_transformers_model(name: str, model_id: str) -> TransformersPlayer:
    """Return a TransformersPlayer for *name*, loading the model on first use.

    Both the model and its tokenizer are cached in ``loaded_cache`` under
    *name*, so a champion that advances to the next round is not reloaded.

    Args:
        name: Display name of the player; also the cache key.
        model_id: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.

    Returns:
        A new TransformersPlayer wrapping the cached model and tokenizer.
    """
    if name not in loaded_cache:
        print(f"  Loading {name}: {model_id}...")
        # NOTE(security): trust_remote_code=True allows code shipped with the
        # model repo to run locally; only use model ids you trust.
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

        load_kwargs = {"device_map": "auto", "trust_remote_code": True, "torch_dtype": torch.float16}
        # Only pass a quantization config when hardware detection produced one.
        bnb_config = hw.get_bnb_config()
        if bnb_config:
            load_kwargs["quantization_config"] = bnb_config

        model = AutoModelForCausalLM.from_pretrained(model_id, **load_kwargs)
        loaded_cache[name] = (model, tokenizer)
        print(f"  VRAM: {torch.cuda.memory_allocated() / 1024**3:.1f}GB")

    model, tokenizer = loaded_cache[name]
    return TransformersPlayer(name, model, tokenizer)


def unload_model(name: str):
    """Drop the cached model for *name* and report allocated VRAM.

    Does nothing when *name* is not in ``loaded_cache``. Otherwise the cache
    entry is removed, a garbage collection pass is forced, and the CUDA
    allocator cache is emptied before the allocation figure is printed.
    Only the cache's own reference is dropped here; objects that still
    reference the model elsewhere keep it alive.
    """
    if name not in loaded_cache:
        return

    loaded_cache.pop(name)
    gc.collect()
    torch.cuda.empty_cache()
    print(f"  Unloaded {name}, VRAM: {torch.cuda.memory_allocated() / 1024**3:.1f}GB")


print("Helpers ready!")

## 4. Run Tournament

In [None]:
# Seed every RNG the tournament can touch. Python's `random` alone leaves the
# torch generators unseeded, so any sampling done on the torch side would
# differ between runs despite SEED being fixed.
random.seed(SEED)
torch.manual_seed(SEED)

# One result dict per completed matchup, appended by the gauntlet loop.
matchup_results = []
# Shared across all matchups; constructed with the output directory.
observability = ObservabilityCollector(OUTPUT_DIR)


def _make_player(name: str):
    """Build the player object for *name* from its entry in MODELS."""
    spec = MODELS[name]
    if spec["type"] == "openai":
        return OpenAIPlayer(name, model=spec.get("model", "gpt-4"))
    return load_transformers_model(name, spec["model_id"])


def run_matchup(p1_name: str, p2_name: str, round_name: str) -> Tuple[str, dict]:
    """Play one heads-up matchup and return ``(winner_name, result_dict)``.

    Both players are built from MODELS, a session of HANDS_PER_MATCHUP hands
    is played, and the player with the larger total chip delta wins. A tie
    goes to player 1. If the loser is a transformers model it is unloaded
    afterwards to free VRAM for the next round.

    Args:
        p1_name: MODELS key of the first player.
        p2_name: MODELS key of the second player.
        round_name: Label used in console output and in the metrics name.

    Returns:
        The winner's name and a dict with the round label, both player
        names, their chip deltas and bb/100, the winner, and hands played.

    Raises:
        KeyError: If either name is not a key of MODELS.
    """
    print(f"\n{'='*60}")
    print(f"{round_name}: {p1_name} vs {p2_name}")
    print(f"{'='*60}")

    # Load players
    p1 = _make_player(p1_name)
    p2 = _make_player(p2_name)

    # Create game
    metrics = MetricsCollector(f"{round_name}_{p1_name}_vs_{p2_name}")
    pbar = tqdm(total=HANDS_PER_MATCHUP, desc=f"{p1_name} vs {p2_name}")

    # Close the progress bar even when the session raises, so a failed
    # matchup does not leave a dangling bar in the notebook output.
    try:
        game = EvalPokerGame(
            players=[p1, p2],
            starting_stack=STARTING_STACK,
            small_blind=SMALL_BLIND,
            big_blind=BIG_BLIND,
            metrics=metrics,
            observability=observability,
            # Set the bar to the absolute hand count reported by the game,
            # then redraw it.
            progress_callback=lambda cur, tot: (setattr(pbar, 'n', cur), pbar.refresh()),
        )
        result = game.play_session(HANDS_PER_MATCHUP)
    finally:
        pbar.close()

    # Determine winner (ties go to player 1)
    p1_summary = result.player_summaries[p1_name]
    p2_summary = result.player_summaries[p2_name]
    p1_delta = p1_summary["total_chip_delta"]
    p2_delta = p2_summary["total_chip_delta"]
    winner = p1_name if p1_delta >= p2_delta else p2_name

    print(f"\nResult: {p1_name} {p1_delta:+} | {p2_name} {p2_delta:+} | WINNER: {winner}")

    result_dict = {
        "round": round_name,
        "player1": p1_name, "player2": p2_name,
        "player1_chips": p1_delta, "player2_chips": p2_delta,
        "player1_bb100": p1_summary["bb_per_100"],
        "player2_bb100": p2_summary["bb_per_100"],
        "winner": winner,
        "hands_played": result.total_hands,
    }

    # Release this function's references to the players and the game before
    # unloading. They presumably still hold the model objects, which would
    # keep the loser's weights alive and make the VRAM figure printed by
    # unload_model() misleading.
    del game, p1, p2

    # Unload losing model
    loser = p2_name if winner == p1_name else p1_name
    if MODELS[loser]["type"] == "transformers":
        unload_model(loser)

    return winner, result_dict


print("Tournament ready!")

In [None]:
# Run gauntlet: play the GAUNTLET rounds in order, carrying the winner of each
# round forward as player 1 of the next.
print("\n" + "="*60)
print("POKER LLM TOURNAMENT")
print("="*60)

champion = None

for i, (p1_template, p2_name) in enumerate(GAUNTLET):
    round_name = f"Round {i+1}"

    # Resolve winner placeholders. A placeholder with no champion yet would
    # otherwise surface as an opaque KeyError on MODELS[None].
    if p1_template.startswith("WINNER"):
        if champion is None:
            print(f"\nStopping before {round_name}: no previous winner to advance")
            break
        p1_name = champion
    else:
        p1_name = p1_template

    # Skip if the round needs an OpenAI player and no API key is set. This is
    # keyed on the MODELS type rather than the literal name "GPT-4", so
    # renaming or adding OpenAI entries keeps working.
    needs_openai = any(MODELS[n]["type"] == "openai" for n in (p1_name, p2_name))
    if needs_openai and not os.environ.get("OPENAI_API_KEY"):
        print(f"\nSkipping {round_name}: No OpenAI API key")
        break

    # Skip if SFT lost to Base: later rounds are meant to test the fine-tuned
    # model, so there is no point continuing with the base model.
    if i == 1 and champion == "Qwen3-Base":
        print("\nStopping: Base model beat SFT - training needs work!")
        break

    champion, result = run_matchup(p1_name, p2_name, round_name)
    matchup_results.append(result)

print(f"\n{'='*60}")
print(f"CHAMPION: {champion}")
print("="*60)

## 5. Results

In [None]:
# Results table
# `df` is built unconditionally because the export cell writes it to CSV.
df = pd.DataFrame(matchup_results)
if matchup_results:
    print(df[["round", "player1", "player2", "player1_bb100", "player2_bb100", "winner"]].to_string(index=False))
else:
    # An empty frame has no columns, so the selection above would raise
    # KeyError; the visualization cell guards the same way.
    print("No matchups were played - nothing to tabulate")

# Observability summary
observability.export_metrics()
print(f"\nAction parse errors: {observability.total_errors()}")

In [None]:
# Visualization: one pair of horizontal bars per round (player 1 above the
# tick, player 2 below), each annotated with the player's name.
if matchup_results:
    fig, ax = plt.subplots(figsize=(10, 5))

    # (vertical offset from the round's tick, result-dict key prefix, bar colour)
    bar_specs = ((-0.2, "player1", "steelblue"), (0.2, "player2", "coral"))

    for i, r in enumerate(matchup_results):
        for offset, side, color in bar_specs:
            bb100 = r[f"{side}_bb100"]
            ax.barh(i + offset, bb100, 0.35, color=color)
            # Anchor the name just past the bar tip on the side away from
            # zero; a left-aligned label on a negative bar would be drawn
            # on top of the bar itself.
            ax.text(
                bb100,
                i + offset,
                f" {r[side]} ",
                va="center",
                ha="left" if bb100 >= 0 else "right",
                fontsize=9,
            )

    ax.axvline(x=0, color="black", linewidth=0.5)
    ax.set_yticks(range(len(matchup_results)))
    ax.set_yticklabels([r["round"] for r in matchup_results])
    ax.set_xlabel("BB/100")
    ax.set_title("Tournament Results")

    plt.tight_layout()
    plt.savefig(f"{OUTPUT_DIR}/charts/tournament.png", dpi=150)
    plt.show()

## 6. Export

In [None]:
# Save results: a JSON dump of the whole tournament, a CSV of the results
# table, and a markdown summary. Files are opened with an explicit UTF-8
# encoding so the output does not depend on the platform default.
tournament_data = {
    "champion": champion,
    "config": {"hands_per_matchup": HANDS_PER_MATCHUP, "dry_run": DRY_RUN},
    "matchups": matchup_results,
}

with open(f"{OUTPUT_DIR}/tournament.json", "w", encoding="utf-8") as f:
    json.dump(tournament_data, f, indent=2)

# `df` comes from the results-table cell.
df.to_csv(f"{OUTPUT_DIR}/results.csv", index=False)

# Blog summary: header plus one table row per matchup, with each player's
# bb/100 shown in parentheses.
blog_md = f"""# Poker LLM Tournament Results

## Champion: {champion}

| Round | Player 1 | Player 2 | Winner |
|-------|----------|----------|--------|
"""
blog_md += "".join(
    f"| {r['round']} | {r['player1']} ({r['player1_bb100']:+.1f}) | {r['player2']} ({r['player2_bb100']:+.1f}) | {r['winner']} |\n"
    for r in matchup_results
)

with open(f"{OUTPUT_DIR}/BLOG_SUMMARY.md", "w", encoding="utf-8") as f:
    f.write(blog_md)

print(f"Saved to {OUTPUT_DIR}/: tournament.json, results.csv, BLOG_SUMMARY.md")