# 🃏 Play Against Your Poker Bot

Interactive notebook to play heads-up No-Limit Texas Hold'em against your trained models.

**How to play:**
1. Load a trained model (V14 or V15)
2. Run the game cell
3. Enter your actions when prompted
4. Watch the AI respond!

In [1]:
import sys
import os
# Add project root and src directories to path
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) if '__file__' in dir() else os.getcwd()
while 'src' not in os.listdir(project_root) and project_root != '/':
    project_root = os.path.dirname(project_root)
sys.path.insert(0, project_root)
sys.path.insert(0, os.path.join(project_root, 'src', 'models'))
sys.path.insert(0, os.path.join(project_root, 'src', 'workers'))

import random
from itertools import combinations
import numpy as np
import torch
from pokerkit import Automation, NoLimitTexasHoldem, Card, StandardHighHand, Deck

# Try loading V15 model, fallback to V14
try:
    from poker_model_v15 import DuelingPokerNet, NUM_ACTIONS_V15 as NUM_ACTIONS
    STATE_DIM = 520
    VERSION = 'V15'
except:
    from poker_model_v14 import DuelingPokerNet, NUM_ACTIONS_V14 as NUM_ACTIONS
    STATE_DIM = 385
    VERSION = 'V14'

print(f"Loaded {VERSION} model architecture")
print(f"State dimension: {STATE_DIM}")
print(f"Action space: {NUM_ACTIONS} actions")

Loaded V15 model architecture
State dimension: 520
Action space: 6 actions


In [2]:
# Action constants
ACTION_FOLD = 0
ACTION_CALL = 1
ACTION_RAISE_SMALL = 2
ACTION_RAISE_MEDIUM = 3
ACTION_RAISE_LARGE = 4
ACTION_ALL_IN = 5

ACTION_NAMES = {
    0: 'Fold',
    1: 'Check/Call',
    2: 'Small Raise (2x min)',
    3: 'Medium Raise (0.5 pot)',
    4: 'Large Raise (pot)',
    5: 'All-In'
}

def flatten_cards(items):
    """Flatten nested card lists."""
    out = []
    if isinstance(items, Card): 
        return [items]
    for x in items:
        if isinstance(x, (list, tuple)): 
            out.extend(flatten_cards(x))
        else: 
            out.append(x)
    return out

def format_cards(cards):
    """Format cards for display."""
    suit_symbols = {'s': '♠', 'h': '♥', 'd': '♦', 'c': '♣'}
    flat = flatten_cards(cards)
    return ' '.join(f"{c.rank}{suit_symbols.get(c.suit, c.suit)}" for c in flat)

def monte_carlo_equity(hole_cards, board_cards, iterations=50):
    """Calculate hand equity via Monte Carlo simulation."""
    if not hole_cards:
        return 0.5
    
    hole = flatten_cards(hole_cards)
    board = flatten_cards(board_cards)
    known = set(hole + board)
    deck = [c for c in Deck.STANDARD if c not in known]
    needed = 5 - len(board)
    wins = 0
    
    for _ in range(iterations):
        random.shuffle(deck)
        opp_hole = deck[:2]
        sim_board = board + deck[2:2+needed]
        
        my_hand = max(StandardHighHand(c) for c in combinations(hole + sim_board, 5))
        opp_hand = max(StandardHighHand(c) for c in combinations(opp_hole + sim_board, 5))
        
        if my_hand > opp_hand:
            wins += 1
        elif my_hand == opp_hand:
            wins += 0.5
    
    return wins / iterations

In [3]:
# Load trained model
MODEL_PATH = 'poker_agent_v15.pt'  # Change to your model file

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DuelingPokerNet(state_dim=STATE_DIM).to(device)

try:
    checkpoint = torch.load(MODEL_PATH, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    print(f"✅ Loaded model from {MODEL_PATH}")
except FileNotFoundError:
    print(f"⚠️ Model file {MODEL_PATH} not found. Using untrained model.")
except Exception as e:
    print(f"⚠️ Error loading model: {e}. Using untrained model.")

✅ Loaded model from poker_agent_v15.pt


In [4]:
class PokerGame:
    """Interactive poker game against the AI."""
    
    def __init__(self, model, starting_stack=1000, big_blind=10):
        self.model = model
        self.starting_stack = starting_stack
        self.big_blind = big_blind
        self.small_blind = big_blind // 2
        
        # Session stats
        self.hands_played = 0
        self.human_profit = 0
        self.human_wins = 0
    
    def build_observation(self, state, player_idx):
        """Build state observation for the AI."""
        obs = []
        hole = flatten_cards(state.hole_cards[player_idx])
        board = flatten_cards(state.board_cards)
        
        ranks = '23456789TJQKA'
        suits = 'cdhs'
        
        # Encode hole cards (2 x 52)
        for i in range(2):
            encoding = np.zeros(52, dtype=np.float32)
            if i < len(hole):
                idx = ranks.index(hole[i].rank) * 4 + suits.index(hole[i].suit)
                encoding[idx] = 1.0
            obs.extend(encoding)
        
        # Encode board (5 x 52)
        for i in range(5):
            encoding = np.zeros(52, dtype=np.float32)
            if i < len(board):
                idx = ranks.index(board[i].rank) * 4 + suits.index(board[i].suit)
                encoding[idx] = 1.0
            obs.extend(encoding)
        
        # Stack and pot info
        total_pot = sum(state.bets)
        current_bet = max(state.bets)
        my_bet = state.bets[player_idx]
        to_call = current_bet - my_bet
        my_stack = state.stacks[player_idx]
        opp_stack = state.stacks[1 - player_idx]
        
        equity = monte_carlo_equity(hole, board, 30)
        obs.append(equity)
        
        pot_odds = to_call / (total_pot + to_call + 1e-6)
        obs.append(pot_odds)
        obs.append(min(to_call / (total_pot + 1e-6), 2.0))
        obs.append(min((my_stack / (total_pot + 1e-6)) / 20.0, 1.0))
        obs.append(min(my_stack, opp_stack) / self.starting_stack)
        obs.append(my_stack / self.starting_stack)
        obs.append(opp_stack / self.starting_stack)
        obs.append(total_pot / (self.starting_stack * 2))
        
        breakeven = to_call / (total_pot + to_call + 1e-6)
        obs.append(breakeven)
        obs.append(equity - breakeven)
        obs.append((self.starting_stack - my_stack) / self.starting_stack)
        obs.append(1.0 if to_call > 0 else 0.0)
        
        # Hand strength category (one-hot)
        if equity < 0.30: cat = 0
        elif equity < 0.45: cat = 1
        elif equity < 0.60: cat = 2
        elif equity < 0.75: cat = 3
        else: cat = 4
        for i in range(5):
            obs.append(1.0 if i == cat else 0.0)
        
        # Street encoding
        street = [0.0] * 4
        if len(board) == 0: street[0] = 1.0
        elif len(board) == 3: street[1] = 1.0
        elif len(board) == 4: street[2] = 1.0
        else: street[3] = 1.0
        obs.extend(street)
        
        # Pad to state_dim
        while len(obs) < STATE_DIM:
            obs.append(0.0)
        
        return np.array(obs[:STATE_DIM], dtype=np.float32)
    
    def get_legal_actions(self, state):
        """Get legal actions."""
        legal = []
        if state.can_fold():
            legal.append(ACTION_FOLD)
        if state.can_check_or_call():
            legal.append(ACTION_CALL)
        if state.can_complete_bet_or_raise_to():
            legal.extend([ACTION_RAISE_SMALL, ACTION_RAISE_MEDIUM, 
                         ACTION_RAISE_LARGE, ACTION_ALL_IN])
        return legal if legal else [ACTION_CALL]
    
    def execute_action(self, state, action, actor):
        """Execute an action."""
        if action == ACTION_FOLD:
            if state.can_fold():
                state.fold()
            elif state.can_check_or_call():
                state.check_or_call()
        
        elif action == ACTION_CALL:
            if state.can_check_or_call():
                state.check_or_call()
            elif state.can_fold():
                state.fold()
        
        elif action in [ACTION_RAISE_SMALL, ACTION_RAISE_MEDIUM, 
                        ACTION_RAISE_LARGE, ACTION_ALL_IN]:
            if state.can_complete_bet_or_raise_to():
                min_r = state.min_completion_betting_or_raising_to_amount
                max_r = state.max_completion_betting_or_raising_to_amount
                pot = sum(state.bets)
                
                if action == ACTION_RAISE_SMALL:
                    amount = min(min_r * 2, max_r)
                elif action == ACTION_RAISE_MEDIUM:
                    amount = min(max(min_r, int(pot * 0.5)), max_r)
                elif action == ACTION_RAISE_LARGE:
                    amount = min(max(min_r, pot), max_r)
                else:  # ALL_IN
                    amount = max_r
                
                state.complete_bet_or_raise_to(int(amount))
            elif state.can_check_or_call():
                state.check_or_call()
    
    def ai_action(self, state, ai_idx):
        """Get AI's action."""
        obs = self.build_observation(state, ai_idx)
        legal = self.get_legal_actions(state)
        
        with torch.no_grad():
            state_tensor = torch.FloatTensor(obs).unsqueeze(0).to(device)
            q_values = self.model(state_tensor).squeeze(0)
            
            # Mask illegal actions
            masked_q = torch.full_like(q_values, float('-inf'))
            for a in legal:
                masked_q[a] = q_values[a]
            
            action = masked_q.argmax().item()
        
        return action
    
    def run_automations(self, state):
        """Run game automations."""
        while state.can_burn_card(): state.burn_card('??')
        while state.can_deal_board(): state.deal_board()
        while state.can_push_chips(): state.push_chips()
        while state.can_pull_chips(): state.pull_chips()
    
    def display_state(self, state, human_idx, show_ai_cards=False):
        """Display current game state."""
        ai_idx = 1 - human_idx
        board = flatten_cards(state.board_cards)
        human_hole = flatten_cards(state.hole_cards[human_idx])
        ai_hole = flatten_cards(state.hole_cards[ai_idx])
        
        print("\n" + "="*50)
        print(f"💰 Pot: {sum(state.bets)} chips")
        print(f"📊 Your Stack: {state.stacks[human_idx]} | AI Stack: {state.stacks[ai_idx]}")
        print("="*50)
        
        if board:
            print(f"\n🃏 Board: {format_cards(board)}")
        else:
            print("\n🃏 Board: [Preflop]")
        
        print(f"\n🎴 Your Hand: {format_cards(human_hole)}")
        
        if show_ai_cards:
            print(f"🤖 AI Hand: {format_cards(ai_hole)}")
        else:
            print(f"🤖 AI Hand: [Hidden]")
        
        # Show equity estimate
        equity = monte_carlo_equity(human_hole, board, 50)
        print(f"\n📈 Your Equity: {equity*100:.1f}%")
    
    def get_human_action(self, state, human_idx):
        """Get human player's action."""
        legal = self.get_legal_actions(state)
        
        current_bet = max(state.bets)
        my_bet = state.bets[human_idx]
        to_call = current_bet - my_bet
        
        print("\n🎮 Your turn! Available actions:")
        for i, action in enumerate(legal):
            name = ACTION_NAMES[action]
            if action == ACTION_CALL:
                if to_call == 0:
                    name = "Check"
                else:
                    name = f"Call ({to_call} chips)"
            print(f"  [{action}] {name}")
        
        while True:
            try:
                choice = int(input("\nEnter action number: "))
                if choice in legal:
                    return choice
                print("Invalid choice. Try again.")
            except ValueError:
                print("Please enter a number.")
            except EOFError:
                return ACTION_FOLD
    
    def play_hand(self, human_is_button=True):
        """Play a single hand."""
        human_idx = 0 if human_is_button else 1
        ai_idx = 1 - human_idx
        
        # Create game state
        state = NoLimitTexasHoldem.create_state(
            automations=(
                Automation.ANTE_POSTING, Automation.BET_COLLECTION,
                Automation.BLIND_OR_STRADDLE_POSTING, 
                Automation.HOLE_CARDS_SHOWING_OR_MUCKING,
                Automation.HAND_KILLING, Automation.CHIPS_PUSHING, 
                Automation.CHIPS_PULLING,
            ),
            ante_trimming_status=True, raw_antes={-1: 0},
            raw_blinds_or_straddles=(self.small_blind, self.big_blind),
            min_bet=self.big_blind,
            raw_starting_stacks=[self.starting_stack, self.starting_stack],
            player_count=2,
        )
        
        # Deal hole cards
        while state.can_deal_hole():
            state.deal_hole()
        self.run_automations(state)
        
        position = "Button (acts first preflop, last postflop)" if human_is_button else "Big Blind"
        print(f"\n🎰 NEW HAND #{self.hands_played + 1}")
        print(f"📍 Your position: {position}")
        
        # Play the hand
        while state.status is not False:
            actor = state.actor_index
            
            if actor is None:
                self.run_automations(state)
                continue
            
            self.display_state(state, human_idx)
            
            if actor == human_idx:
                action = self.get_human_action(state, human_idx)
                print(f"\n✅ You chose: {ACTION_NAMES[action]}")
            else:
                action = self.ai_action(state, ai_idx)
                print(f"\n🤖 AI chose: {ACTION_NAMES[action]}")
            
            self.execute_action(state, action, actor)
            self.run_automations(state)
        
        # Hand complete - show results
        self.display_state(state, human_idx, show_ai_cards=True)
        
        human_profit = state.stacks[human_idx] - self.starting_stack
        self.hands_played += 1
        self.human_profit += human_profit
        
        print("\n" + "="*50)
        if human_profit > 0:
            self.human_wins += 1
            print(f"🎉 YOU WIN! +{human_profit} chips")
        elif human_profit < 0:
            print(f"😔 AI wins. {human_profit} chips")
        else:
            print("🤝 Split pot!")
        print("="*50)
        
        return human_profit
    
    def play_session(self, num_hands=10):
        """Play multiple hands."""
        print("\n" + "🃏"*20)
        print("  POKER SESSION START")
        print("🃏"*20)
        print(f"\nPlaying {num_hands} hands against the AI.")
        print("Position will alternate each hand.\n")
        
        for i in range(num_hands):
            human_is_button = (i % 2 == 0)
            self.play_hand(human_is_button)
            
            print(f"\n📊 Session Stats: {self.hands_played} hands, "
                  f"Total P/L: {self.human_profit:+d} chips, "
                  f"Win Rate: {self.human_wins/self.hands_played*100:.1f}%")
            
            if i < num_hands - 1:
                try:
                    cont = input("\nPress Enter for next hand (or 'q' to quit): ")
                    if cont.lower() == 'q':
                        break
                except EOFError:
                    break
        
        print("\n" + "🏆"*20)
        print("  SESSION COMPLETE")
        print("🏆"*20)
        print(f"\nFinal Stats:")
        print(f"  Hands Played: {self.hands_played}")
        print(f"  Total Profit: {self.human_profit:+d} chips")
        print(f"  Avg BB/hand: {self.human_profit / self.hands_played / (self.big_blind):+.2f}")
        print(f"  Win Rate: {self.human_wins/self.hands_played*100:.1f}%")

In [5]:
# Create game and start playing!
game = PokerGame(model, starting_stack=1000, big_blind=10)

# Play a session of hands
game.play_session(num_hands=10)


🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏
  POKER SESSION START
🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏🃏

Playing 10 hands against the AI.
Position will alternate each hand.


🎰 NEW HAND #1
📍 Your position: Button (acts first preflop, last postflop)

💰 Pot: 15 chips
📊 Your Stack: 990 | AI Stack: 995

🃏 Board: [Preflop]

🎴 Your Hand: 4♥ T♥
🤖 AI Hand: [Hidden]

📈 Your Equity: 38.0%

🤖 AI chose: All-In

💰 Pot: 1010 chips
📊 Your Stack: 990 | AI Stack: 0

🃏 Board: [Preflop]

🎴 Your Hand: 4♥ T♥
🤖 AI Hand: [Hidden]

📈 Your Equity: 53.0%

🎮 Your turn! Available actions:
  [0] Fold
  [1] Call (990 chips)

✅ You chose: Check/Call

💰 Pot: 0 chips
📊 Your Stack: 2000 | AI Stack: 0

🃏 Board: K♣ 4♣ 5♦ Q♦ J♠

🎴 Your Hand: 4♥ T♥
🤖 AI Hand: 

📈 Your Equity: 45.0%

🎉 YOU WIN! +1000 chips

📊 Session Stats: 1 hands, Total P/L: +1000 chips, Win Rate: 100.0%

🎰 NEW HAND #2
📍 Your position: Big Blind

💰 Pot: 15 chips
📊 Your Stack: 995 | AI Stack: 990

🃏 Board: [Preflop]

🎴 Your Hand: 8♣ 6♣
🤖 AI Hand: [Hidden]

📈 Your Equity: 53.0%

🎮 Your turn! Ava

In [None]:
# Or play individual hands
# game.play_hand(human_is_button=True)