# Kuhn Poker

In [96]:
import numpy as np
import random

# 0 or p - PASS
# 1 or b - BET

N = 2  # number of actions

cards = ["K", "Q", "J"]


def cardValue(card):
    if card == "K":
        return 3
    elif card == "Q":
        return 2
    elif card == "J":
        return 1
    else:
        raise ValueError("Invalid card")


def normalize(strategy):
    normSum = np.sum(strategy)
    return strategy / normSum if normSum > 0 else np.array(N * [1 / N], dtype=np.float64)


# get current strategy through regret-matching
def getStrategy(regretSum):
    strategy = np.maximum(regretSum, 0)
    return normalize(strategy)


# get random action based on mixed strategy
def getAction(strategy):
    return np.random.choice(N, p=strategy)


# http://en.wikipedia.org/wiki/Fisher-Yates_shuffle#The_modern_algorithm
def shuffle(cards):
    for c1 in range(len(cards) - 1, 0, -1):
        c2 = random.randint(0, c1)
        cards[c1], cards[c2] = cards[c2], cards[c1]


class Node:
    def __init__(self, infoSet):
        self.infoSet = infoSet
        self.regretSum = np.zeros(N, dtype=np.float64)
        self.strategy = np.zeros(N, dtype=np.float64)
        self.strategySum = np.zeros(N, dtype=np.float64)

    def __str__(self):
        return f"{self.infoSet:3s} {normalize(self.strategy)}"


class KuhnTrainer:
    def __init__(self):
        self.nodes = {}

    def cfr(self, cards: list[str], history: str, p0: float, p1: float) -> float:
        plays = len(history)
        player = plays % 2
        opponent = 1 - player

        # Return payoff for terminal states
        if plays > 1:
            terminalPass = history[plays - 1] == "p"
            doubleBet = history[plays - 2 : plays] == "bb"
            isPlayerCardHigher = cardValue(cards[player]) > cardValue(cards[opponent])
            if terminalPass:
                if history == "pp":
                    return 1 if isPlayerCardHigher else -1
                else:
                    return 1
            elif doubleBet:
                return 2 if isPlayerCardHigher else -2

        # Get information set node or create it if nonexistant
        infoSet = cards[player] + history
        node = self.nodes.get(infoSet)
        if node is None:
            node = Node(infoSet)
            self.nodes[infoSet] = node

        # For each action, recursively call cfr with additional history and probability
        node.strategy = getStrategy(node.regretSum)
        util = np.zeros(N, dtype=np.float64)
        nodeUtil = 0.0
        for i in range(N):
            action = "pb"[i]
            nextHistory = history + action
            if player == 0:
                util[i] = self.cfr(cards, nextHistory, p0 * node.strategy[i], p1)
            else:
                util[i] = self.cfr(cards, nextHistory, p0, p1 * node.strategy[i])
            nodeUtil += node.strategy[i] * util[i]

        # For each action, update strategySum and regretSum
        realizationWeight = p0 if player == 0 else p1
        node.strategySum += realizationWeight * node.strategy
        regret = util - nodeUtil
        node.regretSum += (p1 * regret if player == 0 else p0) * regret

        return nodeUtil

    def train(self, iterations: int):
        util = 0.0
        cards = ["K", "Q", "J"]
        for _ in range(iterations):
            shuffle(cards)
            util += self.cfr(cards, "", 1.0, 1.0)
        print(f"Average game value: {util/iterations:.2f}")
        for infoSet in self.nodes:
            print(self.nodes[infoSet])

In [97]:
trainer = KuhnTrainer()
trainer.train(1000)

Average game value: 1.18
K   [0.5 0.5]
Jp  [1. 0.]
Kpb [0.5 0.5]
Jb  [0. 1.]
Q   [0.5 0.5]
Kp  [0. 1.]
Qpb [0.5 0.5]
Kb  [1. 0.]
Qp  [0. 1.]
Qb  [1. 0.]
J   [0.5 0.5]
Jpb [0.5 0.5]
