In [1]:
# %pip install numpy
# %pip install torch
# %pip install stable-baselines3
# %pip install tensorflow-gpu
# %pip install shimmy

In [2]:
class Card:
    def __init__(self, rank, suit):
        self.rank = rank
        self.suit = suit

    def __repr__(self):
        suits = {
            "S": "♠",
            "H": "♥",
            "D": "♦",
            "C": "♣",
        }
        ranks = {
            14: "A",
            11: "J",
            12: "Q",
            13: "K",
        }
        if self.rank in ranks:
            rank = ranks[self.rank]
        else:
            rank = str(self.rank)
        return f"{rank}{suits[self.suit]}"

    def __eq__(self, other):
        return self.rank == other.rank and self.suit == other.suit

    def __lt__(self, other):
        return self.rank < other.rank

    def __hash__(self):
        return hash((self.rank, self.suit))

In [3]:
class CardDeck:
    def __init__(self):
        self.deck = [Card(rank, suit) for rank in range(2, 15) for suit in "SHDC"]
    def __repr__(self):
        return str(self.deck)
    def __len__(self):
        return len(self.deck)
    def __getitem__(self, position):
        return self.deck[position]
    def shuffleCards(self):
        import random
        random.shuffle(self.deck)
    def drawCards(self, n):
        return [self.deck.pop() for i in range(n)]
    def numCards(self):
        return len(self.deck)

In [4]:
class PokerHand:
    def __init__(self, cards):
        self.cards = cards
        self.cards.sort()
        self.cards.reverse()
        self.rank = self.getRank()
    def __repr__(self):
        return str(self.cards)
    def __len__(self):
        return len(self.cards)
    def __getitem__(self, position):
        return self.cards[position]
    def getRank(self):
        if self.isRoyalFlush():
            return 10
        elif self.isStraightFlush():
            return 9
        elif self.isFourOfAKind():
            return 8
        elif self.isFullHouse():
            return 7
        elif self.isFlush():
            return 6
        elif self.isStraight():
            return 5
        elif self.isThreeOfAKind():
            return 4
        elif self.isTwoPair():
            return 3
        elif self.isPair():
            return 2
        else:
            return 1.0 * self.cards[0].rank / 14
    def isRoyalFlush(self):
        if self.isStraightFlush() and self.cards[0].rank == 14:
            return True
        else:
            return False
    def isStraightFlush(self):
        if self.isFlush() and self.isStraight():
            return True
        else:
            return False
    def isFourOfAKind(self):
        if self.cards[0].rank == self.cards[3].rank or self.cards[1].rank == self.cards[4].rank:
            return True
        else:
            return False
    def isFullHouse(self):
        if self.isThreeOfAKind() and self.isPair():
            return True
        else:
            return False
    def isFlush(self):
        if self.cards[0].suit == self.cards[1].suit == self.cards[2].suit == self.cards[3].suit == self.cards[4].suit:
            return True
        else:
            return False
    def isStraight(self):
        if self.cards[0].rank == self.cards[1].rank + 1 == self.cards[2].rank + 2 == self.cards[3].rank + 3 == self.cards[4].rank + 4:
            return True
        else:
            return False
    def isThreeOfAKind(self):
        if self.cards[0].rank == self.cards[2].rank or self.cards[1].rank == self.cards[3].rank or self.cards[2].rank == self.cards[4].rank:
            return True
        else:
            return False
    def isTwoPair(self):
        if self.cards[0].rank == self.cards[1].rank and self.cards[2].rank == self.cards[3].rank:
            return True
        elif self.cards[0].rank == self.cards[1].rank and self.cards[3].rank == self.cards[4].rank:
            return True
        elif self.cards[1].rank == self.cards[2].rank and self.cards[3].rank == self.cards[4].rank:
            return True
        else:
            return False
    def isPair(self):
        if self.cards[0].rank == self.cards[1].rank or self.cards[1].rank == self.cards[2].rank or self.cards[2].rank == self.cards[3].rank or self.cards[3].rank == self.cards[4].rank:
            return True
        else:
            return False

In [5]:
from enum import Enum

class Action(Enum):
    FOLD = 0
    CALL = 1
    RAISE = 2
    CHECK = 3
    ALLIN = 4

    def __eq__(self, other):
        if isinstance(other, int):
            return self.value == other
        if isinstance(other, Action):
            return self.value == other.value
        return False

print(Action.FOLD == 0)  # Now this will print True

True


In [6]:
len(Action)

5

In [7]:
class PokerGame:
    def __init__(self, players, smallBlind):
        self.deck = CardDeck()
        self.deck.shuffleCards()
        self.flop = []
        self.players = players
        self.smallBlind = smallBlind
        self.round = 0
        self.roundBet = 0
        self.pot = 0
        self.playersTurn = 0
        self.playersLeft = len(self.players)
        self.playersLeftToPlayInThisRound = len(self.players)
        print("num players", len(self.players))
        for player in self.players:
            if player.getBudget() == 0:
                print( "herer")
                player.folded = True
        self.nonFoldedPlayers = [player for player in self.players if player.folded == False]
        print(len(self.nonFoldedPlayers))
    
    def startRound(self):
        for player in self.players:
            player.resetRoundBet()
        self.roundBet = 0
        if self.round == 0:
            self.handleCards(2)
            self.makeInitialBets()
        elif self.round == 1:
            self.flopCards(3)
        elif self.round == 2:
            self.flopCards(1)
        elif self.round == 3:
            self.flopCards(1)
        self.playersLeftToPlayInThisRound = self.playersLeft
    
    def makeInitialBets(self):
        while(self.players[self.playersTurn].folded):
            self.playerTakeAction(self.players[self.playersTurn], Action.FOLD)
        if self.players[self.playersTurn].getBudget() <= self.smallBlind:
            self.playerTakeAction(self.players[self.playersTurn], Action.ALLIN)
        else:
            self.playerTakeAction(self.players[self.playersTurn], Action.RAISE, self.smallBlind)
        
        while(self.players[self.playersTurn].folded):
            self.playerTakeAction(self.players[self.playersTurn], Action.FOLD)
        if self.players[self.playersTurn].getBudget() <= self.smallBlind * 2:
            self.playerTakeAction(self.players[self.playersTurn], Action.ALLIN)
        else:
            self.playerTakeAction(self.players[self.playersTurn], Action.RAISE, self.smallBlind * 2)
    
    def finishRound(self):
        self.round += 1
        if self.isGameOver():
            self.getWinner()
    
    def getAmountToCall(self, player):
        return self.roundBet - player.roundBet
    
    def getPlayerActions(self, player): #all in fix needed
        if player.isFolded() == False:
            if player.isAllIn:
                return [Action.CHECK]
            if self.getAmountToCall(player) == 0:
                return [Action.FOLD, Action.RAISE, Action.ALLIN, Action.CHECK]
            elif player.getBudget() <= self.getAmountToCall(player):
                return [Action.FOLD, Action.ALLIN]
            else:
                return [Action.FOLD, Action.CALL, Action.RAISE, Action.ALLIN]
        else:
            return [Action.FOLD]
    
    def playerTakeAction(self, player, action, amount = 0):
        if self.playersTurn != self.players.index(player):
            raise ValueError
        if action == Action.FOLD:
            self.playerFold(player)
        elif action == Action.CALL:
            self.playerCall(player)
        elif action == Action.RAISE:
            self.playerRaise(player, amount)
        elif action == Action.CHECK:
            self.playerCheck(player)
        elif action == Action.ALLIN:
            self.playerAllIn(player, player.getBudget())
    
    def getCurrentPlayer(self):
        return self.players[self.playersTurn]

    def roundEnded(self):
        if self.playersLeftToPlayInThisRound == 0:
            return True
        else:
            return False
    
    def nextPlayer(self):
        self.playersTurn += 1
        if self.playersTurn == len(self.players):
            self.playersTurn = 0
        # if self.playersTurn == lastPlayer:
        #     self.finishRound()
    
    def playerRaise(self, player, amount):
        try:
            player.bet(amount)
            self.pot += amount
            self.roundBet = player.roundBet
            self.playersLeftToPlayInThisRound = len(self.players) - 1
            self.nextPlayer()
        except ValueError:
            print("Not enough money")
    
    def playerCall(self, player):
        try:
            amount = self.getAmountToCall(player)
            player.bet(amount)
            self.pot += amount
            self.playersLeftToPlayInThisRound -= 1
            self.nextPlayer()
        except ValueError:
            print("Not enough money")
    
    def playerCheck(self, player):
        try:
            if not player.isAllIn and self.getAmountToCall(player) != 0:
                raise ValueError
            self.playersLeftToPlayInThisRound -= 1
            self.nextPlayer()
        except ValueError:
            print("Not enough money")
    
    def playerAllIn(self, player, amount):
        try:
            player.bet(amount)
            self.pot += amount
            self.roundBet = max(player.roundBet, self.roundBet)
            self.playersLeftToPlayInThisRound = len(self.players) - 1
            self.nextPlayer()
        except ValueError:
            print("Not enough money")
    
    def playerFold(self, player):
        player.fold()
        self.playersLeftToPlayInThisRound -= 1
        if self.playersLeft != 1:
            self.nextPlayer()
    
    def getWinner(self):
        nonFoldedPlayers = [player for player in self.players if player.isFolded() == False]
        winner = nonFoldedPlayers[0]
        if len(nonFoldedPlayers) == 1:
            winner.budget += self.pot
            print(winner)
            return winner
        roundBets = [player.getBettedAmount() for player in self.players]
        while max(roundBets) != 0:
            winner = nonFoldedPlayers[0]
            winnerBestHand = self.getBestHand(winner)
            for player in nonFoldedPlayers:
                playerBestHand = self.getBestHand(player)
                if playerBestHand.rank > winnerBestHand.rank:
                    winner = player
                    winnerBestHand = playerBestHand
            winnerBet = winner.getBettedAmount()
            if winnerBet == max(roundBets):
                winner.budget += self.pot
                break
            else:
                for player in self.players:
                    amountWonOverThisPlayer = min(player.getBettedAmount(), winnerBet)
                    winner.budget += amountWonOverThisPlayer
                    player.bettedAmount -= amountWonOverThisPlayer
                    self.pot -= amountWonOverThisPlayer
            nonFoldedPlayers = [player for player in self.players if player.bettedAmount > 0]
            roundBets = [player.getBettedAmount() for player in self.players]
        return winner
    
    def getBestHand(self, player):
        import itertools
        bestHand = PokerHand(self.flop)
        for comb in itertools.combinations(player.hand + self.flop, 5):
            comb = list(comb)
            if PokerHand(comb).rank > bestHand.rank:
                bestHand = PokerHand(comb)
        return bestHand
    
    def isGameOver(self):
        if self.round == 4 or len([player for player in self.players if not player.isFolded()]) == 1:
            return True
        else:
            return False
    
    def handleCards(self, n):
        for player in self.players:
            player.hand = self.deck.drawCards(n)
    
    def flopCards(self, n):
        self.flop = self.flop + self.deck.drawCards(n)
    
    def getFlop(self):
        return self.flop
    
    def getPot(self):
        return self.pot

In [8]:
import random
class PokerUntilWinner:
    def __init__(self, players, smallBlind, buyin):
        self.players = players
        self.smallBlind = smallBlind
        self.buyin = buyin
        for player in self.players:
            player.budget = self.buyin
    
    def playUntilWinner(self):
        while self.areAllGamesOver() == False:
            print("New game started!")
            self.playRound()
            self.removePlayersWithNoMoney()
            self.resetPlayers()
            self.shiftPlayers()
            print("One game ended!")
        print("Game over! " + str(self.players[0].id) + " won!")

    def resetPlayers(self):
        for player in self.players:
            player.resetValues()

    def shiftPlayers(self):
        self.players.append(self.players.pop(0))
    
    def playRound(self):
        pokerGame = PokerGame(self.players, self.smallBlind)
        while pokerGame.isGameOver() == False:
            print("###################")
            print("New round started! " + str(pokerGame.round))
            pokerGame.startRound()
            while pokerGame.isGameOver() == False and pokerGame.roundEnded() == False:
                player = pokerGame.getCurrentPlayer()
                print("-------------------")
                print("player.id", player.id, "budget", player.budget)
                print("player.id", player.id, "betted amount total", player.bettedAmount)
                print(pokerGame.getFlop())
                print(pokerGame.getPot())
                print(player)
                actions = pokerGame.getPlayerActions(player)
                print("actions", actions)
                rand = random.randint(0, len(actions) - 1)
                action = actions[rand]
                print("action", action)
                if(action == Action.RAISE):
                    raiseAmount = random.randint(pokerGame.getAmountToCall(player), player.budget)
                    pokerGame.playerTakeAction(player, action, raiseAmount)
                else:
                    pokerGame.playerTakeAction(player, action)
                print("-------------------")
            print(pokerGame.pot)
            pokerGame.finishRound()
        print("Round ended! " + str(pokerGame.round))
        print("###################")
        for player in self.players:
            print("player with id:" , player.id, "has", player.budget, player.hand)

    def removePlayersWithNoMoney(self):
        players = []
        for player in self.players:
            if player.budget > 0:
                players.append(player)
        self.players = players
        print(self.players)
    
    def areAllGamesOver(self):
        return len(self.players) == 1

In [9]:
class PokerPlayer:
    def __init__(self, id, budget=0):
        self.id = id
        self.budget = budget
        self.bettedAmount = 0
        self.roundBet = 0
        self.hand = []
        self.folded = False
        self.isAllIn = False
    def getBudget(self):
        return self.budget
    def getBettedAmount(self):
        return self.bettedAmount
    def bet(self, amount):
        if(amount > self.budget):
            raise ValueError("You don't have enough money to bet that much!")
        self.roundBet += amount
        self.budget -= amount
        self.bettedAmount += amount
        if self.budget == 0:
            self.isAllIn = True
    def resetValues(self):
        self.bettedAmount = 0
        self.roundBet = 0
        self.hand = []
        self.folded = False
        self.isAllIn = False
    def resetRoundBet(self):
        self.roundBet = 0
    def isFolded(self):
        return self.folded
    def fold(self):
        self.folded = True
    def __repr__(self):
        return str(self.hand)
    def __len__(self):
        return len(self.hand)
    def __getitem__(self, position):
        return self.hand[position]


In [22]:
player1 = PokerPlayer(1)
player2 = PokerPlayer(2)
player3 = PokerPlayer(3)
player4 = PokerPlayer(4)
player5 = PokerPlayer(5)

poker = PokerUntilWinner([player1, player2, player3, player4, player5], 10, 100)
poker.playUntilWinner()

New game started!
num players 5
5
###################
New round started! 0
-------------------
player.id 3 budget 100
player.id 3 betted amount total 0
[]
30
[9♦, 9♠]
actions [<Action.FOLD: 0>, <Action.CALL: 1>, <Action.RAISE: 2>, <Action.ALLIN: 4>]
action Action.RAISE
-------------------
-------------------
player.id 4 budget 100
player.id 4 betted amount total 0
[]
116
[5♣, 10♥]
actions [<Action.FOLD: 0>, <Action.CALL: 1>, <Action.RAISE: 2>, <Action.ALLIN: 4>]
action Action.RAISE
-------------------
-------------------
player.id 5 budget 100
player.id 5 betted amount total 0
[]
214
[A♥, 7♥]
actions [<Action.FOLD: 0>, <Action.CALL: 1>, <Action.RAISE: 2>, <Action.ALLIN: 4>]
action Action.FOLD
-------------------
-------------------
player.id 1 budget 90
player.id 1 betted amount total 10
[]
214
[6♦, 4♦]
actions [<Action.FOLD: 0>, <Action.CALL: 1>, <Action.RAISE: 2>, <Action.ALLIN: 4>]
action Action.RAISE
-------------------
-------------------
player.id 2 budget 80
player.id 2 betted a

In [11]:
# player1 = PokerPlayer(11, 1000)
# player2 = PokerPlayer(12, 1000)
# player3 = PokerPlayer(13, 1000)
# player4 = PokerPlayer(14, 1000)
# player5 = PokerPlayer(15, 1000)

# poker = PokerGame([player1, player2, player3, player4, player5], 10)

# while poker.isGameOver() == False:
#     print("New round started! " + str(poker.round))
#     poker.startRound()
#     while poker.roundEnded() == False:
#         player = poker.getCurrentPlayer()
#         print(poker.getFlop())
#         print(poker.getPot())
#         print(poker.getCurrentPlayer())
#         print(poker.getPlayerActions(poker.getCurrentPlayer()))
#         action = input("Action: ")
#         if action == "fold":
#             poker.playerTakeAction(player, Action.FOLD)
#         elif action == "call":
#             poker.playerTakeAction(player, Action.CALL)
#         elif action == "raise":
#             amount = int(input("Amount: "))
#             poker.playerTakeAction(player, Action.RAISE, amount)
#         elif action == "check":
#             poker.playerTakeAction(player, Action.CHECK)
#         elif action == "allin":
#             poker.playerTakeAction(player, Action.ALLIN)
#         else:
#             print("Invalid action")
#     poker.finishRound()
#     print("Round ended! " + str(poker.round))

# print("Game over!")
# print("The flop was: " + str(poker.flop))
# print("Player budgets:")
# for player in poker.players:
#     print("player with id:" , player.id, "has", player.budget, player.hand, "hand rank:", poker.getBestHand(player).rank)

In [12]:
cardDeck = CardDeck()
cardDeck.shuffleCards()
print(cardDeck)

[J♦, 6♠, 9♠, 3♦, 10♠, 3♥, 6♥, 8♣, K♥, Q♠, 10♥, 6♦, 4♣, 3♣, Q♥, 8♠, J♣, 9♣, 5♥, 7♥, 2♠, 8♥, 5♠, Q♣, 3♠, 7♣, A♠, 8♦, 6♣, A♦, 7♠, 2♥, 7♦, 4♦, 5♦, 4♠, 10♦, J♠, 9♥, A♣, 9♦, A♥, K♠, 10♣, Q♦, 5♣, 2♦, J♥, K♦, 2♣, K♣, 4♥]


In [13]:
card = Card(1, "S")
card2 = Card(1, "H")
print(card == card2)

False


In [14]:
import itertools
flop = [Card(7, "S"), Card(3, "S"), Card(4, "S"), Card(5, "S"), Card(6, "S")]
hand = [Card(5, "H"), Card(6, "H")]
bestHand = PokerHand(flop)
allHands = []
for comb in itertools.combinations(hand + flop, 5):
    comb = list(comb)
    allHands.append(PokerHand(comb))
    if PokerHand(comb).rank > bestHand.rank:
        bestHand = PokerHand(comb)
print(bestHand)
print(len(allHands))

[7♠, 6♠, 5♠, 4♠, 3♠]
21


In [15]:
import gym
import random
from gym import spaces
import numpy as np

class PokerEnv(gym.Env):
    def __init__(self, num_players, small_blind, buyin):
        super(PokerEnv, self).__init__()
        self.num_players = num_players
        self.small_blind = small_blind
        self.buyin = buyin
        self.players = [PokerPlayer(i, self.buyin) for i in range(self.num_players)]
        for player in self.players:
            player.budget = self.buyin
        # Observation space initialization
        self.observation_space = self._create_observation_space()

        # Action space initialization
        self.action_space = self._create_action_space()

    def restart(self, num_players, small_blind, buyin):
        self.num_players = num_players
        self.small_blind = small_blind
        self.buyin = buyin
        self.players = [PokerPlayer(i, self.buyin) for i in range(self.num_players)]
        for player in self.players:
            player.budget = self.buyin
        # Observation space initialization
        self.observation_space = self._create_observation_space()

        # Action space initialization
        self.action_space = self._create_action_space()
    
    def _create_observation_space(self):
        # Calculate initial observation space shape
        observation_space_shape = self._get_observation_space_shape()
        return spaces.Box(low=0, high=1, shape=(observation_space_shape,), dtype=np.float32)

    def _create_action_space(self):
        return spaces.Discrete(5)  # 0: fold, 1: call, 2: raise, 3: check, 4: all-in 
    # spaces.Tuple((
    #         spaces.Discrete(5),  # 0: fold, 1: call, 2: raise, 3: check, 4: all-in
    #         spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)  # Continuous action for raise amount
    #     ))
    def resetPlayers(self):
        for player in self.players:
            player.resetValues()

    def shiftPlayers(self):
        return

    def removePlayersWithNoMoney(self):
        # Remove players with no money from the game
        # self.players = [player for player in self.players if player.budget > 0]
        # Update the number of players in the observation space
        #self.observation_space = self._create_observation_space()
        return
    
    def areAllGamesOver(self):
        return len([player for player in self.players if player.budget > 0]) == 1

    def _get_observation_space_shape(self):
        # Budget, current bet, folded, all-in, hand (2 cards) for each player
        player_info_length = 4 + 2 * 2
        community_cards_length = 5 * 2  # Up to 5 community cards
        return self.num_players * player_info_length + community_cards_length + 3  # Pot size, current round, amount to call

    def reset(self):
        # self.removePlayersWithNoMoney()
        self.resetPlayers()
        # self.shiftPlayers()
        if self.areAllGamesOver():
            return self._get_state()
        self.round = PokerGame(self.players, self.small_blind)
        self.round.startRound()
        return self._get_state()
    
    def _get_default_hand(self):
        return [Card(2, 'S'), Card(2, 'H')]

    def _get_state(self):
        state = []
        for player in self.players:
            state.extend([player.budget / self.buyin, player.bettedAmount / self.buyin])
            state.append(1 if player.isFolded() else 0)
            state.append(1 if player.isAllIn else 0)
            hand = player.hand if len(player.hand) == 2 else self._get_default_hand()
            state.extend(self._encode_hand(hand))
            print("player", self.round.getCurrentPlayer())
            print("player.hand", player.hand)
        
        print("flop", self.round.getFlop())
        state.extend(self._encode_hand(self.round.getFlop()))
        state.extend([0] * (5 - len(self.round.getFlop())) * 2)
        
        state.append(self.round.getPot() / (self.buyin * self.num_players))
        state.append(self.round.round / 4)
        
        current_player = self.round.getCurrentPlayer()
        amount_to_call = self.round.getAmountToCall(current_player) / self.buyin
        state.append(amount_to_call)
        
        return np.array(state, dtype=np.float32)

    def _encode_hand(self, hand):
        card_values = []
        for card in hand:
            card_values.append(card.rank / 14)
            card_values.append(self._encode_suit(card.suit))
        return card_values
    
    def _encode_suit(self, suit):
        suit_dict = {'S': 0.25, 'H': 0.5, 'D': 0.75, 'C': 1.0}
        return suit_dict[suit]

    def getCurrentPlayer(self):
        return self.round.getCurrentPlayer()

    def step(self, action):
        if self.areAllGamesOver():
            return self._get_state(), self._calculate_reward, True, {}
        discrete_action = action
        # discrete_action, raise_fraction = action
        raise_fraction = 0.5  # Fraction of the budget to raise by
        current_player = self.round.getCurrentPlayer()
        actions = self.round.getPlayerActions(current_player)

        if discrete_action not in actions:
            discrete_action = actions[random.randrange(0, len(actions))]
        
        if discrete_action == Action.RAISE:  # If the action is 'raise'
            min_raise = self.round.getAmountToCall(current_player)
            max_raise = current_player.getBudget()
            raise_amount = min_raise + raise_fraction * (max_raise - min_raise)
            chosen_action = (Action.RAISE, raise_amount)
        else:
            chosen_action = discrete_action
        
        if isinstance(chosen_action, tuple):
            action_type, amount = chosen_action
            self.round.playerTakeAction(current_player, action_type, amount)
        else:
            self.round.playerTakeAction(current_player, chosen_action)
        
        if self.round.roundEnded():
            print("round ended")
            self.round.finishRound()
            # if self.round.isGameOver():
            #     return self._get_state(), self._calculate_reward(current_player), True, {}
            self.round.startRound()

        done = self.round.isGameOver() or self.areAllGamesOver()
        print("done", done)
        reward = self._calculate_reward(current_player)
        
        return self._get_state(), reward, done, {}

    def _calculate_reward(self, player):
        return player.getBudget()  # Simple reward based on the player's budget

    def render(self, mode='human'):
        pass


In [16]:
import sys
print(sys.version)

3.11.7 (tags/v3.11.7:fa7a6f2, Dec  4 2023, 19:24:49) [MSC v.1937 64 bit (AMD64)]


In [17]:
import torch

if torch.cuda.is_available():
    print("CUDA is available. You can use GPU acceleration.")
else:
    print("CUDA is not available. You can only use CPU.")

CUDA is available. You can use GPU acceleration.


In [18]:
%pip list

Package              Version
-------------------- ------------
asttokens            2.4.1
cloudpickle          3.0.0
colorama             0.4.6
comm                 0.2.1
contourpy            1.2.1
cycler               0.12.1
debugpy              1.8.0
decorator            5.1.1
executing            2.0.1
Farama-Notifications 0.0.4
filelock             3.14.0
fonttools            4.51.0
fsspec               2024.5.0
git-filter-repo      2.38.0
gym                  0.26.2
gym-notices          0.0.8
gymnasium            0.29.1
intel-openmp         2021.4.0
ipykernel            6.29.0
ipython              8.21.0
jedi                 0.19.1
Jinja2               3.1.4
jupyter_client       8.6.0
jupyter_core         5.7.1
kiwisolver           1.4.5
MarkupSafe           2.1.5
matplotlib           3.9.0
matplotlib-inline    0.1.6
mkl                  2021.4.0
mpmath               1.3.0
nest-asyncio         1.6.0
networkx             3.3
numpy                1.26.4
packaging            23.2
pan

In [19]:
from gym import spaces
import numpy as np

class CustomActionWrapper(gym.ActionWrapper):
    def __init__(self, env):
        super().__init__(env)
        self.action_space = spaces.Discrete(6)  # 0: fold, 1: call, 2-5: raise by different amounts

    def action(self, action):
        if action == Action.RAISE:
            raise_fraction = (action - 2) / 3  # Map action index to raise fraction
            return (2, raise_fraction)  # Raise action
        else:
            return (action, 0)  # Fold, call, allin, or check action

In [20]:
from stable_baselines3.common.callbacks import BaseCallback

class EarlyStoppingCallback(BaseCallback):
    def __init__(self, check_func, verbose=0):
        super(EarlyStoppingCallback, self).__init__(verbose)
        self.check_func = check_func

    def _on_step(self) -> bool:
        # Call the environment's areAllGamesOver function
        if self.check_func():
            print("Early stopping condition met. Training terminated.")
            return False  # Returning False stops training
        return True  # Continue training

In [21]:
import gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# Define the parameters
num_players = 4
small_blind = 10
buyin = 1000
num_episodes = 1000

# Create the environment
env = DummyVecEnv([lambda: PokerEnv(num_players, small_blind, buyin)])
early_stopping_callback = EarlyStoppingCallback(env.envs[0].areAllGamesOver)

# Define and train the agent
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=num_episodes, callback=early_stopping_callback)

# Save the trained model
model.save("poker_model")
print("Model saved successfully.")
model.env.envs[0].restart(num_players, small_blind, buyin)

# Evaluate the trained agent
total_rewards = [0 for _ in range(num_players)]
for _ in range(10):
    obs = env.reset()
    if env.envs[0].areAllGamesOver():
        break
    done = False
    while not done:
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        total_rewards[env.envs[0].getCurrentPlayer().id] += reward

print("Total rewards:", total_rewards)
print("Average total reward:", np.mean(total_rewards))

# Load the saved model
loaded_model = PPO.load("poker_model")

# Continue using the loaded model if needed



Using cuda device
num players 4
4
player [3♣, 4♠]
player.hand [5♣, Q♣]
player [3♣, 4♠]
player.hand [4♦, 5♠]
player [3♣, 4♠]
player.hand [3♣, 4♠]
player [3♣, 4♠]
player.hand [8♠, 10♦]
flop []
done False
player [8♠, 10♦]
player.hand [5♣, Q♣]
player [8♠, 10♦]
player.hand [4♦, 5♠]
player [8♠, 10♦]
player.hand [3♣, 4♠]
player [8♠, 10♦]
player.hand [8♠, 10♦]
flop []
done False
player [5♣, Q♣]
player.hand [5♣, Q♣]
player [5♣, Q♣]
player.hand [4♦, 5♠]
player [5♣, Q♣]
player.hand [3♣, 4♠]
player [5♣, Q♣]
player.hand [8♠, 10♦]
flop []
done False
player [4♦, 5♠]
player.hand [5♣, Q♣]
player [4♦, 5♠]
player.hand [4♦, 5♠]
player [4♦, 5♠]
player.hand [3♣, 4♠]
player [4♦, 5♠]
player.hand [8♠, 10♦]
flop []
done True
player [3♣, 4♠]
player.hand [5♣, Q♣]
player [3♣, 4♠]
player.hand [4♦, 5♠]
player [3♣, 4♠]
player.hand [3♣, 4♠]
player [3♣, 4♠]
player.hand [8♠, 10♦]
flop []
num players 4
herer
3
player [3♥, 6♠]
player.hand [3♣, 5♣]
player [3♥, 6♠]
player.hand [2♦, J♣]
player [3♥, 6♠]
player.hand [3♥, 6♠]
p