# Monte Carlo Simulation of Black Jack
## Dependencies
* jupyterlab 2.2.6
* Python 3.9.1
* numpy 1.20.1 (https://numpy.org)
* pandas 1.2.2 (https://pandas.pydata.org/docs/getting_started/install.html)
* emoji 1.2.0 (https://pypi.org/project/emoji/) for better display output


### House Rules
* 2 deck shoe used
* we shuffle when 25% of the original shoe is remaining (in addition to the initial shuffle)
* dealer stands on soft 17
* Natural / Blackjack pays 1.5x, otherwise payout is 1x
* double downs allowed after splits
* Up to 4 splits allowed (no splitting on Aces)
* No surrender, no insurance
* No minimum bet, no maximum bet


In [1]:
from enum import Enum, Flag, auto
import numpy as np
import math
from dataclasses import dataclass
import pandas as pd
import itertools
import emoji

The numpy random number generator is used. A global RNG with a seed of 0 is used.

In [2]:
RNG = np.random.default_rng(0)

Implement Card as object containing Rank and Suit. Rather than having an enum of all 52 possible cards, it is possible to created more than 1 card with same rank and suit. This was chosen to make creating decks easier.

In [3]:
class Rank(Enum):
    A = ('A', 11)
    Two = ('2', 2)
    Three = ('3', 3)
    Four = ('4',4)
    Five = ('5',5)
    Six = ('6',6)
    Seven = ('7',7)
    Eight = ('8',8)
    Nine = ('9',9)
    Ten = ('10',10)
    J = ('J', 10)
    Q = ('Q', 10)
    K = ('K', 10)
    
    def __init__(self, short, val):
        self.short = short
        self.val = val
        
class Suit(Enum):
    Clubs = ":clubs:"
    Hearts = ":hearts:"
    Diamonds = ":diamonds:"
    Spade = ":spades:"
    

@dataclass(frozen=True)
class Card:
    rank:Rank
    suit:Suit
        
    def rank_equal(self, card):
        return rank == card.rank
    def __repr__(self):
        
        return emoji.emojize(f"[{self.rank.short} {self.suit.value}]",use_aliases=True)
    
assert(len([Card(rank,suit) for rank in Rank for suit in Suit])==52)

Here we define a hand as a set of cards. Some book keeping is done to facilitate the showdown computation, as well as tracking the total. Note that we assume Aces have value of 11, unless they cause the total to be greater than 21, in which case 1 is used.

In [4]:
class HandAction(Flag):
    Hit = auto()
    Stand = auto()
    Split = auto() #only valid for player, not dealer
    DoubleDown = auto() #only valid for player, not dealer
    
class Hand:
    def __init__(self, cards:list[Card], from_split=False):
        self.cards = cards
        self.total = 0
        self.terminal = False
        self.has_ace = False
        self.from_split=False
        for card in self.cards:
            self.update_total(card)

    def natural(self):
        return len(self.cards) == 2 and self.total == 21 and not self.from_split
    
    def update_total(self,card:Card):
        self.total += card.rank.val
        if card.rank == Rank.A:
            self.has_ace = True
            if self.total > 21: #we bust, try subtracting 10
                self.total -= 10 
        
        if self.total > 21:
            self.terminal = True
    
    def update(self, action:HandAction, card:Card=None):
        if self.terminal:
            raise ValueError("hand already terminal")
            
        if action == HandAction.Hit:
            if card == None:
                raise ValueError("no card returned for Hit")
            self.cards.append(card)
            self.update_total(card)
        elif action == HandAction.Stand:
            self.terminal = True
        elif action == HandAction.DoubleDown:
            if card == None:
                raise ValueError("no card returned for Double Down")
            self.cards.append(card)
            self.update_total(card)
            self.terminal = True
        
        return self
        
    def __repr__(self):
        strlst = [f"{'Terminal ' if self.terminal else ''}Hand(total:{self.total})[" ]
        cardlist = []
        for card in self.cards:
            cardlist.append(str(card))
        strlst.append(",".join(cardlist))
        strlst.append("]")
        return "".join(strlst)

Here we define logic to book keep player state:
* Like a dealer hand, a player hand has a set of cards associated with it, but the player must place a bet (wager)
* we have logic to book keep a player's past results and active hand. These past results can be used to optimize play strategy.
* add logic to display player state as dataframe

In [5]:
class PlayerHandState(Flag):
    Loss=auto()
    Won=auto()
    Push=auto()


class PlayerHand(Hand):
    def __init__(self, cards, wager, from_split=False):
        super().__init__(cards, from_split=from_split)
        self.wager = wager
    def __repr__(self):
        return super().__repr__() + f" @ {self.wager}$"

class PlayerState:
    def __init__(self, capital:int):
        self.initial_capital = capital
        self.capital = capital
        self.past_player_hands = []
        self.past_dealer_hands = []
        self.active_hands = [] # player may have more than 1 active hand due to splitting
        self.current_hand = None
        
    def add_active_hand(self, hand):
        self.active_hands.append(hand)
        self.capital -= hand.wager
        
    def clear_active_hands(self):
        self.active_hands = []
        self.current_hand = None
        
    def set_current_hand(self, hand:PlayerHand):
        assert hand in self.active_hands, "can only set active hand as current"
        self.current_hand = hand
    
    def double_current_wager(self):
        self.capital -= self.current_hand.wager
        self.current_hand.wager *= 2
        
    def record_result(self, active_hand, dealer_hand, result, payout):
        self.capital += payout
        self.past_dealer_hands.append(dealer_hand)
        self.past_player_hands.append((result,self.capital, active_hand))
        
        
    def to_df(self):
        return pd.DataFrame([{"player_hand":player_hand.cards, "player_initial_hand": player_hand.cards[0:2],
                              "player_initial_ace": Rank.A in [card.rank for card in player_hand.cards[0:2]],
                              "player_initial_total": Hand(player_hand.cards[0:2]).total,
                              "wager": player_hand.wager,
                              "player_total":player_hand.total, "dealer_hand":dealer_hand.cards, "dealer_facing_val": dealer_hand.cards[0].rank.val, "dealer_total":dealer_hand.total, "result":result.name} for ((result,capital,player_hand),dealer_hand) in zip(self.past_player_hands, self.past_dealer_hands)])
    
    def get_win_rate(self):
        df = self.to_df()
        cnt = df.groupby('result').sum()
        return (cnt.loc['Won','wager']+ 0.5 * cnt.loc['Push','wager'])/(cnt.loc['Won','wager']+cnt.loc['Loss','wager']+ cnt.loc['Push','wager'])

    
    def __repr__(self):
        
        last_dealer_hand = f"last_dealer_hand: {self.past_dealer_hands[-1]}\n" if len(self.past_dealer_hands) else ""
        last_hand = f"last_hand: {self.past_player_hands[-1]}\n"  if len(self.past_player_hands) else ""
        active = f"Active: {self.active_hands}\n" if len(self.active_hands)>1 else "" 
        current = f"Current: {self.current_hand}"
        
        return f'''{last_hand}{last_dealer_hand}Capital: {self.capital} 
Won: {len([res[0] for res in self.past_player_hands if res[0]==PlayerHandState.Won])} 
Lost: {len([res[0] for res in self.past_player_hands if res[0]==PlayerHandState.Loss])} 
Push: {len([res[0] for res in self.past_player_hands if res[0]==PlayerHandState.Push])}
{active}{current}'''
    
assert( PlayerHand([Card(Rank.Five,Suit.Clubs), Card(Rank.A, Suit.Diamonds)],10).total == 16)

 Here we implement a Shoe. Different number of decks and cards before shuffle.
* a card is removed from the shoe each time it is dealt
* The shared RNG is used to shuffle the cards in the shoe. 
* dealt cards are tracked. When we shuffle, they are added back to the shoe, and then a shuffle is performed

In [6]:
class Shoe:
    def __init__(self,num_decks:int=2):
        self.num_decks = num_decks
        self.cards = [Card(rank,suit) for rank in Rank for suit in Suit]*num_decks
        # initial shuffle
        self.dealt = []
        self.shuffle()
        
    def shuffle(self):
        self.cards += self.dealt
        self.dealt.clear()
        RNG.shuffle(self.cards)
#        assert len(self.cards) == self.num_decks * 52, f"len of self.cards is {len(self.cards)}"
        return self
        
    def deal(self):
        assert len(self.cards) > 0, f"len of self.cards is {len(self.cards)}"
        card = self.cards.pop()
        self.dealt.append(card)
#        assert (len(self.dealt) +len(self.cards))==self.num_decks * 52
        return card
    
    def remove_card(self, rank:Rank):
        found = None
        
        for idx, card in enumerate(self.cards):
            if card.rank == rank:
                # we can only remove in place since we are removing once only
                found = self.cards.pop(idx)
                self.dealt.append(found)
#                assert found == card, "removing what we found only"
#                assert (len(self.dealt) +len(self.cards))==self.num_decks * 52, "keep trak of cards"
                return found
        
        raise KeyError(f"Did not find card with rank {rank}")
        
    
    def remaining_ratio(self):
        return len(self.cards)/(len(self.dealt)+len(self.cards))
    

## Dealer Logic ##
Here we define the main **simulate** function of the program as part of a Dealer object:
* A Dealer requires a shoe (we are default configuration), and a player state / strategy on which to simulate.
* In order to perform a simulation, we need to define a player strategy. Here we abstract the player strategy as an object that can give us the amount to bet (**bet_amount**), whether to hit or not (**hit**), whether or not we are doubling down (**doubledown**), and whether or not to split(**split**).



In [7]:
class Dealer:
    def __init__(self, shoe=Shoe()):
        self.shoe = shoe  

    def simulate(self,player_state, player_strategy, ignore_capital=False, max_sim_count=10_000):
        player_stopped = False
        total_games = 0
        num_splits = 0
        
        while (player_state.capital > 0 or ignore_capital) and total_games < max_sim_count:
            # get player bet amount
            if self.shoe.remaining_ratio() < 0.25:
                self.shoe.shuffle()
            
            
            wager = player_strategy.bet_amount(player_state)
            if wager > 0:
                total_games += 1
                
                # get dealer hand
                dealer_facing_card = self.shoe.deal()
                dealer_hand = Hand([self.shoe.deal(), dealer_facing_card])
                
                # get player hands
                self.generate_active_hands(dealer_facing_card, player_state,player_strategy, wager)
                
                # process player hand                       
                for player_hand in player_state.active_hands:                         
                    player_state.set_current_hand(player_hand)
                    
                    # double down?
                    if (ignore_capital or player_state.capital >= player_hand.wager) and player_strategy.double_down(dealer_facing_card, player_state):
                        player_state.double_current_wager()
                        player_hand.update(HandAction.Hit, self.shoe.deal())
                        player_hand.terminal = True

                    while not player_hand.terminal:
                        # get player action 
                        if player_strategy.hit(dealer_facing_card, player_state):
                            player_hand.update(HandAction.Hit, self.shoe.deal())
                        else:
                            player_hand.update(HandAction.Stand)
                            
                
                # update dealer hand 
                
                Dealer.dealer_strategy(dealer_hand, self.shoe)
                
                #showdown for each hand
                for player_hand in player_state.active_hands:
                    res, payout = Dealer.showdown(dealer_hand, player_hand, self.shoe)
                    player_state.record_result(player_hand, dealer_hand, res, payout)
                player_state.clear_active_hands()
            else:
                player_stopped=True
                break

    
    def generate_active_hands(self, dealer_facing_card:Card, player_state:PlayerState, player_strategy, wager):
        num_splits = 0

        to_be_processed = [(self.shoe.deal(), self.shoe.deal())]
        while len(to_be_processed) > 0: 
            pair = to_be_processed.pop()
            card1, card2 = pair
            if (card1.rank != card2.rank or 
                card1.rank == Rank.A or 
                num_splits >= 4):
                player_state.add_active_hand(PlayerHand([card1, card2],wager))
                continue
            
            if player_state.capital > wager and player_strategy.split(pair, to_be_processed, player_state.active_hands, dealer_facing_card, player_state):
                    to_be_processed.append((card1, self.shoe.deal()))
                    to_be_processed.append((card2, self.shoe.deal()))
                    num_splits+=1
            else:
                player_state.add_active_hand(PlayerHand([card1, card2],wager,from_split=num_splits > 0))

    def dealer_strategy(dealer_hand, shoe):
        while dealer_hand.total < 17:
            dealer_hand.update(HandAction.Hit, shoe.deal())
        
        dealer_hand.terminal = True 

    def showdown(dealer_hand, player_hand, shoe):

        # 1 = win, -1 = lose, 0 = draw
        player_natural = player_hand.natural()
        dealer_natural = dealer_hand.natural()
        
        # player busted
        if player_hand.total > 21:
            return (PlayerHandState.Loss, 0)

        # player natural
        elif player_natural and not dealer_natural:
            return (PlayerHandState.Won, player_hand.wager * 2.5)
        
        # dealer busted
        elif dealer_hand.total > 21:
            return (PlayerHandState.Won, player_hand.wager * 2)
        
        # dealer natural
        elif dealer_natural and not player_natural:
            return (PlayerHandState.Loss, 0 )
        
        # player sum higher
        elif player_hand.total > dealer_hand.total:
            return (PlayerHandState.Won, player_hand.wager * 2)

        # dealer sum higher
        elif player_hand.total < dealer_hand.total:
            return (PlayerHandState.Loss, 0 )
        # push
        elif player_hand.total == dealer_hand.total:
            return (PlayerHandState.Push, player_hand.wager) #just get back original
        else:
            return ValueError(f"should not reach this state: player {playre_hand} dealer {dealer_hand}")
            
        


Lets implement a manual strategy so we can play around with our simulator.        

In [8]:
class ManualStrategy:
    def hit(self, dealer_facing_card, player_state):
        print(f"{player_state} \n dealer facing card: {dealer_facing_card}\n Hit? (h for hit, s for stand)")
        res = input()
        return res.strip().lower()=="h"

    def bet_amount(self, player_state):
        print(f"{player_state} \nBet amount?(integer value)")
        res = input()
        return int(res)
        
    def double_down(self, dealer_facing_card, player_state):
        print(f"{player_state} \n dealer facing card: {dealer_facing_card}\n double down? (Y/N)")
        res = input()
        return res.strip().lower()=="y"
    
    def split(self, pair, to_be_processed, player_hands, dealer_facing_card, player_state):
        print(
            f'''{player_state}
        dealer facing card: {dealer_facing_card}
        player_hands(cannot split): {player_hands}
        potential split: {to_be_processed}
        split pair {pair}? (Y/N)''')
        res = input()
        return res.strip().lower()=="y"
    

In [9]:
# Uncomment to run manually

# ps = PlayerState(1000)
# Dealer().simulate(ps, ManualStrategy(),max_sim_count=100)

## First Simulation
### Mimic Strategy
As a baseline, let us define strategy where the player mimics the dealer, and bets 1% of capital all the time

In [10]:
class NaiveStrategy:
    def hit(self, dealer_facing_card, player_state):
        return player_state.current_hand.total < 17

    def bet_amount(self, player_state):
            return math.ceil(player_state.initial_capital * 0.01)
        
    def double_down(self, dealer_facing_card, player_state):
        return False

    def split(self, pair, to_be_processed, player_hands, dealer_facing_card, player_state):
        return False

Lets simulate it 100000 times and see our result. 

In [11]:
ps = PlayerState(100)
Dealer(Shoe()).simulate(ps, NaiveStrategy(),max_sim_count=10_000,ignore_capital=True)
df = ps.to_df()
df.head()

Unnamed: 0,player_hand,player_initial_hand,player_initial_ace,player_initial_total,wager,player_total,dealer_hand,dealer_facing_val,dealer_total,result
0,"[[10 ♣], [4 ♠], [K ♠]]","[[10 ♣], [4 ♠]]",False,14,1,24,"[[2 ♠], [6 ♠], [A ♣]]",2,19,Loss
1,"[[Q ♠], [3 ♠], [K ♥]]","[[Q ♠], [3 ♠]]",False,13,1,23,"[[K ♣], [10 ♥]]",10,20,Loss
2,"[[4 ♠], [A ♥], [Q ♣]]","[[4 ♠], [A ♥]]",True,15,1,25,"[[A ♦], [4 ♣], [8 ♥]]",11,23,Loss
3,"[[9 ♦], [8 ♦]]","[[9 ♦], [8 ♦]]",False,17,1,17,"[[4 ♦], [10 ♥], [8 ♦]]",4,22,Won
4,"[[3 ♣], [K ♥], [J ♠]]","[[3 ♣], [K ♥]]",False,13,1,23,"[[4 ♣], [7 ♥], [Q ♠]]",4,21,Loss


In [12]:
ps.get_win_rate()

0.44895

seems we win about 45% of the time, if we consider pushes as half wins. Lets see if we can do better.

## Second Simulation
### Hit Strategy

Lets try to come up with a strategy by simulating whether we should hit or not given a player total and the dealer displayed hand. First, let us simulate the ratio of wins for hitting vs. standing for each possible user total x soft vs. non-soft (whether not ace in hand) x dealer displayed. Also, observe that we should always hit when our hard total is less than 10, since we cannot bust in that case, and hitting will only increase the strength of our hand.

In [13]:
def simulate_hit(player_ranks:list[Rank], dealer_displayed_rank:Rank, sim_count:int=200):
        total_wins = 0.
        total_wager = 0.
        shoe = Shoe()
        for i in range(sim_count):
            shoe.shuffle()
            player_cards = [shoe.remove_card(rank) for rank in player_ranks]
            player_hand = PlayerHand(player_cards,1)
            dealer_displayed_card = shoe.remove_card(dealer_displayed_rank)
            dealer_hand = Hand([dealer_displayed_card])
            
            dealer_hand.update(HandAction.Hit, shoe.deal())
            
            # ignore case where dealer natural, since we cant do anything to change initial outcome
            if dealer_hand.natural():
                continue
                
            Dealer.dealer_strategy(dealer_hand, shoe)
                
            player_hand.update(HandAction.Hit, shoe.deal())
            if not player_hand.terminal:
                player_hand.update(HandAction.Stand)
                 
            res, payout = Dealer.showdown(dealer_hand, player_hand, shoe)
            total_wager += player_hand.wager
            if res == PlayerHandState.Won:
                total_wins+= player_hand.wager
            elif res == PlayerHandState.Push:
                total_wins+=0.5 * player_hand.wager
                
        return total_wins / total_wager
            
def simulate_stand(player_ranks, dealer_displayed_rank, sim_count=10000):
    total_wins = 0.
    total_wager = 0.
    shoe = Shoe()
    for i in range(sim_count):      
        shoe.shuffle()
        player_cards = [shoe.remove_card(rank) for rank in player_ranks]
        player_hand = PlayerHand(player_cards,1)
        dealer_displayed_card = shoe.remove_card(dealer_displayed_rank)
        dealer_hand = Hand([dealer_displayed_card])
        player_hand.update(HandAction.Stand)
        
        dealer_hand.update(HandAction.Hit, shoe.deal())
            
        # ignore case where dealer natural, since we cant do anything to change initial outcome
        if dealer_hand.natural():
            continue
                
        Dealer.dealer_strategy(dealer_hand, shoe)
            
        res, payout = Dealer.showdown(dealer_hand, player_hand,shoe)
        total_wager += player_hand.wager
        if res == PlayerHandState.Won:
            total_wins+= player_hand.wager
        elif res == PlayerHandState.Push:
            total_wins+=0.5 * player_hand.wager 
            
    return (total_wins / total_wager)  

In [14]:
player_rank_lists = (
[Rank.Two, Rank.Two],    
[Rank.Two, Rank.Three],
[Rank.Two, Rank.Four],
[Rank.Two, Rank.Five],
[Rank.Two, Rank.Six],
[Rank.Three, Rank.Six],
[Rank.Four, Rank.Six],
[Rank.Five, Rank.Six],
[Rank.Five, Rank.Seven],
[Rank.Five, Rank.Eight],
[Rank.Five, Rank.Nine],
[Rank.Five, Rank.J],
[Rank.Six, Rank.J],
[Rank.Seven, Rank.J],
[Rank.Eight, Rank.J],
[Rank.Nine, Rank.J],
[Rank.J, Rank.J],
[Rank.A, Rank.J],    
[Rank.A, Rank.A],
[Rank.A, Rank.Two],
[Rank.A, Rank.Three],
[Rank.A, Rank.Four],
[Rank.A, Rank.Five],
[Rank.A, Rank.Six],
[Rank.A, Rank.Seven],
[Rank.A, Rank.Eight],
[Rank.A, Rank.Nine]
)

hit_strategy_df = pd.DataFrame(itertools.product(player_rank_lists,(rank for rank in Rank if rank not in [Rank.J, Rank.Q, Rank.K])),columns=['player_ranks', 'dealer_rank'])

hit_strategy_df['soft']=hit_strategy_df.apply(
    lambda row: Rank.A in row.player_ranks ,axis=1)

def rank_sum(player_ranks):
    s = 0
    for rank in player_ranks:
        s+= rank.val
        if s > 21 and rank == Rank.A:
            s-= 10
    return s

sim_count = 5_000
hit_strategy_df['player_total']=hit_strategy_df.apply(
    lambda row: rank_sum(row.player_ranks), axis=1)

hit_strategy_df['hit_win_ratio'] = hit_strategy_df.apply(
    lambda row: simulate_hit(row.player_ranks, row.dealer_rank, sim_count=sim_count), axis=1)

hit_strategy_df['stand_win_ratio'] = hit_strategy_df.apply(
    lambda row: simulate_stand(row.player_ranks, row.dealer_rank, sim_count=sim_count), axis=1)

In [15]:
hit_strategy_df['should_hit'] = hit_strategy_df.hit_win_ratio > hit_strategy_df.stand_win_ratio

In [16]:
hit_strategy_df['dealer_rank_value'] = hit_strategy_df.dealer_rank.apply(lambda x: x.val)

In [17]:
hit_strategy_df.pipe(lambda df: df[df.should_hit])

Unnamed: 0,player_ranks,dealer_rank,soft,player_total,hit_win_ratio,stand_win_ratio,should_hit,dealer_rank_value
0,"[Rank.Two, Rank.Two]",Rank.A,False,4,0.312446,0.306058,True,11
1,"[Rank.Two, Rank.Two]",Rank.Two,False,4,0.387600,0.372600,True,2
2,"[Rank.Two, Rank.Two]",Rank.Three,False,4,0.407600,0.396600,True,3
3,"[Rank.Two, Rank.Two]",Rank.Four,False,4,0.410000,0.408600,True,4
4,"[Rank.Two, Rank.Two]",Rank.Five,False,4,0.440200,0.431200,True,5
...,...,...,...,...,...,...,...,...
218,"[Rank.A, Rank.Four]",Rank.Nine,True,15,0.274600,0.236200,True,9
219,"[Rank.A, Rank.Four]",Rank.Ten,True,15,0.251576,0.230553,True,10
226,"[Rank.A, Rank.Five]",Rank.Seven,True,16,0.298800,0.263200,True,7
227,"[Rank.A, Rank.Five]",Rank.Eight,True,16,0.275800,0.248000,True,8


In [18]:
class HitStrategy:
    def hit(self, dealer_facing_card, player_state):
        if player_state.current_hand.total <= 11: #no chance of player bust after hit, since max card value 11
            return True
        elif player_state.current_hand.total < 21:
            should_hit = hit_strategy_df[(hit_strategy_df.player_total==player_state.current_hand.total)&(hit_strategy_df.soft==player_state.current_hand.has_ace)
                 &(hit_strategy_df.dealer_rank_value == dealer_facing_card.rank.val)
                 ]['should_hit']
            if should_hit.empty:
                raise KeyError(f"Could not find strategy for soft: {player_state.current_hand.has_ace} total:{player_state.current_hand.total} dealer Rank: {dealer_facing_card.rank}")
            return should_hit.bool()
        else:
            return False

    def bet_amount(self, player_state):
            return math.ceil(player_state.initial_capital * 0.01)
        
    def double_down(self, dealer_facing_card, player_state):
        return False

    def split(self, pair, to_be_processed, player_hands, dealer_facing_card, player_state):
        return False        

In [19]:
ps = PlayerState(100)
Dealer(Shoe()).simulate(ps, HitStrategy(),max_sim_count=10_000,ignore_capital=True)
df = ps.to_df()
df.head()

Unnamed: 0,player_hand,player_initial_hand,player_initial_ace,player_initial_total,wager,player_total,dealer_hand,dealer_facing_val,dealer_total,result
0,"[[8 ♥], [A ♥]]","[[8 ♥], [A ♥]]",True,19,1,19,"[[5 ♣], [A ♥], [10 ♠]]",5,26,Won
1,"[[4 ♠], [Q ♥], [J ♥]]","[[4 ♠], [Q ♥]]",False,14,1,24,"[[8 ♣], [8 ♣], [6 ♦]]",8,22,Loss
2,"[[8 ♦], [6 ♣], [J ♣]]","[[8 ♦], [6 ♣]]",False,14,1,24,"[[2 ♠], [Q ♣], [A ♠], [3 ♠], [7 ♣]]",2,23,Loss
3,"[[A ♠], [J ♠]]","[[A ♠], [J ♠]]",True,21,1,21,"[[4 ♥], [6 ♠], [5 ♦], [10 ♠]]",4,25,Won
4,"[[8 ♠], [10 ♥]]","[[8 ♠], [10 ♥]]",False,18,1,18,"[[5 ♥], [5 ♦], [5 ♣], [3 ♦]]",5,18,Push


In [20]:
ps.get_win_rate()

0.47525

We win about 47% of the time. Lets see if we can improve our strategy.

## Third Simulation
### Double Down Strategy

Lets try to simulate whether doubling down improves our chances. Here we define a simulate dd strategy, with a flag **double_down** that denotes whether or not we are doubling down. 

In [21]:
def simulate_dd_strategy(player_ranks, dealer_displayed_rank, sim_count=10000):
    total_wins = 0.
    total_wager = 0.
    shoe = Shoe()
    for i in range(sim_count):      
        shoe.shuffle()
        player_cards = [shoe.remove_card(rank) for rank in player_ranks]
        player_hand = PlayerHand(player_cards,1)
        dealer_displayed_card = shoe.remove_card(dealer_displayed_rank)
        dealer_hand = Hand([dealer_displayed_card])
        dealer_hand.update(HandAction.Hit, shoe.deal())
            
        # ignore case where dealer natural, since we cant do anything to change initial outcome
        if dealer_hand.natural():
            continue
            
        player_hand.wager *= 2
        player_hand.update(HandAction.Hit, shoe.deal())
        player_hand.terminal = True
                
        Dealer.dealer_strategy(dealer_hand, shoe)
            
        res, payout = Dealer.showdown(dealer_hand, player_hand,shoe)
        total_wager+= player_hand.wager
        
        if res == PlayerHandState.Won:
            total_wins+= player_hand.wager
        elif res == PlayerHandState.Push:
            total_wins+=(0.5 * player_hand.wager)

    ratio = (total_wins / total_wager)
    
    return 0 if total_wager == 0 else ratio

We run simulations to see whether or not to double down given the situation

In [22]:
sim_count = 1000

hit_strategy_df['dd_win_ratio'] = hit_strategy_df.apply(
    lambda row: simulate_dd_strategy(row.player_ranks, row.dealer_rank, sim_count=sim_count), axis=1)

hit_strategy_df['should_dd'] = hit_strategy_df['dd_win_ratio'] > 0.5


In [23]:
hit_strategy_df[hit_strategy_df.should_dd]

Unnamed: 0,player_ranks,dealer_rank,soft,player_total,hit_win_ratio,stand_win_ratio,should_hit,dealer_rank_value,dd_win_ratio,should_dd
44,"[Rank.Two, Rank.Six]",Rank.Five,False,8,0.5307,0.4366,True,5,0.511,True
45,"[Rank.Two, Rank.Six]",Rank.Six,False,8,0.5316,0.4334,True,6,0.527,True
51,"[Rank.Three, Rank.Six]",Rank.Two,False,9,0.5405,0.3736,True,2,0.5195,True
52,"[Rank.Three, Rank.Six]",Rank.Three,False,9,0.5384,0.396,True,3,0.545,True
53,"[Rank.Three, Rank.Six]",Rank.Four,False,9,0.5632,0.4126,True,4,0.561,True
54,"[Rank.Three, Rank.Six]",Rank.Five,False,9,0.5764,0.444,True,5,0.5855,True
55,"[Rank.Three, Rank.Six]",Rank.Six,False,9,0.5687,0.417,True,6,0.578,True
56,"[Rank.Three, Rank.Six]",Rank.Seven,False,9,0.5225,0.2666,True,7,0.511,True
57,"[Rank.Three, Rank.Six]",Rank.Eight,False,9,0.4802,0.242,True,8,0.507,True
60,"[Rank.Four, Rank.Six]",Rank.A,False,10,0.596866,0.301136,True,11,0.6,True


In [27]:

dd_strategy_df = hit_strategy_df
class HitDDStrategy:
    def hit(self, dealer_facing_card, player_state):
        if player_state.current_hand.total <= 11: #no chance of player bust after hit, since max card value 11
            return True
        elif player_state.current_hand.total < 21:
            should_hit = hit_strategy_df[(hit_strategy_df.player_total==player_state.current_hand.total)&(hit_strategy_df.soft==player_state.current_hand.has_ace)
                 &(hit_strategy_df.dealer_rank_value == dealer_facing_card.rank.val)
                 ]['should_hit']
            if should_hit.empty:
                raise KeyError(f"Could not find hit strategy for soft: {player_state.current_hand.has_ace} total:{player_state.current_hand.total} dealer Rank: {dealer_facing_card.rank}")
            return should_hit.bool()
        else:
            return False

    def bet_amount(self, player_state):
            return math.ceil(player_state.initial_capital * 0.01)
        
    def double_down(self, dealer_facing_card, player_state):
        if player_state.current_hand.total == 21:
            return False
        
        should_dd = dd_strategy_df[(dd_strategy_df.player_total==player_state.current_hand.total)&(dd_strategy_df.soft==player_state.current_hand.has_ace)
                 &(dd_strategy_df.dealer_rank_value == dealer_facing_card.rank.val)]['should_dd']
        if should_dd.empty:
                raise KeyError(f"Could not find dd strategy for soft: {player_state.current_hand.has_ace} total:{player_state.current_hand.total} dealer Rank: {dealer_facing_card.rank}")
        
        return should_dd.bool()

    def split(self, pair, to_be_processed, player_hands, dealer_facing_card, player_state):
        return False    

In [28]:
ps = PlayerState(100)
Dealer(Shoe()).simulate(ps, HitDDStrategy(),max_sim_count=10_000,ignore_capital=True)
df = ps.to_df()
df.head()

Unnamed: 0,player_hand,player_initial_hand,player_initial_ace,player_initial_total,wager,player_total,dealer_hand,dealer_facing_val,dealer_total,result
0,"[[5 ♥], [A ♥], [6 ♦]]","[[5 ♥], [A ♥]]",True,16,1,22,"[[10 ♥], [9 ♦]]",10,19,Loss
1,"[[7 ♦], [10 ♦]]","[[7 ♦], [10 ♦]]",False,17,1,17,"[[5 ♠], [10 ♠], [8 ♥]]",5,23,Won
2,"[[8 ♦], [2 ♣], [5 ♣]]","[[8 ♦], [2 ♣]]",False,10,2,15,"[[Q ♥], [4 ♦], [J ♦]]",10,24,Won
3,"[[2 ♦], [3 ♦], [9 ♦]]","[[2 ♦], [3 ♦]]",False,5,1,14,"[[J ♥], [2 ♦], [J ♠]]",10,22,Won
4,"[[A ♥], [K ♠]]","[[A ♥], [K ♠]]",True,21,1,21,"[[4 ♥], [4 ♣], [5 ♠], [Q ♣]]",4,23,Won


In [29]:
ps.get_win_rate()

0.480357466884413

## Fourth Simulation
### Split Strategy

Lets try to simulate whether or not splitting improves our chances. Here we define a simulate split strategy

In [39]:

def should_hit(current_hand, dealer_facing_card):
    if current_hand.total <= 11: #no chance of player bust after hit, since max card value 11
        return True
    elif current_hand.total < 21:
        return hit_strategy_df[(hit_strategy_df.player_total==current_hand.total)&(hit_strategy_df.soft==current_hand.has_ace)
         &(hit_strategy_df.dealer_rank_value == dealer_facing_card.rank.val)]['should_hit'].bool()
    
    
def simulate_split_strategy(player_ranks, dealer_displayed_rank, split=False, sim_count=10000):
    total_wins = 0.
    total_wager = 0.
    shoe = Shoe()
    for i in range(sim_count):      
        shoe.shuffle()
        player_cards = [shoe.remove_card(rank) for rank in player_ranks]
  
        dealer_displayed_card = shoe.remove_card(dealer_displayed_rank)
        dealer_hand = Hand([dealer_displayed_card])
        dealer_hand.update(HandAction.Hit, shoe.deal())
            
        # ignore case where dealer natural, since we cant do anything to change initial outcome
        if dealer_hand.natural():
            continue
            
        # only split once
        if split:
            new1, new2 = shoe.deal(), shoe.deal()
            
            player_hands = [PlayerHand([player_cards[0], new1],1, from_split=True),
                            PlayerHand([player_cards[1], new2],1, from_split=True)] 
        else:
            player_hands = [PlayerHand(player_cards,1)]
                   
        for player_hand in player_hands:
            
            # double down logic
            if player_hand.total < 21 and (hit_strategy_df[(dd_strategy_df.player_total==player_hand.total)&(hit_strategy_df.soft==player_hand.has_ace)
                 &(hit_strategy_df.dealer_rank_value == dealer_displayed_card.rank.val)]['should_dd']).bool():
                player_hand.wager *= 2
                player_hand.update(HandAction.Hit, shoe.deal())
                player_hand.terminal = True
            else:
                
                while should_hit(player_hand, dealer_displayed_card):
                    player_hand.update(HandAction.Hit, shoe.deal())

                if not player_hand.terminal:
                    player_hand.update(HandAction.Stand)               
                        
        Dealer.dealer_strategy(dealer_hand, shoe)
        
        for player_hand in player_hands: 
            res, payout = Dealer.showdown(dealer_hand, player_hand,shoe)
            total_wager += player_hand.wager
            if res == PlayerHandState.Won:
                total_wins+=(player_hand.wager) 
            elif res == PlayerHandState.Push:
                total_wins+=(0.5 * player_hand.wager) 
                
                        
    ratio = 0 if total_wager == 0 else (total_wins / total_wager)
    
    return ratio

In [41]:
player_rank_lists = (
[Rank.Two, Rank.Two],    
[Rank.Three, Rank.Three],
[Rank.Four, Rank.Four],
[Rank.Five, Rank.Five],
[Rank.Six, Rank.Six],
[Rank.Seven, Rank.Seven],
[Rank.Eight, Rank.Eight],
[Rank.Nine, Rank.Nine],
[Rank.Ten, Rank.Ten],
)

sim_count = 1_000

split_strategy_df = pd.DataFrame(itertools.product(player_rank_lists,(rank for rank in Rank if rank not in [Rank.J, Rank.Q, Rank.K])),columns=['player_ranks', 'dealer_rank'])

split_strategy_df['soft']=split_strategy_df.apply(
    lambda row: Rank.A in row.player_ranks,axis=1)

split_strategy_df['dealer_rank_value'] = split_strategy_df.dealer_rank.apply(lambda x: x.val)

split_strategy_df['split_win_ratio'] = split_strategy_df.apply(
    lambda row: simulate_split_strategy(row.player_ranks, row.dealer_rank, split=True, sim_count=sim_count), axis=1)

split_strategy_df['split_off_win_ratio'] = split_strategy_df.apply(
    lambda row: simulate_split_strategy(row.player_ranks, row.dealer_rank, split=False, sim_count=sim_count), axis=1)
split_strategy_df['player_total']=split_strategy_df.apply(
    lambda row: rank_sum(row.player_ranks), axis=1)

split_strategy_df['should_split'] = split_strategy_df['split_win_ratio'] > split_strategy_df['split_off_win_ratio']

In [42]:
split_strategy_df

Unnamed: 0,player_ranks,dealer_rank,soft,dealer_rank_value,split_win_ratio,split_off_win_ratio,player_total,should_split
0,"[Rank.Two, Rank.Two]",Rank.A,False,11,0.477102,0.404971,4,True
1,"[Rank.Two, Rank.Two]",Rank.Two,False,2,0.465712,0.431500,4,True
2,"[Rank.Two, Rank.Two]",Rank.Three,False,3,0.518362,0.467000,4,True
3,"[Rank.Two, Rank.Two]",Rank.Four,False,4,0.512515,0.466500,4,True
4,"[Rank.Two, Rank.Two]",Rank.Five,False,5,0.543021,0.499500,4,True
...,...,...,...,...,...,...,...,...
85,"[Rank.Ten, Rank.Ten]",Rank.Six,False,6,0.639750,0.866000,20,False
86,"[Rank.Ten, Rank.Ten]",Rank.Seven,False,7,0.631750,0.869000,20,False
87,"[Rank.Ten, Rank.Ten]",Rank.Eight,False,8,0.582500,0.891500,20,False
88,"[Rank.Ten, Rank.Ten]",Rank.Nine,False,9,0.561750,0.879000,20,False


In [43]:
class HitDDSplitStrategy:
    def hit(self, dealer_facing_card, player_state):
        if player_state.current_hand.total <= 11: #no chance of player bust after hit, since max card value 11
            return True
        elif player_state.current_hand.total < 21:
            should_hit = hit_strategy_df[(hit_strategy_df.player_total==player_state.current_hand.total)&(hit_strategy_df.soft==player_state.current_hand.has_ace)
                 &(hit_strategy_df.dealer_rank_value == dealer_facing_card.rank.val)
                 ]['should_hit']
            if should_hit.empty:
                raise KeyError(f"Could not find hit strategy for soft: {player_state.current_hand.has_ace} total:{player_state.current_hand.total} dealer Rank: {dealer_facing_card.rank}")
            return should_hit.bool()
        else:
            return False

    def bet_amount(self, player_state):
            return math.ceil(player_state.initial_capital * 0.01)
        
    def double_down(self, dealer_facing_card, player_state):
        if player_state.current_hand.total == 21:
            return False
        
        should_dd = dd_strategy_df[(dd_strategy_df.player_total==player_state.current_hand.total)&(dd_strategy_df.soft==player_state.current_hand.has_ace)
                 &(dd_strategy_df.dealer_rank_value == dealer_facing_card.rank.val)]['should_dd']
        if should_dd.empty:
                raise KeyError(f"Could not find dd strategy for soft: {player_state.current_hand.has_ace} total:{player_state.current_hand.total} dealer Rank: {dealer_facing_card.rank}")
        
        return should_dd.bool()

    def split(self, pair, to_be_processed, player_hands, dealer_facing_card, player_state):
        card1,card2 = pair
        pair_total =card1.rank.val + card2.rank.val
        return (split_strategy_df[(split_strategy_df.player_total==pair_total)
                 &(split_strategy_df.dealer_rank_value == dealer_facing_card.rank.val)]['should_split']).bool()

In [44]:
ps = PlayerState(10_000)
Dealer(Shoe()).simulate(ps, HitDDSplitStrategy(),max_sim_count=10_000,ignore_capital=True)
df = ps.to_df()
df.head()

Unnamed: 0,player_hand,player_initial_hand,player_initial_ace,player_initial_total,wager,player_total,dealer_hand,dealer_facing_val,dealer_total,result
0,"[[2 ♦], [J ♥]]","[[2 ♦], [J ♥]]",False,12,100,12,"[[6 ♠], [2 ♦], [K ♥]]",6,18,Loss
1,"[[3 ♠], [K ♣]]","[[3 ♠], [K ♣]]",False,13,100,13,"[[2 ♣], [5 ♣], [J ♦]]",2,17,Loss
2,"[[6 ♠], [4 ♥], [10 ♣]]","[[6 ♠], [4 ♥]]",False,10,200,20,"[[8 ♣], [5 ♥], [9 ♣]]",8,22,Won
3,"[[7 ♣], [10 ♥]]","[[7 ♣], [10 ♥]]",False,17,100,17,"[[2 ♥], [7 ♥], [10 ♠]]",2,19,Loss
4,"[[5 ♦], [A ♦], [8 ♦]]","[[5 ♦], [A ♦]]",True,16,100,24,"[[8 ♣], [7 ♥], [Q ♣]]",8,25,Loss


In [45]:
ps.get_win_rate()

0.4816423390346557