In [3]:
import numpy as np
import pickle

In [7]:
class BlackJackSolution:
    
    def __init__(self, lr=0.1, exp_rate=0.3):
        self.player_Q_Values = {}  # key: [(player_value, show_card, usable_ace)][action] = value
        # initialise Q values | (12-21) x (1-10) x (True, False) x (1, 0) 400 in total
        for i in range(12, 22):
            for j in range(1, 11):
                for k in [True, False]:
                    self.player_Q_Values[(i, j, k)] = {}
                    for a in [1, 0]:
                        if (i == 21) and (a == 0):
                            self.player_Q_Values[(i, j, k)][a] = 1
                        else:
                            self.player_Q_Values[(i, j, k)][a] = 0
        
        self.player_state_action = []
        self.state = (0, 0, False)  # initial state
        self.actions = [1, 0]  # 1: HIT  0: STAND
        self.end = False
        self.lr = lr
        self.exp_rate = exp_rate
    
    # give card
    @staticmethod
    def giveCard():
        # 1 stands for ace
        c_list = list(range(1, 11)) + [10, 10, 10]
        return np.random.choice(c_list)
    
    def dealerPolicy(self, current_value, usable_ace, is_end):
        if current_value > 21:
            if usable_ace:
                current_value -= 10
                usable_ace = False
            else:
                return current_value, usable_ace, True
        # HIT17
        if current_value >= 17:
            return current_value, usable_ace, True
        else:
            card = self.giveCard()
            if card == 1:
                if current_value <= 10:
                    return current_value+11, True, False
                return current_value+1, usable_ace, False
            else:
                return current_value+card, usable_ace, False
            
    def chooseAction(self):
        # if current value <= 11, always hit
        current_value = self.state[0]
        if current_value <= 11:
            return 1
        
        if np.random.uniform(0, 1) <= self.exp_rate:
            action = np.random.choice(self.actions)
#             print("random action", action)
        else:
            # greedy action
            v = -999
            action = 0
            for a in self.player_Q_Values[self.state]:
                if self.player_Q_Values[self.state][a] > v:
                    action = a
                    v = self.player_Q_Values[self.state][a]
#             print("greedy action", action)
        return action
            
    # one can only has 1 usable ace 
    # return next state
    def playerNxtState(self, action):
        current_value = self.state[0]
        show_card = self.state[1]
        usable_ace = self.state[2]
        
        if action:
            # action hit
            card = self.giveCard()
            if card == 1:
                if current_value <= 10:
                    current_value += 11
                    usable_ace = True
                else:
                    current_value += 1
            else:
                current_value += card
        else:
            # action stand
            self.end = True
            return (current_value, show_card, usable_ace)
        
        if current_value > 21:
            if usable_ace:
                current_value -= 10
                usable_ace = False
            else:
                self.end = True
                return (current_value, show_card, usable_ace)
        
        return (current_value, show_card, usable_ace)
        
    def winner(self, player_value, dealer_value):
        # player 1 | draw 0 | dealer -1
        winner = 0
        if player_value > 21:
            if dealer_value > 21:
                # draw
                winner = 0
            else:
                winner = -1
        else:
            if dealer_value > 21:
                winner = 1
            else:
                if player_value < dealer_value:
                    winner = -1
                elif player_value > dealer_value:
                    winner = 1
                else:
                    # draw
                    winner = 0
        return winner
    
    def _giveCredit(self, player_value, dealer_value):
        reward = self.winner(player_value, dealer_value)
        # backpropagate reward
        for s in reversed(self.player_state_action):
            state, action = s[0], s[1]
            reward = self.player_Q_Values[state][action] + self.lr*(reward - self.player_Q_Values[state][action])
            self.player_Q_Values[state][action] = round(reward, 3)
            reward = np.max(list(self.player_Q_Values[state].values()))  # Q-learning
            
    def reset(self):
        self.player_state_action = []
        self.state = (0, 0, False)  # initial state
        self.end = False
        
    def deal2cards(self, show=False):
        # return value after 2 cards and usable ace
        value, usable_ace = 0, False
        cards = [self.giveCard(), self.giveCard()]
        if 1 in cards:
            value = sum(cards)+10
            usable_ace = True
        else:
            value = sum(cards)
            usable_ace = False
        
        if show:
            return value, usable_ace, cards[0]
        else:
            return value, usable_ace

    def play(self, rounds=1000):
        for i in range(rounds):
            if i % 1000 == 0:
                print("round", i)
                
            # give 2 cards
            dealer_value, d_usable_ace, show_card = self.deal2cards(show=True)
            player_value, p_usable_ace = self.deal2cards(show=False)
            
            self.state = (player_value, show_card, p_usable_ace)
            print("init", self.state)
            
            # judge winner after 2 cards
            if player_value == 21 or dealer_value == 21:
                # game end
                # print("reach 21 in 2 cards: player value {} | dealer value {}".format(player_value, dealer_value))
                next
            else:
                while True:
                    action = self.chooseAction()  # state -> action 
                    # print("current value {}, action {}".format(self.state[0], action))
                    if self.state[0] >= 12:
                        state_action_pair = [self.state, action]
                        # print(state_action_pair)
                        self.player_state_action.append(state_action_pair)
                    # update next state
                    self.state = self.playerNxtState(action)
                    if self.end:
                        break    

                # dealer's turn
                is_end = False
                while not is_end:
                    dealer_value, d_usable_ace, is_end = self.dealerPolicy(dealer_value, d_usable_ace, is_end)

                # judge winner
                # give reward and update Q value
                player_value = self.state[0]
                print("player value {} | dealer value {}".format(player_value, dealer_value))
                self._giveCredit(player_value, dealer_value)
            # print("player state action", self.player_state_action)
            self.reset()
            
    def savePolicy(self, file="policy"):
        fw = open(file, 'wb')
        pickle.dump(self.player_Q_Values, fw)
        fw.close()

    def loadPolicy(self, file="policy"):
        fr = open(file,'rb')
        self.player_Q_Values = pickle.load(fr)
        fr.close()
        
    # trained robot play against dealer
    def playWithDealer(self, rounds=1000):
        self.reset()
        self.loadPolicy()
        self.exp_rate = 0
        
        result = np.zeros(3)  # player [win, draw, lose]
        for _ in range(rounds):
            # hit 2 cards each
            # give 2 cards
            dealer_value, d_usable_ace, show_card = self.deal2cards(show=True)
            player_value, p_usable_ace = self.deal2cards(show=False)
            
            self.state = (player_value, show_card, p_usable_ace)
            
            # judge winner after 2 cards
            if player_value == 21 or dealer_value == 21:
                if player_value == dealer_value:
                    result[1] += 1
                elif player_value > dealer_value:
                    result[0] += 1
                else:
                    result[2] += 1
            else:
                # player's turn
                while True:
                    action = self.chooseAction()
                    # update next state
                    self.state = self.playerNxtState(action)
                    if self.end:
                        break    

                # dealer's turn
                is_end = False
                while not is_end:
                    dealer_value, d_usable_ace, is_end = self.dealerPolicy(dealer_value, d_usable_ace, is_end)

                # judge
                player_value = self.state[0]
                # print("player value {} | dealer value {}".format(player_value, dealer_value))
                w = self.winner(player_value, dealer_value)
                if w == 1:
                    result[0] += 1
                elif w == 0:
                    result[1] += 1
                else:
                    result[2] += 1
            self.reset()
        return result

In [8]:
b = BlackJackSolution()
b.play(1)

round 0
init (20, 9, False)
player value 22 | dealer value 24


In [14]:
# winner test
for _ in range(10):
    p_value = np.random.choice(range(12, 30))
    d_value = np.random.choice(range(12, 30))
    r = b.winner(p_value, d_value)
    print(p_value, d_value, r)

18 16 1
22 29 0
16 25 1
13 26 1
21 18 1
14 23 1
13 18 -1
29 29 0
17 18 -1
28 27 0


In [26]:
# test dealer policy
for _ in range(10):
    print("------------------")
    dealer_value, d_usable_ace, is_end = 0, False, False
    while not is_end:
        dealer_value, d_usable_ace, is_end = b.dealerPolicy(dealer_value, d_usable_ace, is_end)
        print(dealer_value, d_usable_ace)

------------------
4 False
14 False
15 False
16 False
20 False
20 False
------------------
3 False
10 False
19 False
19 False
------------------
9 False
11 False
17 False
17 False
------------------
7 False
16 False
18 False
18 False
------------------
5 False
15 False
25 False
25 False
------------------
11 True
17 True
17 True
------------------
7 False
11 False
14 False
24 False
24 False
------------------
10 False
16 False
24 False
24 False
------------------
7 False
13 False
19 False
19 False
------------------
6 False
9 False
16 False
26 False
26 False


In [177]:
# test deal2cards
i, j = 0, 0
for _ in range(1000):
    p, _ = b.deal2cards()
    d, _ = b.deal2cards()
    if p == 21:
        i += 1
    if d == 21:
        j += 1
print(i, j)

50 47


In [179]:
# test next state
b = BlackJackSolution()
b.state = (19, 10, True)
print(b.playerNxtState(action=1))
print(b.end)

b = BlackJackSolution()
b.state = (11, 10, True)
print(b.playerNxtState(action=1))
print(b.end)

(12, 10, False)
False
(21, 10, True)
False


In [110]:
# test play
b = BlackJackSolution()
b.play(10)

round 0
[(17, 8, False), 1]
(18, 8, False)
[(18, 8, False), 1]
(21, 8, False)
[(21, 8, False), 1]
(27, 8, False)
[(19, 8, False), 1]
(22, 8, False)
[(17, 10, False), 1]
(22, 10, False)
(13, 10, False)
[(13, 10, False), 1]
(17, 10, False)
[(17, 10, False), 0]
(17, 10, False)
[(16, 10, True), 1]
(14, 10, False)
[(14, 10, False), 1]
(17, 10, False)
[(17, 10, False), 0]
(17, 10, False)
[(16, 10, False), 1]
(26, 10, False)
[(15, 7, False), 1]
(25, 7, False)
[(15, 10, False), 1]
(16, 10, False)
[(16, 10, False), 0]
(16, 10, False)
[(16, 8, False), 1]
(26, 8, False)
[(15, 4, False), 1]
(25, 4, False)


### Play

In [9]:
b = BlackJackSolution(exp_rate=0.2, lr=0.1)
b.play(10000)
b.savePolicy()

round 0
init (9, 6, False)
player value 29 | dealer value 17
init (19, 3, False)
player value 29 | dealer value 24
init (16, 1, False)
player value 23 | dealer value 20
init (7, 8, False)
player value 26 | dealer value 18
init (17, 10, True)
player value 17 | dealer value 24
init (19, 10, False)
player value 26 | dealer value 17
init (6, 6, False)
player value 30 | dealer value 23
init (9, 7, False)
player value 19 | dealer value 18
init (13, 6, False)
player value 23 | dealer value 20
init (15, 8, False)
player value 17 | dealer value 18
init (15, 8, False)
player value 29 | dealer value 18
init (13, 2, False)
player value 23 | dealer value 26
init (15, 10, False)
init (13, 8, False)
player value 23 | dealer value 21
init (17, 10, False)
player value 21 | dealer value 22
init (18, 10, True)
player value 27 | dealer value 19
init (4, 6, False)
player value 22 | dealer value 24
init (15, 10, True)
player value 17 | dealer value 18
init (18, 5, False)
player value 27 | dealer value 23
in

init (10, 9, False)
player value 26 | dealer value 20
init (13, 10, False)
player value 13 | dealer value 18
init (14, 10, False)
player value 24 | dealer value 19
init (12, 6, False)
player value 22 | dealer value 17
init (14, 10, False)
player value 16 | dealer value 20
init (12, 10, False)
player value 19 | dealer value 17
init (14, 10, True)
player value 18 | dealer value 20
init (10, 4, False)
player value 15 | dealer value 23
init (16, 2, False)
player value 19 | dealer value 20
init (5, 10, False)
player value 15 | dealer value 25
init (12, 7, False)
player value 20 | dealer value 18
init (6, 3, False)
player value 21 | dealer value 22
init (15, 5, False)
player value 15 | dealer value 18
init (14, 9, False)
player value 24 | dealer value 17
init (6, 2, False)
player value 28 | dealer value 19
init (16, 5, False)
player value 25 | dealer value 25
init (14, 10, False)
player value 14 | dealer value 25
init (9, 6, False)
player value 23 | dealer value 22
init (15, 4, False)
player

init (13, 6, False)
player value 13 | dealer value 26
init (14, 10, False)
player value 24 | dealer value 19
init (17, 10, False)
player value 20 | dealer value 17
init (11, 10, False)
player value 20 | dealer value 17
init (9, 3, False)
player value 20 | dealer value 18
init (14, 4, False)
player value 14 | dealer value 22
init (18, 10, True)
player value 18 | dealer value 17
init (20, 8, False)
player value 20 | dealer value 24
init (15, 10, False)
player value 15 | dealer value 18
init (12, 8, False)
player value 22 | dealer value 21
init (20, 9, False)
player value 20 | dealer value 18
init (12, 9, False)
player value 22 | dealer value 19
init (13, 8, True)
player value 18 | dealer value 19
init (15, 2, False)
player value 24 | dealer value 18
init (20, 6, False)
player value 20 | dealer value 21
init (15, 10, False)
player value 15 | dealer value 20
init (20, 10, False)
player value 20 | dealer value 20
init (19, 7, False)
player value 19 | dealer value 17
init (13, 1, False)
play

init (11, 9, False)
player value 21 | dealer value 23
init (16, 10, False)
player value 26 | dealer value 20
init (11, 9, False)
player value 23 | dealer value 19
init (14, 7, True)
player value 14 | dealer value 17
init (17, 6, False)
player value 27 | dealer value 18
init (11, 3, False)
player value 19 | dealer value 20
init (15, 8, False)
player value 24 | dealer value 18
init (12, 5, False)
player value 12 | dealer value 19
init (20, 1, False)
player value 23 | dealer value 21
init (17, 10, False)
init (8, 10, False)
player value 18 | dealer value 17
init (10, 10, False)
player value 13 | dealer value 21
init (20, 2, False)
player value 20 | dealer value 26
init (21, 8, True)
init (12, 8, False)
player value 20 | dealer value 20
init (20, 10, False)
player value 20 | dealer value 17
init (18, 10, False)
player value 18 | dealer value 17
init (10, 7, False)
player value 30 | dealer value 17
init (21, 10, True)
init (10, 10, False)
player value 18 | dealer value 20
init (14, 10, True

player value 13 | dealer value 24
init (21, 5, True)
init (18, 4, False)
player value 20 | dealer value 18
init (15, 10, False)
player value 21 | dealer value 20
init (17, 5, True)
player value 23 | dealer value 23
init (15, 10, False)
player value 25 | dealer value 19
init (6, 2, False)
player value 17 | dealer value 20
init (20, 10, False)
player value 20 | dealer value 20
init (11, 6, False)
player value 23 | dealer value 22
init (5, 6, False)
player value 14 | dealer value 26
init (20, 4, False)
player value 27 | dealer value 25
init (16, 10, False)
player value 23 | dealer value 20
init (12, 5, False)
player value 22 | dealer value 20
init (8, 4, False)
player value 28 | dealer value 19
init (19, 10, False)
player value 19 | dealer value 20
init (11, 1, False)
player value 12 | dealer value 18
init (10, 2, False)
player value 23 | dealer value 24
init (15, 4, False)
player value 15 | dealer value 18
init (21, 2, True)
init (14, 5, False)
player value 14 | dealer value 22
init (13,

player value 20 | dealer value 24
init (20, 9, False)
player value 26 | dealer value 19
init (13, 10, False)
player value 13 | dealer value 20
init (20, 4, False)
player value 26 | dealer value 24
init (5, 8, False)
player value 24 | dealer value 17
init (5, 8, False)
player value 15 | dealer value 21
init (13, 7, False)
player value 17 | dealer value 22
init (13, 8, False)
player value 18 | dealer value 17
init (9, 10, False)
player value 18 | dealer value 18
init (15, 9, False)
player value 15 | dealer value 19
init (17, 10, False)
player value 17 | dealer value 22
init (5, 10, False)
player value 25 | dealer value 19
init (19, 10, True)
init (15, 9, False)
player value 22 | dealer value 20
init (12, 6, False)
player value 21 | dealer value 26
init (16, 8, False)
player value 18 | dealer value 18
init (14, 10, False)
player value 18 | dealer value 20
init (12, 10, False)
player value 18 | dealer value 23
init (10, 9, False)
player value 18 | dealer value 20
init (14, 10, False)
playe

init (18, 2, True)
player value 18 | dealer value 21
init (19, 5, False)
player value 19 | dealer value 17
init (12, 10, False)
player value 20 | dealer value 20
init (14, 8, False)
player value 14 | dealer value 24
init (17, 6, False)
player value 17 | dealer value 17
init (13, 10, False)
player value 13 | dealer value 23
init (13, 10, False)
player value 25 | dealer value 19
init (8, 2, False)
player value 21 | dealer value 17
init (13, 8, False)
player value 22 | dealer value 19
init (18, 10, True)
player value 17 | dealer value 19
init (14, 10, True)
init (20, 10, False)
player value 20 | dealer value 20
init (7, 8, False)
player value 27 | dealer value 22
init (17, 10, False)
player value 17 | dealer value 19
init (15, 10, True)
player value 26 | dealer value 20
init (17, 10, False)
init (17, 1, False)
player value 17 | dealer value 19
init (13, 3, False)
player value 13 | dealer value 22
init (11, 6, False)
player value 22 | dealer value 26
init (21, 7, True)
init (20, 3, False)


init (20, 10, False)
player value 20 | dealer value 20
init (19, 9, False)
player value 19 | dealer value 18
init (20, 10, False)
player value 20 | dealer value 23
init (12, 5, False)
player value 19 | dealer value 22
init (9, 10, False)
player value 24 | dealer value 19
init (15, 7, False)
player value 26 | dealer value 26
init (10, 10, False)
player value 19 | dealer value 25
init (12, 10, False)
player value 19 | dealer value 19
init (9, 5, False)
player value 19 | dealer value 24
init (16, 3, False)
player value 18 | dealer value 22
init (14, 10, False)
player value 24 | dealer value 17
init (16, 5, False)
player value 16 | dealer value 25
init (8, 7, False)
player value 19 | dealer value 17
init (16, 2, True)
player value 19 | dealer value 25
init (10, 6, False)
player value 20 | dealer value 23
init (17, 10, False)
player value 17 | dealer value 20
init (6, 7, False)
player value 21 | dealer value 19
init (11, 8, False)
player value 25 | dealer value 17
init (19, 8, True)
player 

init (8, 8, False)
player value 18 | dealer value 25
init (7, 5, False)
player value 27 | dealer value 18
init (10, 10, False)
player value 27 | dealer value 24
init (13, 10, False)
player value 23 | dealer value 20
init (19, 7, False)
player value 19 | dealer value 17
init (13, 2, True)
player value 20 | dealer value 22
init (20, 6, False)
player value 24 | dealer value 17
init (9, 10, False)
player value 21 | dealer value 17
init (19, 10, False)
player value 19 | dealer value 20
init (8, 7, False)
player value 18 | dealer value 24
init (16, 5, False)
player value 16 | dealer value 18
init (17, 4, False)
player value 27 | dealer value 20
init (14, 2, True)
player value 15 | dealer value 17
init (20, 5, False)
player value 20 | dealer value 18
init (14, 8, False)
player value 14 | dealer value 25
init (20, 3, False)
player value 30 | dealer value 20
init (16, 1, True)
player value 26 | dealer value 20
init (16, 7, False)
player value 22 | dealer value 21
init (14, 1, False)
init (7, 3,

init (15, 1, False)
init (19, 10, False)
player value 19 | dealer value 22
init (17, 4, False)
player value 24 | dealer value 23
init (10, 7, False)
player value 17 | dealer value 18
init (9, 1, False)
init (13, 10, False)
player value 19 | dealer value 18
init (18, 2, False)
player value 18 | dealer value 18
init (12, 3, False)
player value 21 | dealer value 21
init (12, 10, False)
init (18, 10, False)
player value 26 | dealer value 20
init (16, 5, False)
player value 16 | dealer value 20
init (14, 1, False)
player value 14 | dealer value 22
init (17, 10, False)
player value 27 | dealer value 17
init (20, 10, False)
init (16, 9, False)
player value 18 | dealer value 17
init (14, 3, False)
player value 14 | dealer value 17
init (19, 10, False)
player value 19 | dealer value 20
init (15, 10, False)
player value 15 | dealer value 21
init (6, 2, False)
player value 25 | dealer value 18
init (14, 10, False)
player value 21 | dealer value 20
init (13, 8, False)
player value 20 | dealer valu

init (19, 10, False)
player value 19 | dealer value 20
init (11, 2, False)
player value 14 | dealer value 18
init (13, 6, True)
player value 16 | dealer value 17
init (12, 8, False)
player value 26 | dealer value 18
init (14, 10, False)
player value 25 | dealer value 17
init (9, 10, False)
player value 18 | dealer value 18
init (6, 7, False)
player value 19 | dealer value 18
init (16, 7, False)
player value 20 | dealer value 23
init (15, 10, False)
init (17, 7, False)
player value 27 | dealer value 18
init (15, 4, False)
player value 25 | dealer value 23
init (18, 2, True)
player value 15 | dealer value 17
init (20, 6, False)
player value 20 | dealer value 23
init (20, 6, False)
player value 20 | dealer value 18
init (11, 1, False)
init (15, 10, True)
player value 15 | dealer value 22
init (10, 10, False)
player value 20 | dealer value 22
init (9, 10, False)
player value 20 | dealer value 17
init (7, 8, False)
player value 28 | dealer value 18
init (10, 4, False)
player value 19 | deal

init (12, 10, False)
init (9, 6, False)
player value 19 | dealer value 22
init (15, 4, False)
player value 15 | dealer value 21
init (12, 2, False)
player value 22 | dealer value 19
init (19, 7, False)
player value 19 | dealer value 17
init (13, 5, False)
player value 22 | dealer value 18
init (16, 3, False)
player value 17 | dealer value 22
init (6, 1, False)
player value 27 | dealer value 21
init (12, 6, False)
player value 12 | dealer value 26
init (16, 7, False)
player value 26 | dealer value 17
init (17, 6, False)
player value 17 | dealer value 20
init (15, 9, False)
player value 15 | dealer value 19
init (19, 8, False)
player value 19 | dealer value 23
init (20, 4, False)
player value 20 | dealer value 22
init (21, 10, True)
init (11, 10, False)
player value 23 | dealer value 18
init (20, 4, True)
player value 27 | dealer value 22
init (15, 8, False)
player value 15 | dealer value 18
init (13, 2, False)
player value 22 | dealer value 22
init (16, 7, True)
player value 16 | dealer

player value 18 | dealer value 21
init (9, 10, False)
init (19, 3, False)
player value 28 | dealer value 19
init (9, 5, False)
player value 24 | dealer value 22
init (17, 3, True)
player value 17 | dealer value 23
init (19, 1, True)
player value 19 | dealer value 20
init (16, 9, False)
player value 26 | dealer value 19
init (20, 5, False)
player value 20 | dealer value 19
init (15, 10, False)
player value 25 | dealer value 23
init (21, 10, True)
init (20, 5, False)
player value 20 | dealer value 25
init (21, 8, True)
init (16, 10, False)
player value 26 | dealer value 18
init (13, 3, False)
player value 19 | dealer value 21
init (13, 10, False)
player value 21 | dealer value 20
init (20, 8, False)
player value 20 | dealer value 18
init (14, 2, True)
player value 24 | dealer value 25
init (15, 7, False)
player value 25 | dealer value 17
init (9, 10, False)
player value 19 | dealer value 20
init (19, 2, True)
player value 19 | dealer value 24
init (17, 10, False)
player value 27 | dealer

init (12, 10, False)
player value 22 | dealer value 20
init (18, 9, False)
player value 18 | dealer value 20
init (15, 2, False)
player value 20 | dealer value 19
init (9, 4, False)
player value 21 | dealer value 23
init (19, 7, False)
player value 29 | dealer value 17
init (6, 5, False)
player value 15 | dealer value 24
init (15, 10, False)
player value 20 | dealer value 19
init (21, 8, True)
init (21, 5, True)
init (15, 8, True)
player value 15 | dealer value 18
init (7, 8, False)
player value 24 | dealer value 22
init (7, 8, False)
player value 25 | dealer value 17
init (12, 5, False)
player value 22 | dealer value 17
init (14, 6, False)
player value 14 | dealer value 23
init (11, 8, False)
player value 18 | dealer value 19
init (11, 5, False)
player value 19 | dealer value 19
init (20, 9, False)
player value 20 | dealer value 19
init (21, 5, True)
init (9, 7, False)
player value 18 | dealer value 18
init (14, 4, True)
player value 14 | dealer value 19
init (13, 10, False)
player va

init (13, 10, False)
player value 23 | dealer value 20
init (7, 5, False)
player value 27 | dealer value 22
init (15, 5, False)
player value 15 | dealer value 23
init (13, 1, False)
init (16, 10, False)
player value 16 | dealer value 20
init (17, 5, False)
player value 27 | dealer value 23
init (13, 9, False)
player value 17 | dealer value 18
init (15, 1, False)
player value 15 | dealer value 17
init (20, 9, False)
player value 20 | dealer value 19
init (20, 10, False)
player value 20 | dealer value 19
init (16, 10, False)
player value 16 | dealer value 20
init (12, 1, False)
player value 12 | dealer value 17
init (11, 3, False)
player value 21 | dealer value 22
init (19, 10, False)
player value 19 | dealer value 23
init (20, 3, False)
player value 20 | dealer value 25
init (20, 3, False)
player value 20 | dealer value 21
init (13, 8, False)
player value 23 | dealer value 18
init (7, 8, False)
player value 27 | dealer value 18
init (21, 7, True)
init (10, 10, False)
player value 20 | d

player value 17 | dealer value 24
init (14, 10, True)
player value 24 | dealer value 26
init (10, 7, False)
player value 17 | dealer value 19
init (20, 10, False)
player value 20 | dealer value 19
init (19, 8, True)
player value 19 | dealer value 18
init (13, 2, False)
player value 21 | dealer value 19
init (14, 4, False)
player value 24 | dealer value 18
init (14, 9, False)
player value 20 | dealer value 19
init (14, 4, False)
player value 17 | dealer value 24
init (13, 9, False)
player value 23 | dealer value 19
init (17, 3, True)
player value 17 | dealer value 20
init (14, 9, False)
player value 24 | dealer value 19
init (16, 10, False)
player value 16 | dealer value 19
init (9, 2, False)
player value 19 | dealer value 21
init (17, 7, False)
player value 17 | dealer value 19
init (11, 5, False)
player value 16 | dealer value 25
init (15, 10, False)
player value 15 | dealer value 20
init (20, 4, False)
player value 21 | dealer value 20
init (18, 7, False)
player value 18 | dealer val

init (6, 4, False)
player value 16 | dealer value 18
init (10, 10, False)
player value 19 | dealer value 20
init (10, 10, False)
player value 19 | dealer value 26
init (17, 9, False)
player value 17 | dealer value 19
init (14, 4, False)
player value 24 | dealer value 22
init (16, 2, False)
player value 24 | dealer value 17
init (9, 6, False)
player value 18 | dealer value 19
init (4, 7, False)
player value 17 | dealer value 22
init (17, 10, False)
init (9, 2, False)
player value 19 | dealer value 20
init (12, 10, False)
player value 22 | dealer value 20
init (21, 3, True)
init (13, 1, False)
player value 13 | dealer value 17
init (18, 5, False)
player value 18 | dealer value 22
init (10, 6, False)
player value 20 | dealer value 21
init (16, 3, False)
player value 16 | dealer value 22
init (16, 3, False)
player value 16 | dealer value 23
init (15, 1, False)
init (21, 3, True)
init (20, 4, True)
player value 21 | dealer value 21
init (17, 9, False)
player value 17 | dealer value 18
init 

player value 24 | dealer value 19
init (13, 10, False)
player value 23 | dealer value 18
init (13, 6, False)
player value 13 | dealer value 21
init (17, 10, True)
player value 17 | dealer value 25
init (12, 8, False)
player value 18 | dealer value 25
init (11, 4, False)
player value 18 | dealer value 18
init (18, 10, False)
player value 18 | dealer value 20
init (15, 8, False)
player value 22 | dealer value 21
init (13, 1, False)
player value 23 | dealer value 21
init (20, 10, False)
player value 20 | dealer value 23
init (18, 2, False)
player value 18 | dealer value 17
init (6, 7, False)
player value 20 | dealer value 17
init (20, 1, False)
player value 20 | dealer value 19
init (4, 10, False)
init (15, 7, False)
player value 21 | dealer value 18
init (15, 2, False)
player value 21 | dealer value 17
init (21, 8, True)
init (15, 2, False)
player value 15 | dealer value 18
init (13, 10, False)
player value 23 | dealer value 20
init (16, 1, False)
player value 23 | dealer value 20
init (

player value 14 | dealer value 23
init (13, 5, False)
player value 25 | dealer value 22
init (14, 10, False)
player value 14 | dealer value 20
init (17, 1, True)
player value 25 | dealer value 19
init (18, 2, False)
player value 18 | dealer value 23
init (14, 8, False)
player value 18 | dealer value 18
init (18, 8, False)
player value 18 | dealer value 21
init (13, 10, False)
init (11, 7, False)
player value 19 | dealer value 21
init (20, 8, False)
player value 20 | dealer value 18
init (15, 3, False)
player value 25 | dealer value 21
init (15, 3, False)
player value 24 | dealer value 26
init (17, 10, False)
player value 24 | dealer value 26
init (20, 10, False)
player value 20 | dealer value 18
init (17, 1, True)
player value 17 | dealer value 20
init (11, 3, False)
player value 23 | dealer value 23
init (18, 2, False)
player value 18 | dealer value 18
init (13, 3, False)
player value 21 | dealer value 17
init (8, 7, False)
player value 25 | dealer value 23
init (8, 2, False)
player v

init (14, 2, False)
player value 17 | dealer value 21
init (9, 8, False)
player value 18 | dealer value 19
init (20, 5, False)
player value 20 | dealer value 24
init (11, 10, False)
player value 18 | dealer value 20
init (9, 7, False)
player value 12 | dealer value 20
init (15, 2, False)
player value 25 | dealer value 24
init (12, 10, False)
player value 18 | dealer value 20
init (17, 1, True)
init (11, 10, False)
init (16, 4, True)
player value 16 | dealer value 21
init (19, 10, False)
player value 19 | dealer value 17
init (17, 4, False)
player value 17 | dealer value 18
init (11, 2, False)
player value 20 | dealer value 20
init (14, 8, False)
player value 18 | dealer value 22
init (18, 5, True)
player value 23 | dealer value 24
init (18, 4, False)
player value 18 | dealer value 23
init (9, 5, False)
player value 19 | dealer value 20
init (13, 10, False)
player value 26 | dealer value 24
init (20, 1, False)
player value 20 | dealer value 20
init (14, 7, True)
player value 23 | dealer

In [10]:
b.playWithDealer(10000)

array([4066., 1524., 4410.])

In [24]:
for k, v in b.player_Q_Values.items():
    actions = b.player_Q_Values.get(k)
    action = max(actions.keys(), key=lambda k: actions[k])
    action = "HIT" if action == 1 else "STAND"
    print(k, action)

(12, 1, True) HIT
(12, 1, False) HIT
(12, 2, True) STAND
(12, 2, False) HIT
(12, 3, True) HIT
(12, 3, False) STAND
(12, 4, True) STAND
(12, 4, False) HIT
(12, 5, True) HIT
(12, 5, False) STAND
(12, 6, True) HIT
(12, 6, False) HIT
(12, 7, True) HIT
(12, 7, False) HIT
(12, 8, True) HIT
(12, 8, False) HIT
(12, 9, True) HIT
(12, 9, False) HIT
(12, 10, True) HIT
(12, 10, False) HIT
(13, 1, True) HIT
(13, 1, False) HIT
(13, 2, True) HIT
(13, 2, False) HIT
(13, 3, True) HIT
(13, 3, False) HIT
(13, 4, True) HIT
(13, 4, False) HIT
(13, 5, True) HIT
(13, 5, False) HIT
(13, 6, True) HIT
(13, 6, False) STAND
(13, 7, True) HIT
(13, 7, False) HIT
(13, 8, True) HIT
(13, 8, False) STAND
(13, 9, True) HIT
(13, 9, False) HIT
(13, 10, True) HIT
(13, 10, False) STAND
(14, 1, True) HIT
(14, 1, False) HIT
(14, 2, True) HIT
(14, 2, False) HIT
(14, 3, True) HIT
(14, 3, False) STAND
(14, 4, True) HIT
(14, 4, False) STAND
(14, 5, True) HIT
(14, 5, False) HIT
(14, 6, True) HIT
(14, 6, False) HIT
(14, 7, True) HI

In [211]:
b.player_Q_Values.get((12, 6, True)) 

{1: 0.00787588976344472, 0: 0.1}

#### Play with same strategy

In [221]:
q_values = {}

for i in range(12, 22):
    for j in range(1, 11):
        for k in [True, False]:
            q_values[(i, j, k)] = {}
            for a in [1, 0]:
                if i >= 17 and a == 0:
                    q_values[(i, j, k)][a] = 1
                elif i < 17 and a == 1:
                    q_values[(i, j, k)][a] = 1
                else:
                    q_values[(i, j, k)][a] = 0

In [222]:
b = BlackJackSolution()
b.player_Q_Values = q_values

b.playWithDealer(rounds=10000)

array([4209., 1447., 4344.])