In [16]:
import sys
sys.path.insert(0, '/Users/yionko/Desktop/Reinforcement Learning')
from Blackjack import hit, putBack, valueSum, randomValue
from tqdm import tqdm
import copy

In [48]:

# THE SIMPLE POLICY: hit until >= 17
# Record the number/probability of winning of each choice.

def simplePolicy(n, dealerCard1, dealerCard2, playerCard1, 
                 playerCard2):
    
    cards = [1] * 52
    cardsCount = [4] * 13
    
    playerHistory = [playerCard1, playerCard2]
    dealerHistory = [dealerCard1, dealerCard2]
    
    for i in range(0, len(playerHistory)):
        hit(playerHistory[i], cards, cardsCount)
    for i in range(0, len(dealerHistory)):
        hitResult = hit(dealerHistory[i], cards, cardsCount)
    
    
    
    numOfWin = 0
    
    for i in tqdm(range(0, n)):
        cards1 = copy.deepcopy(hitResult[1])
        cardsCount1 = copy.deepcopy(hitResult[2])
        
        playerHistory = [playerCard1, playerCard2]

        # calculate valueSum as if it is for the dealer
        # because we treat Ace as one unless treating
        # it as 11 makes the total >= 17.
        playerSum = valueSum(playerHistory, 1)

        while playerSum < 17:
            value = randomValue(cards1) 
            hit(value, cards1, cardsCount1)
            playerHistory.append(value)
            playerSum = valueSum(playerHistory, 1)
        
        if playerSum > 21:
            continue
        else:
            dealerHistory = [dealerCard1, dealerCard2]
            dealerSum = valueSum(dealerHistory, 1)
            
            while dealerSum < 17:
                value = randomValue(cards1) 
                hit(value, cards1, cardsCount1)
                dealerHistory.append(value)
                dealerSum = valueSum(dealerHistory, 1)
            
        if dealerSum > 21:
            numOfWin += 1
        elif playerSum > dealerSum:
            numOfWin += 1
    return numOfWin / n
    
   


In [49]:
n = 1000000

dealerCard1 = 1
dealerCard2 = 6

playerCard1 = 5
playerCard2 = 9

winProb = simplePolicy(n, dealerCard1, dealerCard2, playerCard1, playerCard2)
print(winProb)

100%|██████████| 1000000/1000000 [01:02<00:00, 15930.90it/s]

0.336356





In [38]:
# CARDS and CARDSCOUNT should exclude dealerCard1 and 
# playerHistory. DealerCard2 is random to player.

def oneNodeStand(n, dealerCard1, playerHistory, cards, cardsCount):   
    probOfWinIfStand = 0
    
    
    playerSum = valueSum(playerHistory, 0)
    
    if playerSum > 21:
        return 0

    dealerBurstProb = 0

    numOfPlayerWin = 0
    
    for i in tqdm(range(0, n)):
        cards1 = copy.deepcopy(cards)
        cardsCount1 = copy.deepcopy(cardsCount)
    
        dealerHistory = [dealerCard1]
        
        value = randomValue(cards1)
        hit(value, cards1, cardsCount1)
        dealerHistory.append(value)
        
        dealerSum = valueSum(dealerHistory, 1)
        
        while dealerSum < 17:
            value = randomValue(cards1)
            hit(value, cards1, cardsCount1)
            dealerHistory.append(value)
            dealerSum = valueSum(dealerHistory, 1)
        #print(dealerHistory)
        if dealerSum > 21:
            numOfPlayerWin += 1
        elif playerSum > dealerSum:
            numOfPlayerWin += 1
            
    probOfWinIfStand = numOfPlayerWin / n

    return probOfWinIfStand

In [47]:
n = 1000000
dealerCard1 = 1
playerHistory = [5, 9]
cards = [1] * 52
cardsCount = [4] * 13

hit(dealerCard1, cards, cardsCount)
for i in range(0, len(playerHistory)):
        hit(playerHistory[i], cards, cardsCount)

probOfWinIfStand = oneNodeStand(n, dealerCard1, playerHistory, cards, cardsCount)
print(probOfWinIfStand)

100%|██████████| 1000000/1000000 [00:59<00:00, 16799.67it/s]

0.117258





In [50]:
# The player will hit if the probability of winning
# if hitting only one card is greater than the 
# probability of winning if she stands.

def hitAndStandStrategy(n, dealerCard1, playerHistory, cards, cardsCount):
    
    winProbStand = oneNodeStand(n, dealerCard1, playerHistory, cards, cardsCount)
       
    winProbHit = 0
    

    for j in range(1, 14):
        
        cards1 = copy.deepcopy(cards)
        cardsCount1 = copy.deepcopy(cardsCount)
        
        hitResult = hit(j, cards1, cardsCount1)
        if hitResult == None:
            hitProb = 0
            continue
        hitProb = hitResult[0]

        playerHistory.append(j)
        playerSum = valueSum(playerHistory, 0)
        if isinstance(playerSum, int) == False:
            # Treat Ace as 11 because it increases the winning probability
            # when player does not burst.
            playerSum = playerSum[1]

        if playerSum > 21:
            playerHistory.pop()
            continue
        else:
            winProbHit += hitProb * oneNodeStand(n, dealerCard1, playerHistory, 
                                                 cards1, cardsCount1)
        playerHistory.pop()
    if winProbHit > winProbStand:
        strategy = 1
    else:
        strategy = 0
    
    return strategy, winProbHit, winProbStand
    

In [46]:
n = 1000
dealerCard1 = 1
dealerHistory = [dealerCard1]

playerHistory = [5, 9]

cards = [1] * 52
cardsCount = [4] * 13

hit(dealerCard1, cards, cardsCount)
for i in range(0, len(playerHistory)):
    hit(playerHistory[i], cards, cardsCount)

hitAndStandStrategyResult = hitAndStandStrategy(n, dealerCard1, playerHistory, cards, cardsCount)
print(hitAndStandPolicyResult)

100%|██████████| 100000/100000 [00:05<00:00, 16825.14it/s]
100%|██████████| 100000/100000 [00:05<00:00, 17118.47it/s]
100%|██████████| 100000/100000 [00:05<00:00, 16969.39it/s]
100%|██████████| 100000/100000 [00:05<00:00, 16946.31it/s]
100%|██████████| 100000/100000 [00:05<00:00, 16895.07it/s]
100%|██████████| 100000/100000 [00:05<00:00, 16819.72it/s]
100%|██████████| 100000/100000 [00:05<00:00, 16755.80it/s]
100%|██████████| 100000/100000 [00:05<00:00, 16833.05it/s]

(0.16070142857142855, 0.1169)





In [67]:
def hitAndStandPolicy(n, dealerCard1, playerCard1, playerCard2):
    
    cards = [1] * 52
    cardsCount = [4] * 13
    
    playerHistory = [playerCard1, playerCard2]
    dealerHistory = [dealerCard1]
    
    for i in range(0, len(playerHistory)):
        hit(playerHistory[i], cards, cardsCount)
    for i in range(0, len(dealerHistory)):
        hitResult = hit(dealerHistory[i], cards, cardsCount)
    
    
    
    numOfWin = 0
    cards1 = copy.deepcopy(cards)
    cardsCount1 = copy.deepcopy(cardsCount)
    strategy = hitAndStandStrategy(n, dealerCard1, playerHistory, 
                                   cards1, cardsCount1)[0]
    for x in tqdm(range(0, n)):
        cards = [1] * 52
        cardsCount = [4] * 13

        playerHistory = [playerCard1, playerCard2]
        dealerHistory = [dealerCard1]

        for i in range(0, len(playerHistory)):
            hit(playerHistory[i], cards, cardsCount)
        for i in range(0, len(dealerHistory)):
            hitResult = hit(dealerHistory[i], cards, cardsCount)
    
        
        while strategy == 1:
            value = randomValue(cards)
            hit(value, cards, cardsCount)
            playerHistory.append(value)
            playerSum = valueSum(playerHistory, 0)

            if isinstance(playerSum, int) == True and playerSum > 21:
                break

            cards1 = copy.deepcopy(cards)
            cardsCount1 = copy.deepcopy(cardsCount)
            strategy = hitAndStandStrategy(n, dealerCard1, playerHistory, 
                                         cards1, cardsCount1)[0]
        
        
        playerSum = valueSum(playerHistory, 0)

        if isinstance(playerSum, int) == True and playerSum > 21:
            continue
        else:
            dealerHistory = [dealerCard1]
            dealerSum = valueSum(dealerHistory, 1)
            
            while dealerSum < 17:
                value = randomValue(cards) 
                hit(value, cards, cardsCount)
                dealerHistory.append(value)
                dealerSum = valueSum(dealerHistory, 1)
            
            if dealerSum > 21:
                numOfWin += 1
            elif playerSum > dealerSum:
                numOfWin += 1
    return numOfWin / n

In [69]:
n = 1000
dealerCard1 = 1
playerCard1 = 5
playerCard2 = 9
print(hitAndStandPolicy(n, dealerCard1, playerCard1, playerCard2))

100%|██████████| 1000/1000 [00:00<00:00, 12968.00it/s]
100%|██████████| 1000/1000 [00:00<00:00, 16018.15it/s]
100%|██████████| 1000/1000 [00:00<00:00, 16855.43it/s]
100%|██████████| 1000/1000 [00:00<00:00, 16461.94it/s]
100%|██████████| 1000/1000 [00:00<00:00, 16471.96it/s]
100%|██████████| 1000/1000 [00:00<00:00, 16529.15it/s]
100%|██████████| 1000/1000 [00:00<00:00, 16075.86it/s]
100%|██████████| 1000/1000 [00:00<00:00, 16139.45it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]
100%|██████████| 1000/1000 [00:00<00:00, 16664.96it/s]

100%|██████████| 1000/1000 [00:00<00:00, 16813.26it/s]
100%|██████████| 1000/1000 [00:00<00:00, 5207.98it/s]

0.114



