In [3]:
import gym
import random

In [116]:
class BJEnv(gym.Env):
    """
    Blackjack
    """
    def __init__(self):
        self.action_space = gym.spaces.Discrete(2) # 0 hit, 1 stick
        
        obs_space = dict(
            player_sum = gym.spaces.Discrete(10), # x -> x + 11
            player_useful_Ace = gym.spaces.Discrete(2), # 0 no, 1 yes
            dealer_card = gym.spaces.Discrete(10) # 0 - Ace, x -> x + 1 (ex. 1 is 2, 9 is 10)
        )
        
        self.observation_space = gym.spaces.Dict(obs_space)
        self.reset()
    def _card_to_image(self, value):
        return "A" if value == 0 else value + 1
    def render(self):
        if self.state["player_useful_Ace"] > 0:
            print("Useful Ace")
        else:
            print("No useful Ace")
        print("Current sum: ", self.state["player_sum"])
        print("Dealer showing: ",  self._card_to_image(self.state["dealer_shown_card"]))
    def _random_card(self):
        return random.randint(0, 9) # 0 - Ace, x -> x + 1 (ex. 1 is 2, 9 is 10)
    def reset(self):
        self.state = dict(
            player_sum = random.randint(11, 21),
            player_useful_Ace = random.randint(0, 1),
            dealer_shown_card = self._random_card(),
            dealer_other_card = self._random_card()
        )
        self.state['dealer_sum'] = self.state['dealer_shown_card'] + 1 + self.state['dealer_other_card'] + 1
        if (self.state['dealer_shown_card'] == 0) or (self.state['dealer_other_card'] == 0):
            self.state['dealer_sum'] = self.state['dealer_sum'] + 10 if self.state['dealer_sum'] < 12 else self.state['dealer_sum']  
        obs = {
            k: v for k, v in self.state.items() if k in ['player_sum', 'player_useful_Ace', 'dealer_shown_card']
        }
        obs['player_sum'] -= 11
        return obs
    def step(self, action):
        done = None
        reward = None
        info = {}
        if action == 1: # stick
            done = True
            reward = 0 if self.state['player_sum'] < self.state['dealer_sum'] else 1
            info['dealer sum'] = self.state['dealer_sum'] 
        elif action == 0: # hit
            done = False
            reward = 0
            new_card = self._random_card()
            info['new card'] = self._card_to_image(new_card)
            self.state['player_sum'] += new_card + 1 # note since we start with 11 we cannot add another ACE.
            if self.state['player_sum'] > 21:
                if self.state['player_useful_Ace']:
                    self.state['player_sum'] -= 10
                else:
                    done = True
        else:
            assert False, f"unkown action {action}"
            
        return self.state, reward, done, info

In [117]:
env = BJEnv()

In [118]:
env.render()

No useful Ace
Current sum:  14
Dealer showing:  3


In [120]:
episodes = 5

for episode in range(episodes):
    print(f"Episode {episode}\n")
    obs = env.reset()
    while True:
        env.render()
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        print("action", action, "->", "reward", reward, "done", done, info, "\n")
        if done:
            break
    print("-" * 50)

Episode 0

No useful Ace
Current sum:  14
Dealer showing:  A
action 0 -> reward 0 done False {'new card': 4} 

No useful Ace
Current sum:  18
Dealer showing:  A
action 0 -> reward 0 done True {'new card': 10} 

--------------------------------------------------
Episode 1

No useful Ace
Current sum:  18
Dealer showing:  4
action 0 -> reward 0 done True {'new card': 4} 

--------------------------------------------------
Episode 2

Useful Ace
Current sum:  17
Dealer showing:  8
action 0 -> reward 0 done False {'new card': 9} 

Useful Ace
Current sum:  16
Dealer showing:  8
action 1 -> reward 1 done True {'dealer sum': 11} 

--------------------------------------------------
Episode 3

No useful Ace
Current sum:  11
Dealer showing:  2
action 1 -> reward 1 done True {'dealer sum': 6} 

--------------------------------------------------
Episode 4

No useful Ace
Current sum:  21
Dealer showing:  7
action 0 -> reward 0 done True {'new card': 2} 

----------------------------------------------