In [195]:
import random
import copy as cp
import itertools
import gym
import numpy as np
from gym import spaces

We will do everything based on indexes in a list array like the following to facilitate accessing it:

0 : Black

1 : Blue

2 : Green

3 : Red

4 : White

5 : Joker 

### Agent:

In [196]:
class SplendorAgent:
    '''Our agent will, based on its state(gems/cards/nobles) get the set of legal actions and then execute the chosen one'''
    MAX_GEMS_SUPPLY=10 #general attribute to call and compare againtst gems
    MAX_JOKERS=3
    def __init__(self, gem_reserve=None, cards_player=None, players_state=None, nobles=None):
        self.gem_reserve = gem_reserve if gem_reserve is not None else [0,0,0,0,0,0] # gems in the agents hand initializes as 0
        self.player_cards = cards_player if cards_player is not None else [] # cars in the player power
        self.player_reserved_cards = [] #cards reserved by the player
        self.nobles = nobles if nobles is not None else [] #nobles the player has
        self.buying_power = self.gem_reserve # buying power is initializes equal to the amount of gems of each kind
        self.points = 0

        self.binary_lists_with_sum_of_3 = list(filter(lambda binary_list: sum(binary_list) == 3, itertools.product([0, 1], repeat=5)))
        self.binary_lists_with_sum_of_2 = list(filter(lambda binary_list: sum(binary_list) == 2, itertools.product([0, 1], repeat=5)))
    
    def reset(self):
        self.gem_reserve = [0, 0, 0, 0, 0, 0] # Reset gems in the agent's hand to 0
        self.player_cards = [] # Reset cards in the player's possession
        self.player_reserved_cards = [] # Reset reserved cards
        self.nobles = [] # Reset nobles
        self.buying_power = [0, 0, 0, 0, 0, 0] # Reset buying power to 0
        self.points = 0 # Reset points to 0

        self.binary_lists_with_sum_of_3 = list(filter(lambda binary_list: sum(binary_list) == 3, itertools.product([0, 1], repeat=5)))
        self.binary_lists_with_sum_of_2 = list(filter(lambda binary_list: sum(binary_list) == 2, itertools.product([0, 1], repeat=5)))
    
    def UpdateBuyingPower(self): 
        ''' Buying power is gems + cards bought of each color'''
        self.buying_power =cp.deepcopy(self.gem_reserve)
        for card in self.player_cards:
            self.buying_power[card.color] +=1 # if you have bought a card of said color you get an extra buying unit for that color.

    def get_legal_actions(self,spl_env):
        '''I'll try to get all legal actions as indexes:
        Buy Card , (i,j) #specifies which card to buy
        Reserve Card, (i,j) #card to reserve
        Buy reserved (i) # index in reserved cards.
        Take Gems, [x,x,x,x,x] #gems to take
        ''' 

        ##First we get all legal actions:
        legal_actions=[] #57 available actions:
        ##Check cards that can be bought based on updated buying power'''
        card_buy = self.get_purchasable_cards(spl_env)
        legal_actions.extend(card_buy)
        ##Now reservable card(should be all as long as we have enough space and gems)'''
        reserve_cards=self.get_reservable_cards(spl_env)
        legal_actions.extend(reserve_cards)
        ##We get the purchasable reserved cards:
        buyable_reserved_cards=self.get_purchasable_reserved()
        legal_actions.extend(buyable_reserved_cards)
        ##Takeable gems combinations:
        gem_buy=self.get_takable_gems(spl_env)
        legal_actions.extend(gem_buy)

        return legal_actions       
#        return self.transform_legal_actions(legal_actions)

    def get_purchasable_cards(self,spl_env):
        '''We get all purchasable cards based on buying power(gems + cards) and return the index (level, column) in list format'''
        card_buy=[]
        for board in [spl_env.board_1, spl_env.board_2, spl_env.board_3]:   
                for card in board:
                    over = 0
                    for i in range(len(card.cost)):
                        over += max(0, card.cost[i]- self.buying_power[i])
                    if over <= self.buying_power[-1]:
                        card_buy.append((card.level-1,board.index(card))) # we append a tuple containing level(0-2),index in board(0-3)
        return  [('buy card', x) for x in card_buy]

    def get_reservable_cards(self,spl_env):
        '''Check reservable cards based on number of gems and number of reserved cards.'''
        reserve_cards = []
        if spl_env.gems[-1] >0 and len(self.player_reserved_cards) < SplendorAgent.MAX_JOKERS:
            for board in [spl_env.board_1, spl_env.board_2, spl_env.board_3]:
                    for card in board:
                            reserve_cards.append((card.level-1,board.index(card)))
        return [('reserve', x) for x in reserve_cards]         
    
    
    def get_purchasable_reserved(self):
        '''Checks fi we can buy our reserved cards and return index of reserved card purchasable'''
        buyable_reserved_cards = []
        for card in self.player_reserved_cards:
                    over = 0
                    for i in range(len(card.cost)):
                        over += max(0, card.cost[i]- self.buying_power[i])

                    if over <= self.buying_power[-1]:
                        buyable_reserved_cards.append(self.player_reserved_cards.index(card)) # returns index in reserved cards 0-2
        return [('buy reserved', x) for x in buyable_reserved_cards]
    
    def get_takable_gems(self,spl_env):
        '''We check all possible combinations of gem taking doubles and triples.'''
        #make the list of gems currently takable
        doable_3gems = [] #all possible combinations for 3 gems
        for comb in self.binary_lists_with_sum_of_3:
            if  min([spl_env.gems[i] - comb[i] for i in range(len(comb))]) >=0:
                doable_3gems.append(comb)

        doable_2gems = [] #possible combinations for 2 gems (same color)
        for comb in self.binary_lists_with_sum_of_2:
            if  min([spl_env.gems[i] - comb[i] for i in range(len(comb))]) >=0:
                doable_2gems.append(comb)        
    
        gem_buy= []

        # If gem supply is 10, don't consider taking gems
        if sum(self.gem_reserve) == SplendorAgent.MAX_GEMS_SUPPLY:
            pass # don't return gems to take.
        
        #append the list of 3 gems if the player has already 7 gems or less
        if sum(self.gem_reserve) <= 7:
            gem_buy.extend(doable_3gems)

        #append the list of 2 gems if the player has already 8

        if sum(self.gem_reserve) == 8:
            gem_buy.extend(doable_2gems)
        
        # append the list of 1 gem if the player has already 9
        
        if sum(self.gem_reserve) == 9:
            gem_buy.extend([[1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1]])

        #Appends the duos
        ##If the supply of gems is greater than 3 we can alwasy take 2 of the same gems.
        for x in range(len(spl_env.gems[:5])):
                list_empty = [0,0,0,0,0]
                if spl_env.gems[x] >3:
                    list_empty[x] = 2
                    gem_buy.append(list_empty)
        return [('take', x) for x in gem_buy]

    def transform_legal_actions(slef,legal_actions):
        actions_dict = {'buy_card': [], 'reserve_card': [], 'buy_reserved': [], 'pick_tokens': []}
        
        for action in legal_actions:
            action_type = action[0]
            params = action[1]
            
            if action_type == 'buy card':
                actions_dict['buy_card'].append(params)
            elif action_type == 'reserve':
                actions_dict['reserve_card'].append(params)
            elif action_type == 'buy reserved':
                actions_dict['buy_reserved'].append(params)
            elif action_type == 'take':
                actions_dict['pick_tokens'].append(params)
                
        return actions_dict
                   

    def execute_action(self, action, spl_env): 
        '''Once an actions has been chosen we execute it by calling the action function'''

        # execute the specified action and update the state
        action_type, action_params = action
        if action_type == 'buy card':
            index = action_params
            self.buy_card(index, spl_env)

        elif action_type == 'buy reserved':
            index = action_params
            self.buy_reserved_card(index, spl_env)

        elif action_type == 'reserve':
            index = action_params
            self.reserve_card(index, spl_env)

        elif action_type == 'take':
            gems = action_params
            self.take_gems(gems, spl_env)

    def buy_card(self, index, spl_env):
        '''We buy the selected card and update the environment by removing the bought card from the board, updating player's gems and supply gems'''
        # update gem and card supply and player's state when a card is bought
        #we get the card from the index:
        card=None
        while card is None:
            for board in [spl_env.board_1, spl_env.board_2, spl_env.board_3]:
                        for cards in board:
                             if index == (cards.level-1,board.index(cards)):
                                  card=cards
                                
        #Before adding card to players hand we need to check how much buying power do our current cards give us:
        
        #extract the colors of the current cards
        cards_color = [x.color for x in self.player_cards]

        #get our current color portofolio = buying power without gems
        cards_sum = [0,0,0,0,0]
        for x in cards_color:
             cards_sum[x] +=1
        
        #add card to players cards
        self.player_cards.append(card)

        ##Now we remove it from the board
        print('cards power:',cards_sum)
        print('cost:',card.cost)
        if card.level ==1:
             spl_env.board_1.remove(card)
        
        elif card.level ==2:
             spl_env.board_2.remove(card)

        elif card.level ==1:
             spl_env.board_3.remove(card)

        ##First check from the cost we substract our current free buying power.
        new_cost=[0,0,0,0,0]
        for i in range(len(card.cost)):
            new_cost[i]=max(0,card.cost[i]-cards_sum[i])
        
        print('Discounted card cost:',new_cost)


        #compute required tokens
        over = 0
        print('orig gem reserve:',self.gem_reserve)
        print('orig gem supply:',spl_env.gems)
        #print('card buying power:',self.)
        '''I'm using gem_reserve instead of buying power, I think it will be better to keep the values from gem reserve and cards separated to know what gems to resupply and which not'''
        for i in range(len(card.cost)):
            #update to know if we have to spend jokers
            over = max(0, new_cost[i]- self.gem_reserve[i]) #jokers needed for this color
            #print('index:',i,'over:',over)
            #update the number of reamining gems
            #print('Buying power:',self.buying_power[i],self.gem_reserve[i])
            gem_transaction = max(0, new_cost[i] - over)# after substracting cards + joker we get the amount to pay.
            #gem_transaction = max(0, card.cost[i] - self.buying_power[i] + self.gem_reserve[i])
            if over>self.gem_reserve[5]:
                 print(over,self.gem_reserve)
                 raise ValueError("Too many jokers")
            
            
            #print('gem_transaction?',gem_transaction)
            self.gem_reserve[i] -= gem_transaction
            spl_env.gems[i]  += gem_transaction
            self.gem_reserve[5]-=over
            spl_env.gems[5]+=over
        print('New gem reserve:',self.gem_reserve)
        print('new gem supply:',spl_env.gems)


        ##update number of jokers    
        #self.gem_reserve[5] -= over
        #spl_env.gems[5] += over


    def reserve_card(self, index, spl_env):
        '''We reserve the selected card and remove it from the board'''
        ##Get the card:
        card=None
        while card is None:
            for board in [spl_env.board_1, spl_env.board_2, spl_env.board_3]:
                        for cards in board:
                             if index == (cards.level-1,board.index(cards)):
                                  card=cards
        #add card to players reserved cards
        self.player_reserved_cards.append(card)

        #get rid of the card on the board
        if card.level ==1:
             spl_env.board_1.remove(card)
        
        elif card.level ==2:
             spl_env.board_2.remove(card)

        elif card.level ==3:
             spl_env.board_3.remove(card)

        #get a joker
        self.gem_reserve[5] += 1 # add joker to hand.
        spl_env.gems[5]-=1 # added removal of joker from board

    def buy_reserved_card(self, index, spl_env):
        '''We buy the selected reserved card and update the environment'''
        ##Get reserved card:
        card=None
        while card is None:
            for cards in self.player_reserved_cards:            
                if index == self.player_reserved_cards.index(cards):        
                     card=cards



        cards_color = [x.color for x in self.player_cards]

        #get our current color portofolio = buying power without gems
        cards_sum = [0,0,0,0,0]
        for x in cards_color:
             cards_sum[x] +=1
        
        new_cost=[0,0,0,0,0]
        for i in range(len(card.cost)):
            new_cost[i]=max(0,card.cost[i]-cards_sum[i])
        
        print('Discounted card cost:',new_cost)
        # update gem and card supply and player's state when a card is bought
        #add card to players cards
        self.player_cards.append(card)

        #compute required tokens
        over = 0
        print('reserve',self.gem_reserve)
        for i in range(len(card.cost)):
            #update to know if we have to spend jokers
            #over += max(0, card.cost[i]- self.buying_power[i])
            over=max(0,new_cost[i]-self.gem_reserve[i])
            #update the number of reamining gems
            #gem_transaction = max(0, card.cost[i] - self.buying_power[i] + self.gem_reserve[i])
            if over>self.gem_reserve[5]:
                 print(over, self.gem_reserve)
                 raise ValueError("Too many jokers")
            gem_transaction = max(0,new_cost[i]-over)
            self.gem_reserve[i] -=  gem_transaction
            spl_env.gems[i]  += gem_transaction
            spl_env.gems[5]+=over
            self.gem_reserve[5]-=over
        #update number of jokers    
        #self.gem_reserve[5] -= over
        #spl_env.gems[5] += over


    def take_gems(self, gems_array, spl_env):
        '''Add gems from supply to player's reserve and remove it from supply.'''

        # simply add the gems to the players count
        for i in range(len(gems_array)):
            self.gem_reserve[i] += gems_array[i]
            spl_env.gems[i] -= gems_array[i]
    def count_point(self):
        '''Count points and win condition'''

        points =  0
        win = False
        for cards in self.player_cards:
              points +=cards.points
        
        for noble in self.nobles:
             points +=noble[1]
        
        if points >= 15:
             win = True
        self.points = points
        return points, win

### ENVIRONMENT from OpenAI

In [197]:


class SplendorEnv(gym.Env):
    def __init__(self, card_supply, deck_nobles, initial_gems = [7,7,7,7,7,5]):
        super(SplendorEnv, self).__init__()
        '''Let's define the initial state and variables.'''
        num_card_tiers = 3
        cards_per_tier = 4
        num_token_colors = 5
        num_nobles = 3
    
        #creates the card decks
        deck=cp.deepcopy(card_supply) #since we are using pop we need to define a new list removing references. 
        self.deck_1 = deck[0]
        self.deck_2 = deck[1]
        self.deck_3 = deck[2]

        #creates the cardboard
        self.board_1 = []
        self.board_2 = []
        self.board_3 = []

        self.UpdateCardBoard()

        #creates the noble board
        self.nobles = []
        self.CreateNobles(deck_nobles)

        #Create the gems
        self.gems= initial_gems
        
        ##Valid actions:
        self.valid_actions=None

        self.score=0

        # Define state space
        #self.state_space = spaces.Dict({
        #    'cards': spaces.Box(low=0, high=1, shape=(num_card_tiers, cards_per_tier), dtype=np.integereger),#3x4 matrix
        #    'nobles': spaces.Box(low=0, high=1, shape=(num_nobles,), dtype=np.int), # 3x1 array
        #    'token_piles': spaces.Box(low=0, high=7, shape=(num_token_colors,), dtype=np.integereger), # array
        #    'reserved_cards': spaces.Box(low=0, high=1, shape=(num_card_tiers, cards_per_tier), dtype=np.int), #
        #    'cards_owned': spaces.Box(low=0, high=10, shape=(5,), dtype=np.int),    # Number of cards owned for each color
        #    'player_gems': spaces.Box(low=0, high=10, shape=(6,), dtype=np.int),    # Number of gems owned for each color
        #    'player_points': spaces.Box(low=0, high=25, shape=(1,), dtype=np.int)  # Player's total points
        #})
        ##Simpler version:
        self.observation_space = gym.spaces.Dict({
            'player_gems': spaces.Box(low=0, high=7, shape=(num_token_colors+1,), dtype=np.integer), #array 6 elements
            'gems_supply': spaces.Box(low=0, high=7, shape=(num_token_colors+1,), dtype=np.integer), #array 6
            'player_cards': spaces.Box(low=0, high=np.inf, shape=(num_token_colors,), dtype=np.integer), #array 5
            'player_score': spaces.Discrete(30), # integer
            'nobles': spaces.Tuple(tuple([spaces.Tuple((spaces.MultiDiscrete([1,1,1,1,1]), spaces.Discrete(5))) for _ in range(num_nobles)])), #cost and points per noble
            'player_reserved':spaces.Discrete(2)
            })
        
        ##Max purchase : 12 cards
        ##Max reserve = 12 cards
        ##Max buy purchase = 3
        ###Note for max_gem combination:
            #3 out of 5 gems of different colors = 10 options
            #2 out of 5 of same color = 5 options
            #2 out of 5 of different color = 10  options
            # 1 out of 5 =5 options
            ##Total = 30  options.
        # Define action space
        
        self.action_space = spaces.Dict({
            'action_type': spaces.Discrete(3),  # Action type (0-2)
            'purchase_card': spaces.Tuple((
                spaces.Discrete(num_card_tiers),    # Card tier (0-2)
                spaces.Discrete(cards_per_tier),    # Card index (0-cards_per_tier)
            )),
            'reserve_card': spaces.Tuple((
                spaces.Discrete(num_card_tiers),    # Card tier (0-2)
                spaces.Discrete(cards_per_tier),    # Card index (0-cards_per_tier)
            )),
            'pick_tokens': spaces.MultiBinary(num_token_colors)  # Tokens to pick
        })

   
    def UpdateCardBoard(self):
        '''Randomly shuffle each deck and take the first 3 cards to place them on the board
        Updates boards to 3 if there are less than 3 cards'''
        random.shuffle(self.deck_1)
        random.shuffle(self.deck_2)
        random.shuffle(self.deck_3)
        #update cards rank 1
        while len(self.board_1) <4:
            if len(self.deck_1) ==0:
                break
            self.board_1.append(self.deck_1.pop(0))
            
        #update cards rank 2
        while len(self.board_2) <4:
            if len(self.deck_2) ==0:
                break
            self.board_2.append(self.deck_2.pop(0))
            
        #update cards rank 3
        while len(self.board_3) <4:
            if len(self.deck_3) ==0:
                break
            self.board_3.append(self.deck_3.pop(0))
    
    
    def CreateNobles(self, deck_nobles):
        '''Shuffle the deck of Nobles and get the first 3 cards - no replacement here.'''
        deck_n=cp.deepcopy(deck_nobles)
        random.shuffle(deck_n)

        #update cards rank 1
        while len(self.nobles) <4:
            self.nobles.append(deck_n.pop(0)) #the issue for reseting is that when we pop we modify the original


    def get_masked_action_space(self, legal_actions):
      # create a new masked action space that only includes the legal actions
      # the new action space should be a copy of the original action space with the invalid actions removed
      # you can create a new gym.spaces object for the new action space or modify the existing one in place
      #new_action_space = gym.spaces.MultiBinary(self.action_space.shape[0])
      masked_action_space = gym.spaces.Box(low=0, high=1, shape=self.action_space.shape, dtype=np.float32)
      for key in self.action_space.spaces.keys():
            if key in legal_actions:
                masked_action_space[key] = self.action_space[key]
      
      return masked_action_space
    
    
    def step(self, action,agent):

        agent.execute_action(action,self)
        agent.UpdateBuyingPower()
        self.UpdateCardBoard()

        # Calculate reward
        reward,score,done = self.calculate_reward(agent)
        self.score=score
        # Check if the game is done
        #done = self.is_done()
        ##Update observation:
        next_obs = {
        'player_gems': agent.gem_reserve,  # player has no gems initially
        'gems_supply': self.gems,  # 4 gems of each color except for gold, which has 5
        'player_cards': agent.player_cards,  # player has no cards initially
        'player_score': self.score,  # player has no score initially
        'nobles:':agent.nobles,
        'player_reserved':agent.player_reserved_cards
        #'nobles': tuple([(self.nobles[0].requirements, self.nobles[0].points), (self.nobles[1].requirements, self.nobles[1].points),(self.nobles[2].requirements, self.nobles[2].points)])  # example noble cost and points per noble
        }

        # Return the next state, reward, done, and additional info
        return next_obs, reward, done, {}

    def calculate_reward(self,agent):
        # Calculate reward based on the current state of the game
        reward=0
        old_score=agent.points
        points,win=agent.count_point()
        if old_score<points:
            reward+=5
        else:
            reward-=1
        return reward,points,win


    def reset(self, new_deck,new_deck_nobles, initial_gems = [7,7,7,7,7,5]):
        '''Reset function, re-initializes all environmental variables:
        Set score to 0, reset supply, decks, boards, reserved cards, nobles etc.'''
        ###Reset all variables and rebuild:
        ##Decks:
        self.deck_1=[]
        self.deck_2=[]
        self.deck_3=[]

        #creates the card decks
         
        #self.deck_1 = new_deck[0]
        #self.deck_2 = new_deck[1]
        #self.deck_3 = new_deck[2]
        self.deck_1 = cp.deepcopy(new_deck[0])
        self.deck_2 = cp.deepcopy(new_deck[1])
        self.deck_3 = cp.deepcopy(new_deck[2])
        ##Boards:
        #creates the cardboard
        self.board_1 = []
        self.board_2 = []
        self.board_3 = []

        self.UpdateCardBoard()
        ##Nobles:
        #creates the noble board
        self.nobles = []
        self.CreateNobles(new_deck_nobles)
        ##Gems:
        #Create the gems
        self.gems= [7,7,7,7,7,5]
        ##Agentwise:
        # Reset the agent's variables
        #self.agent.reset()
        obs = {
            'player_gems': np.array([0, 0, 0, 0, 0, 0,]),  # player has no gems initially
            'gems_supply': np.array([7, 7, 7, 7, 7, 5]),  # 4 gems of each color except for gold, which has 5
            'player_cards': np.array([0, 0, 0, 0, 0]),  # player has no cards initially
            'player_score': 0,  # player has no score initially
            'nobles': tuple([(self.nobles[0].requirements, self.nobles[0].points), (self.nobles[1].requirements, self.nobles[1].points),(self.nobles[2].requirements, self.nobles[2].points)])  # example noble cost and points per noble
                        }



        return obs

    def render(self, mode='human'):
        # Render the current game state
        # You can implement a simple text-based representation or a more complex graphical representation
        pass

    def close(self):
        # Clean up the environment if necessary
        pass


In [198]:
class Card:
    def __init__(self, level, color, cost, points):
        '''Cards are defined by level color cost and points they give'''
        #int ranges from 1 to 3
        self.level = level
        #We would keep as int based on this mapping  {0: 'black', 1:'blue', 2: 'green', 3: 'red', 4:'white'}(alphabetical order)
        self.color = color
        #We do cost as an array [black, ...., white] [0, 2, 1, 0, 0]
        self.cost = cost
        #int, ranges from 0 to 5
        self.points = points

class Noble:
    def __init__(self, requirements, points):
        '''For nobles we only care about requirements and points you get'''
        #We do requirements as an array [black, ...., white] [3, 3, 3, 0, 0]
        self.requirements = requirements
        #int ranges from 3 to 4
        self.points = points

class GemColor:
    def __init__(self, name):
        self.name = name

In [199]:
#mapping  {0: 'black', 1:'blue', 2: 'green', 3: 'red', 4:'white'}

# create cards rank 1
cards_1= [Card(1, 2, [0,1,1,1,1], 0),
          Card(1, 2, [1,1,1,1,0], 0),
          Card(1, 2, [0,2,0,2,1], 0),
          Card(1, 3, [0,0,0,4,0], 0),
          Card(1, 1, [0,0,4,0,0], 0),
          Card(1, 0, [1,2,1,0,1], 0)]

#create cards rank 2
cards_2=[Card(2, 2, [0,0,1,2,1], 1),
         Card(2, 2, [2,0,3,0,0], 1),
         Card(2, 2, [0,0,4,0,2], 1),
         Card(2, 3, [0,0,0,0,5], 1),
         Card(2, 1, [3,0,0,3,0], 1),
         Card(2, 0, [0,0,0,2,2], 1)]

#create cards rank3
cards_3=[Card(3, 2, [0,0,3,3,3], 3),
         Card(3, 2, [0,0,0,6,0], 3),
         Card(3, 2, [0,3,3,0,3], 3),
         Card(3, 3, [3,3,3,0,0], 4),
         Card(3, 1, [0,0,7,0,0], 4),
         Card(3, 0, [3,3,0,3,0], 4)]

# create nobles
nobles= [Noble([3,3,0,0,3], 3),
Noble([3,0,3,3,0], 3),
Noble([0,4,0,4,0], 3),
Noble([0,0,4,0,4], 3),
Noble([4,0,0,0,4], 3),
Noble([0,0,0,5,3], 3)]

# create SplendorState
'''Note, card supply and nobles are list of lists of lengths 3 and 1 respectively.'''
##Get the card supply:
card_supply = [cards_1, cards_2, cards_3]

In [200]:
import gym

env = SplendorEnv(card_supply, nobles)
Fernando = SplendorAgent()
obs = env.reset(card_supply, nobles)
Fernando.reset()
print('obs:',obs)
done = False
actions_list=[]
while not done:
    actions = Fernando.get_legal_actions(env)
    random_action = random.choice(actions)    
    #action = env.action_space.sample()  # Replace with your own action selection logic
    print('available actions:',actions)
    actions_list.append(random_action)
    #print('Masked actions:',actions)
    #print('sampled action?',action)
    print('random action:',random_action)
    n_obs,reward,done,_= env.step(random_action,Fernando)
    #print('Next_obs:',n_obs)
    env.render()
print(f'Fernando made {Fernando.points} points in {len(actions_list)} turns, by making the following actions {actions_list}' )
env.close()


obs: {'player_gems': array([0, 0, 0, 0, 0, 0]), 'gems_supply': array([7, 7, 7, 7, 7, 5]), 'player_cards': array([0, 0, 0, 0, 0]), 'player_score': 0, 'nobles': (([3, 0, 3, 3, 0], 3), ([3, 3, 0, 0, 3], 3), ([0, 4, 0, 4, 0], 3))}
available actions: [('reserve', (0, 0)), ('reserve', (0, 1)), ('reserve', (0, 2)), ('reserve', (0, 3)), ('reserve', (1, 0)), ('reserve', (1, 1)), ('reserve', (1, 2)), ('reserve', (1, 3)), ('reserve', (2, 0)), ('reserve', (2, 1)), ('reserve', (2, 2)), ('reserve', (2, 3)), ('take', (0, 0, 1, 1, 1)), ('take', (0, 1, 0, 1, 1)), ('take', (0, 1, 1, 0, 1)), ('take', (0, 1, 1, 1, 0)), ('take', (1, 0, 0, 1, 1)), ('take', (1, 0, 1, 0, 1)), ('take', (1, 0, 1, 1, 0)), ('take', (1, 1, 0, 0, 1)), ('take', (1, 1, 0, 1, 0)), ('take', (1, 1, 1, 0, 0)), ('take', [2, 0, 0, 0, 0]), ('take', [0, 2, 0, 0, 0]), ('take', [0, 0, 2, 0, 0]), ('take', [0, 0, 0, 2, 0]), ('take', [0, 0, 0, 0, 2])]
random action: ('take', (1, 1, 0, 1, 0))
available actions: [('reserve', (0, 0)), ('reserve', (0

In [115]:
'''Let's try playing a game'''
#Make some moves
print('cards:',card_supply) #my card supply keeps getting updated somehow?
print('Nobles:',nobles)
print(Fernando.points)
env.reset(card_supply,nobles)
Fernando.reset()
print(Fernando.points)
actions_list = []
while Fernando.points <15 :
        ###Given the state we get the legal actions:
        actions = Fernando.get_legal_actions(env)
        if len(actions)==0:
                raise ValueError("No actions available?? check why")
        print(len(actions))
        break
        ##We pick an action for now it is random:
        #print(f'Possible actions at score {Fernando.points}:',actions) # there is smth weird in possible actions he can only take 2 gems from each color???
        random.shuffle(actions)
        print('Action_taken:',actions[0])
        Fernando.execute_action(actions[0], env)
        Fernando.UpdateBuyingPower()
        #We append the action to our list:
        actions_list.append(actions[0])
        #Update card board and count points
        env.UpdateCardBoard()
        _,_ =Fernando.count_point()
print(f'Fernando made {Fernando.points} points in {len(actions_list)} turns, by making the following actions {actions_list}' )


cards: [[<__main__.Card object at 0x000001BEFCD360E0>, <__main__.Card object at 0x000001BEFC92ED70>, <__main__.Card object at 0x000001BEFCCACC40>, <__main__.Card object at 0x000001BEFCCADED0>, <__main__.Card object at 0x000001BEFCCADFC0>, <__main__.Card object at 0x000001BEFCCAF3D0>], [<__main__.Card object at 0x000001BEFCD34160>, <__main__.Card object at 0x000001BEFC030CD0>, <__main__.Card object at 0x000001BEFC030C40>, <__main__.Card object at 0x000001BEFC030370>, <__main__.Card object at 0x000001BEFC030430>, <__main__.Card object at 0x000001BEFC030C10>], [<__main__.Card object at 0x000001BEFCD34850>, <__main__.Card object at 0x000001BEFC0305B0>, <__main__.Card object at 0x000001BEFC0311E0>, <__main__.Card object at 0x000001BEFC030C70>, <__main__.Card object at 0x000001BEFCD2D5A0>, <__main__.Card object at 0x000001BEFCD2DD50>]]
Nobles: [<__main__.Noble object at 0x000001BEFC030490>, <__main__.Noble object at 0x000001BEFCD2D690>, <__main__.Noble object at 0x000001BEFCD2F2B0>, <__main_