In [1]:
import random

We will do everything based on indexes in a list array like the following to facilitate accessing it:
0 : Black
1 : Blue
2 : Green
3 : Red
4 : White
5 : Joker 

## Define Cards and Nobles

In [3]:
class Card:
    def __init__(self, level, color, cost, points):
        '''Cards are defined by level color cost and points they give'''
        #int ranges from 1 to 3
        self.level = level
        #We would keep as int based on this mapping  {0: 'black', 1:'blue', 2: 'green', 3: 'red', 4:'white'}(alphabetical order)
        self.color = color
        #We do cost as an array [black, ...., white] [0, 2, 1, 0, 0]
        self.cost = cost
        #int, ranges from 0 to 5
        self.points = points

class Noble:
    def __init__(self, requirements, points):
        '''For nobles we only care about requirements and points you get'''
        #We do requirements as an array [black, ...., white] [3, 3, 3, 0, 0]
        self.requirements = requirements
        #int ranges from 3 to 4
        self.points = points

class GemColor:
    def __init__(self, name):
        self.name = name

## Define Agent:

In [6]:
class SplendorAgent:
    '''Our agent will, based on its state(gems/cards/nobles) get the set of legal actions and then execute the chosen one'''

    def __init__(self, gem_reserve=None, card_supply=None, players_state=None, nobles=None):
        self.gem_reserve = gem_reserve if gem_reserve is not None else [0,0,0,0,0, 0] # gems in the agents hand initializes as 0
        self.player_cards = card_supply if card_supply is not None else [] # cars in the player power
        self.player_reserved_cards = [] #cards reserved by the player
        self.nobles = nobles if nobles is not None else [] #nobles the player has
        self.buying_power = self.gem_reserve # buying power is initializes equal to the amount of gems of each kind
        self.points = 0
    
        #generate binary list
        # Generate all possible binary lists of length 5
        binary_lists = [[i, j, k, l, m] for i in range(2) for j in range(2) for k in range(2) for l in range(2) for m in range(2)]

        # Filter the binary lists to keep only those with a sum of 3
        self.binary_lists_with_sum_of_3 = [binary_list for binary_list in binary_lists if sum(binary_list) == 3]
        self.binary_lists_with_sum_of_2 = [binary_list for binary_list in binary_lists if sum(binary_list) == 2]


    def UpdateBuyingPower(self): 
        ''' Buying power is gems + cards bought of each color'''
        self.buying_power = self.gem_reserve
        for card in self.player_cards:
            self.buying_power[card.color] +=1 # if you have bought a card of said color you get an extra buying unit for that color.
        

    def get_legal_actions(self, spl_env):
        '''Function to get available actions for the agent based on the state'''
        legal_actions = []
        '''We check what cards can be bought based on the updated buying power taking into account any jokers in hand'''
        #can buy card #TO BE PUT IN A NICE LOOKING FUNCTION
        card_buy =[]
        for board in [spl_env.board_1, spl_env.board_2, spl_env.board_3]:
                
                for card in board:
                    over = 0
                    for i in range(len(card.cost)):
                        over += max(0, card.cost[i]- self.buying_power[i])
                    if over <= self.buying_power[-1]:
                        card_buy.append(card)

        card_buy = [('buy card', x) for x in card_buy]
        legal_actions.extend(card_buy)

        '''We check what cards can be reserved'''

        #RESERVE #TO BE PUT IN A NICE LOOKING FUNCTION
        reserve_cards = []
        if spl_env.gems[-1] >0 and len(self.player_reserved_cards) <3:
            for board in [spl_env.board_1, spl_env.board_2, spl_env.board_3]:
                    for card in board:
                            reserve_cards.append(card)
        reserve_cards = [('reserve', x) for x in reserve_cards]
        legal_actions.extend(reserve_cards)

        '''We check what reserved cards can be bought based on the updated buying power taking into account any jokers in hand'''
        #Buy reserved cards
        buyable_reserved_cards = []
     
        for card in self.player_reserved_cards:
                    over = 0
                    for i in range(len(card.cost)):
                        over += max(0, card.cost[i]- self.buying_power[i])

                    if over <= self.buying_power[-1]:
                        buyable_reserved_cards.append(card)
        buyable_reserved_cards = [('buy reserved', x) for x in buyable_reserved_cards]
        legal_actions.extend(buyable_reserved_cards)

        #Take Gems
        '''We check all possible combinations of gem taking doubles and triples.'''

        #make the list of gems currently takable
        doable_3gems = []
        for comb in self.binary_lists_with_sum_of_3:
            if  min([spl_env.gems[i] - comb[i] for i in range(len(comb))]) >=0:
                doable_3gems.append(comb)

        doable_2gems = []
        for comb in self.binary_lists_with_sum_of_2:
            if  min([spl_env.gems[i] - comb[i] for i in range(len(comb))]) >=0:
                doable_2gems.append(comb)
            
        
        gem_buy= []
        #append the list of 3 gems if the player has already 7 gems or less
        if sum(self.gem_reserve) <= 7:
            gem_buy.extend(doable_3gems)

        #append the list of 2 gems if the player has already 8

        if sum(self.gem_reserve)  == 8:
            gem_buy.extend(doable_2gems)

        #Appends the duos
        ##If the supply of gems is greater than 3 we can alwasy take 2 of the same gems.
        for x in range(len(spl_env.gems[:5])):
                list_empty = [0,0,0,0,0]
                if spl_env.gems[x] >3:
                    list_empty[x] = 2
                    gem_buy.append(list_empty)
        gem_buy = [('take', x) for x in gem_buy]
        legal_actions.extend(gem_buy)
        return legal_actions

        ###There is a bug here in the legal actions, not getting all posibilities need to fix!!


    def execute_action(self, action, spl_env): 
        '''Once an actions has been chosen we execute it by calling the action function'''

        # execute the specified action and update the state
        action_type, action_params = action
        if action_type == 'buy card':
            card = action_params
            self.buy_card(card, spl_env)

        elif action_type == 'buy reserved':
            card = action_params
            self.buy_reserved_card(card, spl_env)

        elif action_type == 'reserve':
            card = action_params
            self.reserve_card(card, spl_env)

        elif action_type == 'take':
            noble = action_params
            self.take_gems(noble, spl_env)

    def buy_card(self, card, spl_env):
        '''We buy the selected card and update the environment by removing the bought card from the board, updating player's gems and supply gems'''
        # update gem and card supply and player's state when a card is bought
        #add card to players cards
        self.player_cards.append(card)
        
        if card.level ==1:
             spl_env.board_1.remove(card)
        
        elif card.level ==2:
             spl_env.board_2.remove(card)

        elif card.level ==1:
             spl_env.board_3.remove(card)

        #compute required tokens
        over = 0
        for i in range(len(card.cost)):
            #update to know if we have to spend jokers
            over += max(0, card.cost[i]- self.buying_power[i])
            #update the number of reamining gems
            gem_transaction = max(0, card.cost[i] - self.buying_power[i] + self.gem_reserve[i])
            self.gem_reserve[i] += - gem_transaction
            spl_env.gems[i]  += gem_transaction

        #update number of jokers    
        self.gem_reserve[5] -= over
        spl_env.gems[5] += over


    def reserve_card(self, card, spl_env):
        '''We reserve the selected card and remove it from the board'''

        #add card to players reserved cards
        self.player_reserved_cards.append(card)

        #get rid of the card on the board
        if card.level ==1:
             spl_env.board_1.remove(card)
        
        elif card.level ==2:
             spl_env.board_2.remove(card)

        elif card.level ==3:
             spl_env.board_3.remove(card)

        #get a joker
        self.gem_reserve[5] += 1

    def buy_reserved_card(self, card, spl_env):
        '''We buy the selected reserved card and update the environment'''

        # update gem and card supply and player's state when a card is bought
        #add card to players cards
        self.player_cards.append(card)

        #compute required tokens
        over = 0
        for i in range(len(card.cost)):
            #update to know if we have to spend jokers
            over += max(0, card.cost[i]- self.buying_power[i])
            #update the number of reamining gems
            gem_transaction = max(0, card.cost[i] - self.buying_power[i] + self.gem_reserve[i])
            self.gem_reserve[i] += - gem_transaction
            spl_env.gems[i]  += gem_transaction

        #update number of jokers    
        self.gem_reserve[5] -= over
        spl_env.gems[5] += over


    def take_gems(self, gems_array, spl_env):
        '''Add gems from supply to player's reserve'''

        # simply add the gems to the players count

        for i in gems_array:
             self.gem_reserve[i] += gems_array[i]
             spl_env.gems[i] -= gems_array[i]
    
    def verify_noble(self, spl_env):
        '''Verify if we can get a noble'''

        #extract the colors of the cards
        cards_color = [x[1] for x in self.player_cards]

        #get our current color portfolio
        cards_sum = [0,0,0,0,0]
        for x in cards_color:
             cards_sum[x] +=1
        
        #check for the nobles
        for noble in spl_env.nobles:
             if sum([min(cards_sum[i]- noble[0][i], 0) for i in range(len(cards_sum))]) >=0:
                  self.nobles.append(noble)
                  spl_env.nobles.remove(noble)
    

    def count_point(self):
        '''Count points and win condition'''

        points =  0
        win = False
        for cards in self.player_cards:
              points +=cards.points
        
        for noble in self.nobles:
             points +=noble[1]
        
        if points >= 15:
             win = True
        self.points = points
        return points, win

    

## Define environment

In [8]:
class SplendorEnvironment:
    '''Set up the environment: Decks, Boards, Nobles, Gem supply, update and reset function'''

    def __init__(self, deck, deck_nobles, initial_gems = [7,7,7,7,7,5]):
    
        #creates the card decks 
        self.deck_1 = deck[0]
        self.deck_2 = deck[1]
        self.deck_3 = deck[2]

        #creates the cardboard
        self.board_1 = []
        self.board_2 = []
        self.board_3 = []

        self.UpdateCardBoard()

        #creates the noble board
        self.nobles = []
        self.CreateNobles(deck_nobles)

        #Create the gems
        self.gems= initial_gems
        

    def UpdateCardBoard(self):
        '''Randomly shuffle each deck and take the first 3 cards to place them on the board
        Updates boards to 3 if there are less than 3 cards'''
        random.shuffle(self.deck_1)
        random.shuffle(self.deck_2)
        random.shuffle(self.deck_3)

        #update cards rank 1
        while len(self.board_1) <4:
            if len(self.deck_1) ==0:
                break
            self.board_1.append(self.deck_1.pop(0))
            
        #update cards rank 2
        while len(self.board_2) <4:
            if len(self.deck_2) ==0:
                break
            self.board_2.append(self.deck_2.pop(0))
            
        #update cards rank 3
        while len(self.board_3) <4:
            if len(self.deck_3) ==0:
                break
            self.board_3.append(self.deck_3.pop(0))
            
    
    def CreateNobles(self, deck_nobles):
        '''Shuffle the deck of Nobles and get the first 3 cards - no replacement here.'''
        random.shuffle(deck_nobles)

        #update cards rank 1
        while len(self.nobles) <4:
            self.nobles.append(deck_nobles.pop(0))
           
      

    #Make this work
    def reset(self, deck, deck_nobles, initial_gems = [7,7,7,7,7,5]):
        '''Reset function, re-initializes all environmental variables:
        Set score to 0, reset supply, decks, boards, reserved cards, nobles etc.'''
         #creates the card decks 
        self.deck_1 = deck[0]
        self.deck_2 = deck[1]
        self.deck_3 = deck[2]

        #creates the cardboard
        self.board_1 = []
        self.board_2 = []
        self.board_3 = []

        self.UpdateCardBoard()

        #creates the noble board
        self.nobles = []
        self.CreateNobles(deck_nobles)

        #Create the gems
        self.gems= initial_gems
       
    '''This functions were here before, we keep tham as reference in case we want to take inspiration but we are not using them:
    '''
    def draw_card(self, player_id):
        # draw a card from the top of the deck and add it to the player's hand
        level = min(sum([len(x) for x in self.state.card_supply]), 4) - 1
        if len(self.state.card_supply[level]) == 0:
            self.state.card_supply[level] = self.state.card_supply[level + 1][::-1]
            self.state.card_supply[level + 1] = []
        card = self.state.card_supply[level].pop()
        self.state.players_state[player_id]['cards'][card.color] += 1

    def step(self, action):
        # execute the specified action and update the current player
        self.state.execute_action(action)
        self.current_player = (self.current_player + 1) % self.num_players

    def get_legal_actions(self):
        return self.state.get_legal_actions()

    def get_current_player(self):
        return self.current_player

    def get_state(self):
        return self.state

    def is_terminal(self):
        # check if the game is over (i.e., if any player has 15 prestige points or if the card and noble supplies are exhausted)
        if any([player['prestige_points'] >= 15 for player in self.state.players_state]):
            return True
        if sum([len(x) for x in self.state.card_supply]) == 0 and len(self.state.nobles) == 0:
            return True
        return False

    def get_reward(self):
        # calculate the reward for each player based on their score
        rewards = [0] * self.num_players
        max_score = max([self.state.get_player_score(i) for i in range(self.num_players)])
        for i in range(self.num_players):
            if self.state.get_player_score(i) == max_score:
                rewards[i] = 1
        return rewards

## Initialize variables, agent and environment:

In [9]:
#mapping  {0: 'black', 1:'blue', 2: 'green', 3: 'red', 4:'white'}

# create cards rank 1
cards_1= [Card(1, 2, [0,0,0,0,0], 1),
          Card(1, 2, [0,0,0,0,0], 1),
          Card(1, 2, [0,0,0,0,0], 1),
          Card(1, 3, [0,0,0,0,0], 1),
          Card(1, 1, [0,0,0,0,0], 1),
          Card(1, 0, [0,0,0,0,0], 1)]

#create cards rank 2
cards_2=[Card(2, 2, [2,0,0,0,0], 3),
         Card(2, 2, [2,0,0,0,0], 3),
         Card(2, 2, [2,0,0,0,0], 3),
         Card(2, 3, [2,0,0,0,0], 3),
         Card(2, 1, [2,0,0,0,0], 3),
         Card(2, 0, [2,0,0,0,0], 3)]

#create cards rank3
cards_3=[Card(3, 2, [3,0,0,0,0], 5),
         Card(3, 2, [3,0,0,0,0], 5),
         Card(3, 2, [3,0,0,0,0], 5),
         Card(3, 3, [4,0,0,0,0], 5),
         Card(3, 1, [4,0,0,0,0], 5),
         Card(3, 0, [4,0,0,0,0], 5)]

# create nobles
nobles= [
Noble([0,3,3,3, 0], 3),
Noble([4, 4, 0,0,0], 3),
Noble([3,3,3,0,0], 3),
Noble([0,3,3,3, 0], 3),
Noble([4, 4, 0,0,0], 3),
Noble([3,3,3,0,0], 3)]
# create SplendorState
card_supply = [cards_1, cards_2, cards_3]
env = SplendorEnvironment(card_supply, nobles)
'''Fernando is a random action agent, he is chill'''
Fernando = SplendorAgent()

In [10]:
'''Let's try playing a game'''
#Make some moves

actions_list = []
while Fernando.points <15 :
        
        actions = Fernando.get_legal_actions(env)
        print(f'Possible actions at score {Fernando.points}:',actions) # there is smth weird in possible actions he can only take 2 gems from each color???
        random.shuffle(actions)
        Fernando.execute_action(actions[0], env)
        actions_list.append(actions[0])
        env.UpdateCardBoard()
        _,_ =Fernando.count_point()
print(f'Fernando made {Fernando.points} points in {len(actions_list)} turns, by making the following actions {actions_list}' )


Possible actions at score 0: [('buy card', <__main__.Card object at 0x00000212152F6710>), ('buy card', <__main__.Card object at 0x0000021214F5FDC0>), ('buy card', <__main__.Card object at 0x0000021214F5EE30>), ('buy card', <__main__.Card object at 0x0000021214F5F2B0>), ('reserve', <__main__.Card object at 0x00000212152F6710>), ('reserve', <__main__.Card object at 0x0000021214F5FDC0>), ('reserve', <__main__.Card object at 0x0000021214F5EE30>), ('reserve', <__main__.Card object at 0x0000021214F5F2B0>), ('reserve', <__main__.Card object at 0x0000021214F5CA00>), ('reserve', <__main__.Card object at 0x0000021214F5F910>), ('reserve', <__main__.Card object at 0x00000212152F63B0>), ('reserve', <__main__.Card object at 0x0000021214F5D090>), ('reserve', <__main__.Card object at 0x0000021214F5F790>), ('reserve', <__main__.Card object at 0x0000021214F5E680>), ('reserve', <__main__.Card object at 0x0000021214F5D300>), ('reserve', <__main__.Card object at 0x0000021214F5DCF0>), ('take', [0, 0, 1, 1, 