In [23]:
import random
import copy as cp
import itertools

We will do everything based on indexes in a list array like the following to facilitate accessing it:

0 : Black

1 : Blue

2 : Green

3 : Red

4 : White

5 : Joker 

## Define Cards and Nobles

In [2]:
class Card:
    def __init__(self, level, color, cost, points):
        '''Cards are defined by level color cost and points they give'''
        #int ranges from 1 to 3
        self.level = level
        #We would keep as int based on this mapping  {0: 'black', 1:'blue', 2: 'green', 3: 'red', 4:'white'}(alphabetical order)
        self.color = color
        #We do cost as an array [black, ...., white] [0, 2, 1, 0, 0]
        self.cost = cost
        #int, ranges from 0 to 5
        self.points = points

class Noble:
    def __init__(self, requirements, points):
        '''For nobles we only care about requirements and points you get'''
        #We do requirements as an array [black, ...., white] [3, 3, 3, 0, 0]
        self.requirements = requirements
        #int ranges from 3 to 4
        self.points = points

class GemColor:
    def __init__(self, name):
        self.name = name

## Define Agent:

In [37]:
class SplendorAgent:
    '''Our agent will, based on its state(gems/cards/nobles) get the set of legal actions and then execute the chosen one'''
    MAX_GEMS_SUPPLY=10 #general attribute to call and compare againtst gems
    MAX_JOKERS=3
    def __init__(self, gem_reserve=None, cards_player=None, players_state=None, nobles=None):
        self.gem_reserve = gem_reserve if gem_reserve is not None else [0,0,0,0,0,0] # gems in the agents hand initializes as 0
        self.player_cards = cards_player if cards_player is not None else [] # cars in the player power
        self.player_reserved_cards = [] #cards reserved by the player
        self.nobles = nobles if nobles is not None else [] #nobles the player has
        self.buying_power = self.gem_reserve # buying power is initializes equal to the amount of gems of each kind
        self.points = 0

        self.binary_lists_with_sum_of_3 = list(filter(lambda binary_list: sum(binary_list) == 3, itertools.product([0, 1], repeat=5)))
        self.binary_lists_with_sum_of_2 = list(filter(lambda binary_list: sum(binary_list) == 2, itertools.product([0, 1], repeat=5)))
    
    def reset(self):
        self.gem_reserve = [0, 0, 0, 0, 0, 0] # Reset gems in the agent's hand to 0
        self.player_cards = [] # Reset cards in the player's possession
        self.player_reserved_cards = [] # Reset reserved cards
        self.nobles = [] # Reset nobles
        self.buying_power = [0, 0, 0, 0, 0, 0] # Reset buying power to 0
        self.points = 0 # Reset points to 0

        self.binary_lists_with_sum_of_3 = list(filter(lambda binary_list: sum(binary_list) == 3, itertools.product([0, 1], repeat=5)))
        self.binary_lists_with_sum_of_2 = list(filter(lambda binary_list: sum(binary_list) == 2, itertools.product([0, 1], repeat=5)))

    def UpdateBuyingPower(self): 
        ''' Buying power is gems + cards bought of each color'''
        self.buying_power =cp.deepcopy(self.gem_reserve)
        for card in self.player_cards:
            self.buying_power[card.color] +=1 # if you have bought a card of said color you get an extra buying unit for that color.

    
    def get_legal_actions(self,spl_env):
        '''I'll try to get all legal actions as indexes:
        Buy Card , (i,j) #specifies which card to buy
        Reserve Card, (i,j) #card to reserve
        Buy reserved (i) # index in reserved cards.
        Take Gems, [x,x,x,x,x] #gems to take
        ''' 

        ##First we get all legal actions:
        legal_actions=[] #57 available actions:
        ##Check cards that can be bought based on updated buying power'''
        card_buy = self.get_purchasable_cards(spl_env)
        legal_actions.extend(card_buy)
        ##Now reservable card(should be all as long as we have enough space and gems)'''
        reserve_cards=self.get_reservable_cards(spl_env)
        legal_actions.extend(reserve_cards)
        ##We get the purchasable reserved cards:
        buyable_reserved_cards=self.get_purchasable_reserved()
        legal_actions.extend(buyable_reserved_cards)
        ##Takeable gems combinations:
        gem_buy=self.get_takable_gems(spl_env)
        legal_actions.extend(gem_buy)

        return legal_actions       
#        return self.transform_legal_actions(legal_actions)

    def get_purchasable_cards(self,spl_env):
        '''We get all purchasable cards based on buying power(gems + cards) and return the index (level, column) in list format'''
        card_buy=[]
        for board in [spl_env.board_1, spl_env.board_2, spl_env.board_3]:   
                for card in board:
                    over = 0
                    for i in range(len(card.cost)):
                        over += max(0, card.cost[i]- self.buying_power[i])
                    if over <= self.buying_power[-1]:
                        card_buy.append(card) # we could append a tuple containing level(0-2),index in board(0-3)
        return  [('buy card', x) for x in card_buy]

    def get_reservable_cards(self,spl_env):
        '''Check reservable cards based on number of gems and number of reserved cards.'''
        reserve_cards = []
        if spl_env.gems[-1] >0 and len(self.player_reserved_cards) < SplendorAgent.MAX_JOKERS:
            for board in [spl_env.board_1, spl_env.board_2, spl_env.board_3]:
                    for card in board:
                            reserve_cards.append(card)
        return [('reserve', x) for x in reserve_cards]         
    
    
    def get_purchasable_reserved(self):
        '''Checks fi we can buy our reserved cards and return index of reserved card purchasable'''
        buyable_reserved_cards = []
        for card in self.player_reserved_cards:
                    over = 0
                    for i in range(len(card.cost)):
                        over += max(0, card.cost[i]- self.buying_power[i])

                    if over <= self.buying_power[-1]:
                        buyable_reserved_cards.append(card) # we could return the index in reserved cards 0-2
        return [('buy reserved', x) for x in buyable_reserved_cards]
    
    def get_takable_gems(self,spl_env):
        '''We check all possible combinations of gem taking doubles and triples.'''
        #make the list of gems currently takable
        doable_3gems = [] #all possible combinations for 3 gems
        for comb in self.binary_lists_with_sum_of_3:
            if  min([spl_env.gems[i] - comb[i] for i in range(len(comb))]) >=0:
                doable_3gems.append(comb)

        doable_2gems = [] #possible combinations for 2 gems (same color)
        for comb in self.binary_lists_with_sum_of_2:
            if  min([spl_env.gems[i] - comb[i] for i in range(len(comb))]) >=0:
                doable_2gems.append(comb)        
    
        gem_buy= []

        # If gem supply is 10, don't consider taking gems
        if sum(self.gem_reserve) == SplendorAgent.MAX_GEMS_SUPPLY:
            pass # don't return gems to take.
        
        #append the list of 3 gems if the player has already 7 gems or less
        if sum(self.gem_reserve) <= 7:
            gem_buy.extend(doable_3gems)

        #append the list of 2 gems if the player has already 8

        if sum(self.gem_reserve) == 8:
            gem_buy.extend(doable_2gems)
        
        # append the list of 1 gem if the player has already 9
        
        if sum(self.gem_reserve) == 9:
            gem_buy.extend([[1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1]])

        #Appends the duos
        ##If the supply of gems is greater than 3 we can alwasy take 2 of the same gems.
        for x in range(len(spl_env.gems[:5])):
                list_empty = [0,0,0,0,0]
                if spl_env.gems[x] >3:
                    list_empty[x] = 2
                    gem_buy.append(list_empty)
        return [('take', x) for x in gem_buy]


    def execute_action(self, action, spl_env): 
        '''Once an actions has been chosen we execute it by calling the action function'''

        # execute the specified action and update the state
        action_type, action_params = action
        if action_type == 'buy card':
            card = action_params
            self.buy_card(card, spl_env)

        elif action_type == 'buy reserved':
            card = action_params
            self.buy_reserved_card(card, spl_env)

        elif action_type == 'reserve':
            card = action_params
            self.reserve_card(card, spl_env)

        elif action_type == 'take':
            noble = action_params
            self.take_gems(noble, spl_env)

    def buy_card(self, card, spl_env):
        '''We buy the selected card and update the environment by removing the bought card from the board, updating player's gems and supply gems'''
        # update gem and card supply and player's state when a card is bought
        #Before adding card to players hand we need to check how much buying power do our current cards give us:
        #extract the colors of the current cards
        cards_color = [x.color for x in self.player_cards]

        #get our current color portofolio = buying power without gems
        cards_sum = [0,0,0,0,0]
        for x in cards_color:
             cards_sum[x] +=1
        
        #add card to players cards
        self.player_cards.append(card)

        ##Now we remove it from the board
        print('cards power:',cards_sum)
        print('cost:',card.cost)
        if card.level ==1:
             spl_env.board_1.remove(card)
        
        elif card.level ==2:
             spl_env.board_2.remove(card)

        elif card.level ==1:
             spl_env.board_3.remove(card)

        ##First check from the cost we substract our current free buying power.
        new_cost=[0,0,0,0,0]
        for i in range(len(card.cost)):
            new_cost[i]=max(0,card.cost[i]-cards_sum[i])
        
        print('Discounted card cost:',new_cost)


        #compute required tokens
        over = 0
        print('orig gem reserve:',self.gem_reserve)
        print('orig gem supply:',spl_env.gems)
        #print('card buying power:',self.)
        '''I'm using gem_reserve instead of buying power, I think it will be better to keep the values from gem reserve and cards separated to know what gems to resupply and which not'''
        for i in range(len(card.cost)):
            #update to know if we have to spend jokers
            over = max(0, new_cost[i]- self.gem_reserve[i]) #jokers needed for this color
            #print('index:',i,'over:',over)
            #update the number of reamining gems
            #print('Buying power:',self.buying_power[i],self.gem_reserve[i])
            gem_transaction = max(0, new_cost[i] - over)# after substracting cards + joker we get the amount to pay.
            #gem_transaction = max(0, card.cost[i] - self.buying_power[i] + self.gem_reserve[i])
            if over>self.gem_reserve[5]:
                 print(over,self.gem_reserve)
                 raise ValueError("Too many jokers")
            
            
            #print('gem_transaction?',gem_transaction)
            self.gem_reserve[i] -= gem_transaction
            spl_env.gems[i]  += gem_transaction
            self.gem_reserve[5]-=over
            spl_env.gems[5]+=over
        print('New gem reserve:',self.gem_reserve)
        print('new gem supply:',spl_env.gems)


        ##update number of jokers    
        #self.gem_reserve[5] -= over
        #spl_env.gems[5] += over


    def reserve_card(self, card, spl_env):
        '''We reserve the selected card and remove it from the board'''

        #add card to players reserved cards
        self.player_reserved_cards.append(card)

        #get rid of the card on the board
        if card.level ==1:
             spl_env.board_1.remove(card)
        
        elif card.level ==2:
             spl_env.board_2.remove(card)

        elif card.level ==3:
             spl_env.board_3.remove(card)

        #get a joker
        self.gem_reserve[5] += 1 # add joker to hand.
        spl_env.gems[5]-=1 # added removal of joker from board

    def buy_reserved_card(self, card, spl_env):
        '''We buy the selected reserved card and update the environment'''
        cards_color = [x.color for x in self.player_cards]

        #get our current color portofolio = buying power without gems
        cards_sum = [0,0,0,0,0]
        for x in cards_color:
             cards_sum[x] +=1
        
        new_cost=[0,0,0,0,0]
        for i in range(len(card.cost)):
            new_cost[i]=max(0,card.cost[i]-cards_sum[i])
        
        print('Discounted card cost:',new_cost)
        # update gem and card supply and player's state when a card is bought
        #add card to players cards
        self.player_cards.append(card)

        #compute required tokens
        over = 0
        print('reserve',self.gem_reserve)
        for i in range(len(card.cost)):
            #update to know if we have to spend jokers
            #over += max(0, card.cost[i]- self.buying_power[i])
            over=max(0,new_cost[i]-self.gem_reserve[i])
            #update the number of reamining gems
            #gem_transaction = max(0, card.cost[i] - self.buying_power[i] + self.gem_reserve[i])
            if over>self.gem_reserve[5]:
                 print(over, self.gem_reserve)
                 raise ValueError("Too many jokers")
            gem_transaction = max(0,new_cost[i]-over)
            self.gem_reserve[i] -=  gem_transaction
            spl_env.gems[i]  += gem_transaction
            spl_env.gems[5]+=over
            self.gem_reserve[5]-=over
        #update number of jokers    
        #self.gem_reserve[5] -= over
        #spl_env.gems[5] += over


    def take_gems(self, gems_array, spl_env):
        '''Add gems from supply to player's reserve and remove it from supply.'''

        # simply add the gems to the players count
        for i in range(len(gems_array)):
            self.gem_reserve[i] += gems_array[i]
            spl_env.gems[i] -= gems_array[i]
    
    def verify_noble(self, spl_env):
        '''Verify if we can get a noble'''

        #extract the colors of the cards
        cards_color = [x[1] for x in self.player_cards]

        #get our current color portfolio
        cards_sum = [0,0,0,0,0]
        for x in cards_color:
             cards_sum[x] +=1
        
        #check for the nobles
        for noble in spl_env.nobles:
             if sum([min(cards_sum[i]- noble[0][i], 0) for i in range(len(cards_sum))]) >=0:
                  self.nobles.append(noble)
                  spl_env.nobles.remove(noble)
    

    def count_point(self):
        '''Count points and win condition'''

        points =  0
        win = False
        for cards in self.player_cards:
              points +=cards.points
        
        for noble in self.nobles:
             points +=noble[1]
        
        if points >= 15:
             win = True
        self.points = points
        return points, win

    

## Define environment

In [38]:
class SplendorEnvironment:
    '''Set up the environment: Decks, Boards, Nobles, Gem supply, update and reset function'''

    def __init__(self, card_supply, deck_nobles, initial_gems = [7,7,7,7,7,5]):
    
        self.agent = SplendorAgent()
        #creates the card decks
        deck=cp.deepcopy(card_supply) #since we are using pop we need to define a new list removing references. 
        self.deck_1 = deck[0]
        self.deck_2 = deck[1]
        self.deck_3 = deck[2]

        #creates the cardboard
        self.board_1 = []
        self.board_2 = []
        self.board_3 = []

        self.UpdateCardBoard()

        #creates the noble board
        self.nobles = []
        self.CreateNobles(deck_nobles)

        #Create the gems
        self.gems= initial_gems
        
    def check_gems(self): # in theory if our code is right we wouldn't need this.
        for i in range(len(self.gems)):
            if i==5 and self.gems[i]>5:
                self.gems[i]=5
            elif self.gems[i]>7:
                self.gems[i]=7

    def UpdateCardBoard(self):
        '''Randomly shuffle each deck and take the first 3 cards to place them on the board
        Updates boards to 3 if there are less than 3 cards'''
        random.shuffle(self.deck_1)
        random.shuffle(self.deck_2)
        random.shuffle(self.deck_3)
        #update cards rank 1
        while len(self.board_1) <4:
            if len(self.deck_1) ==0:
                break
            self.board_1.append(self.deck_1.pop(0))
            
        #update cards rank 2
        while len(self.board_2) <4:
            if len(self.deck_2) ==0:
                break
            self.board_2.append(self.deck_2.pop(0))
            
        #update cards rank 3
        while len(self.board_3) <4:
            if len(self.deck_3) ==0:
                break
            self.board_3.append(self.deck_3.pop(0))
    
    
    def CreateNobles(self, deck_nobles):
        '''Shuffle the deck of Nobles and get the first 3 cards - no replacement here.'''
        deck_n=cp.deepcopy(deck_nobles)
        random.shuffle(deck_n)

        #update cards rank 1
        while len(self.nobles) <4:
            self.nobles.append(deck_n.pop(0)) #the issue for reseting is that when we pop we modify the original
           
      

    #Make this work
    def reset(self, new_deck,new_deck_nobles, initial_gems = [7,7,7,7,7,5]):
        '''Reset function, re-initializes all environmental variables:
        Set score to 0, reset supply, decks, boards, reserved cards, nobles etc.'''
        ###Reset all variables and rebuild:
        ##Decks:
        self.deck_1=[]
        self.deck_2=[]
        self.deck_3=[]

        #creates the card decks
         
        #self.deck_1 = new_deck[0]
        #self.deck_2 = new_deck[1]
        #self.deck_3 = new_deck[2]
        self.deck_1 = cp.deepcopy(new_deck[0])
        self.deck_2 = cp.deepcopy(new_deck[1])
        self.deck_3 = cp.deepcopy(new_deck[2])
        ##Boards:
        #creates the cardboard
        self.board_1 = []
        self.board_2 = []
        self.board_3 = []

        self.UpdateCardBoard()
        ##Nobles:
        #creates the noble board
        self.nobles = []
        self.CreateNobles(new_deck_nobles)
        ##Gems:
        #Create the gems
        self.gems= [7,7,7,7,7,5]
        ##Agentwise:
        # Reset the agent's variables
        self.agent.reset()
        
       
    '''This functions were here before, we keep tham as reference in case we want to take inspiration but we are not using them:
    '''
    def draw_card(self, player_id):
        # draw a card from the top of the deck and add it to the player's hand
        level = min(sum([len(x) for x in self.state.card_supply]), 4) - 1
        if len(self.state.card_supply[level]) == 0:
            self.state.card_supply[level] = self.state.card_supply[level + 1][::-1]
            self.state.card_supply[level + 1] = []
        card = self.state.card_supply[level].pop()
        self.state.players_state[player_id]['cards'][card.color] += 1

    def step(self, action):
        # execute the specified action and update the current player
        self.state.execute_action(action)
        self.current_player = (self.current_player + 1) % self.num_players

    def get_legal_actions(self):
        return self.state.get_legal_actions()

    def get_current_player(self):
        return self.current_player

    def get_state(self):
        return self.state

    def is_terminal(self):
        # check if the game is over (i.e., if any player has 15 prestige points or if the card and noble supplies are exhausted)
        if any([player['prestige_points'] >= 15 for player in self.state.players_state]):
            return True
        if sum([len(x) for x in self.state.card_supply]) == 0 and len(self.state.nobles) == 0:
            return True
        return False

    def get_reward(self):
        # calculate the reward for each player based on their score
        rewards = [0] * self.num_players
        max_score = max([self.state.get_player_score(i) for i in range(self.num_players)])
        for i in range(self.num_players):
            if self.state.get_player_score(i) == max_score:
                rewards[i] = 1
        return rewards

## Initialize variables, agent and environment:

In [39]:
#mapping  {0: 'black', 1:'blue', 2: 'green', 3: 'red', 4:'white'}

# create cards rank 1
cards_1= [Card(1, 2, [0,1,1,1,1], 0),
          Card(1, 2, [1,1,1,1,0], 0),
          Card(1, 2, [0,2,0,2,1], 0),
          Card(1, 3, [0,0,0,4,0], 0),
          Card(1, 1, [0,0,4,0,0], 0),
          Card(1, 0, [1,2,1,0,1], 0)]

#create cards rank 2
cards_2=[Card(2, 2, [0,0,1,2,1], 1),
         Card(2, 2, [2,0,3,0,0], 1),
         Card(2, 2, [0,0,4,0,2], 1),
         Card(2, 3, [0,0,0,0,5], 1),
         Card(2, 1, [3,0,0,3,0], 1),
         Card(2, 0, [0,0,0,2,2], 1)]

#create cards rank3
cards_3=[Card(3, 2, [0,0,3,3,3], 3),
         Card(3, 2, [0,0,0,6,0], 3),
         Card(3, 2, [0,3,3,0,3], 3),
         Card(3, 3, [3,3,3,0,0], 4),
         Card(3, 1, [0,0,7,0,0], 4),
         Card(3, 0, [3,3,0,3,0], 4)]

# create nobles
nobles= [Noble([3,3,0,0,3], 3),
Noble([3,0,3,3,0], 3),
Noble([0,4,0,4,0], 3),
Noble([0,0,4,0,4], 3),
Noble([4,0,0,0,4], 3),
Noble([0,0,0,5,3], 3)]

# create SplendorState
'''Note, card supply and nobles are list of lists of lengths 3 and 1 respectively.'''
##Get the card supply:
card_supply = [cards_1, cards_2, cards_3]
###Initialize the environment with card_supply and nobles
env = SplendorEnvironment(card_supply, nobles)
##Initialize Agent:
'''Fernando is a random action agent, he is chill'''
Fernando = SplendorAgent()

For now, to run the game, run the cell above and the cell below. Everytime you want to start a new game run the cell above because, the reset function is not working properly still.

In [40]:
'''Let's try playing a game'''
#Make some moves
print('cards:',card_supply) #my card supply keeps getting updated somehow?
print('Nobles:',nobles)
print(Fernando.points)
env.reset(card_supply,nobles)
Fernando.reset()
print(Fernando.points)
actions_list = []
while Fernando.points <15 :
        ###Given the state we get the legal actions:
        actions = Fernando.get_legal_actions(env)
        if len(actions)==0:
                raise ValueError("No actions available?? check why")
        print(len(actions))
        ##We pick an action for now it is random:
        #print(f'Possible actions at score {Fernando.points}:',actions) # there is smth weird in possible actions he can only take 2 gems from each color???
        random.shuffle(actions)
        print('Action_taken:',actions[0])
        Fernando.execute_action(actions[0], env)
        Fernando.UpdateBuyingPower()
        #We append the action to our list:
        actions_list.append(actions[0])
        #Update card board and count points
        env.UpdateCardBoard()
        _,_ =Fernando.count_point()
print(f'Fernando made {Fernando.points} points in {len(actions_list)} turns, by making the following actions {actions_list}' )


cards: [[<__main__.Card object at 0x000002A356C52950>, <__main__.Card object at 0x000002A356C50910>, <__main__.Card object at 0x000002A356C52E30>, <__main__.Card object at 0x000002A356C53B50>, <__main__.Card object at 0x000002A356C523B0>, <__main__.Card object at 0x000002A356C528F0>], [<__main__.Card object at 0x000002A356C51450>, <__main__.Card object at 0x000002A356C515D0>, <__main__.Card object at 0x000002A356C51750>, <__main__.Card object at 0x000002A356C50880>, <__main__.Card object at 0x000002A356C53010>, <__main__.Card object at 0x000002A356C51510>], [<__main__.Card object at 0x000002A356C509A0>, <__main__.Card object at 0x000002A356C50F40>, <__main__.Card object at 0x000002A356C527A0>, <__main__.Card object at 0x000002A356C52050>, <__main__.Card object at 0x000002A356C51E10>, <__main__.Card object at 0x000002A357926170>]]
Nobles: [<__main__.Noble object at 0x000002A356C529E0>, <__main__.Noble object at 0x000002A357925D50>, <__main__.Noble object at 0x000002A357926320>, <__main_

Implementation of agent inside environment to facilitate the reset and communication between classes. *WIP still*

In [9]:
'''Let's try playing a game'''
#Make some moves
print('cards:',card_supply) #my card supply keeps getting updated somehow?
print('Nobles:',nobles)
print(env.agent.points)
env.reset(card_supply,nobles)
print(env.agent.points)
actions_list = []
while env.agent.points <15 :
        ###Given the state we get the legal actions:
        actions = env.agent.get_legal_actions(env)
        ##We pick an action for now it is random:
        #print(f'Possible actions at score {env.agent.points}:',actions) # there is smth weird in possible actions he can only take 2 gems from each color???
        random.shuffle(actions)
        #print(f'Action taken:{actions[0]}')
        env.agent.execute_action(actions[0], env)
        env.agent.UpdateBuyingPower()
        #print(env.gem_supply)
        #We append the action to our list:
        actions_list.append(actions[0])
        #Update card board and count points
        env.UpdateCardBoard()
        _,_ =env.agent.count_point()
print(f'Fernando made {env.agent.points} points in {len(actions_list)} turns, by making the following actions {actions_list}' )


cards: [[<__main__.Card object at 0x000001B5C4141B10>, <__main__.Card object at 0x000001B5C4141CC0>, <__main__.Card object at 0x000001B5C41409A0>, <__main__.Card object at 0x000001B5C4142770>, <__main__.Card object at 0x000001B5C4140130>, <__main__.Card object at 0x000001B5C41406D0>], [<__main__.Card object at 0x000001B5C4141B70>, <__main__.Card object at 0x000001B5C4142D10>, <__main__.Card object at 0x000001B5C4142E60>, <__main__.Card object at 0x000001B5C4142E90>, <__main__.Card object at 0x000001B5C4142C20>, <__main__.Card object at 0x000001B5C4142C80>], [<__main__.Card object at 0x000001B5C4141750>, <__main__.Card object at 0x000001B5C4140CA0>, <__main__.Card object at 0x000001B5C4140B20>, <__main__.Card object at 0x000001B5C4140B80>, <__main__.Card object at 0x000001B5C4143E80>, <__main__.Card object at 0x000001B5C4143DF0>]]
Nobles: [<__main__.Noble object at 0x000001B5C4142D70>, <__main__.Noble object at 0x000001B5C4140D90>, <__main__.Noble object at 0x000001B5C4142680>, <__main_

Things to take into account:

When defining an action space we will need to find a way of indexing all actions, I think it might make more sense to just use indexes for the cards we are buying as buy(0,0) the issue will be mapping our actions maybe because right now the actions are buying specific cards not the position of the card. 

Also might be smarter to define everything in one environment inheriting from openai gym, right now it is a mess to have variables go from 1 class to the other.

In that same idea, we need to think about the observation space, do we feed the board with the cards available, what other things. 