# DL4G - Jass Introduction

In this exercise we will look at some properties of the jass kit environment that can be used to develop your own jass agent.

You will need to have numpy installed, as well as the jass-kit environment.

In [4]:
import numpy as np
import random

from jass.game.game_util import *
from jass.game.game_state_util import *
from jass.game.game_sim import GameSim
from jass.game.game_observation import GameObservation
from jass.game.const import *
from jass.game.rule_schieber import RuleSchieber
from jass.agents.agent import Agent
from jass.agents.agent_random_schieber import AgentRandomSchieber
from jass.arena.arena import Arena

from simple_agent import *


Another possibility to test agents locally is to use the arena. Let us play 100 games against the Random Agent and see if our trump methods makes any difference.


In [5]:
arena = Arena(nr_games_to_play=100)
arena.set_players(MyAgent(), AgentRandomSchieber(), MyAgent(), AgentRandomSchieber())

In [6]:
arena.play_all_games()

hand: [ 3  4  6  7 21 23 25 32 33]
choice: 7
hand: [14 16 28 30]
choice: 30
hand: [ 5 14 16 22 26 28 31 35]
choice: 31
hand: [32 33]
choice: 33
hand: [ 5 28 35]
choice: 28
hand: [ 3  4  6 32]
choice: 3
hand: [ 5 14 16 22 26 35]
choice: 22
hand: [21 23 25 32]
choice: 23
hand: [35]
choice: 35
hand: [32]
choice: 32
hand: [ 4  6 21 25]
choice: 6
hand: [5]
choice: 5
hand: [ 4 21 25]
choice: 25
hand: [14 16]
choice: 14
hand: [26]
choice: 26
hand: [21]
choice: 21
hand: [16]
choice: 16
hand: [4]
choice: 4
hand: [ 0  1  3 13 14 17 19 29 33]
choice: 29
hand: [18 21 22 26 28 30]
choice: 30
hand: [13 14 17 19]
choice: 13
hand: [ 9 12 18 21 22 26]
choice: 12
hand: [ 0  1  3 19]
choice: 0
hand: [ 8 18 21]
choice: 8
hand: [ 9 18 21 22 26]
choice: 21
hand: [14 17]
choice: 14
hand: [ 9 18 22 26 28]
choice: 22
hand: [19]
choice: 19
hand: [33]
choice: 33
hand: [18 26 28]
choice: 18
hand: [ 9 26 28]
choice: 26
hand: [ 1  3 17]
choice: 3
hand: [ 9 28]
choice: 9
hand: [17]
choice: 17
hand: [28]
choice: 28
h

In [7]:
print(arena.points_team_0.sum(), arena.points_team_1.sum())

8254.0 7446.0


Now you can continue with a rule based implemenation of the card play. Also look at the flask implementation of the service to see how you can get your agent online.

# MCTS with Determinization

In [10]:
import numpy as np
from jass.game.game_sim import GameSim
from jass.game.game_observation import GameObservation
from jass.game.const import *
from jass.game.rule_schieber import RuleSchieber
from jass.agents.agent import Agent
from jass.game.game_util import deal_random_hand, convert_one_hot_encoded_cards_to_int_encoded_list
import random

class MCTSAgent(Agent):
    def __init__(self, n_simulations=200, n_determinizations=10):
        super().__init__()
        self._rule = RuleSchieber()
        self.n_simulations = n_simulations
        self.n_determinizations = n_determinizations
    
    def action_trump(self, obs: GameObservation) -> int:
        """
        Determine trump action for the given observation.
        The trump selection will be handled using a heuristic as done in previous tasks.
        """
        card_list = convert_one_hot_encoded_cards_to_int_encoded_list(obs.hand)
        scores = [calculate_trump_selection_score(card_list, trump) for trump in [0, 1, 2, 3]]
        highest_score_index = scores.index(max(scores))
        if scores[highest_score_index] > 68:
            return highest_score_index
        if obs.forehand == -1:
            return PUSH
        return highest_score_index

    def action_play_card(self, obs: GameObservation) -> int:
        """
        Perform the Monte Carlo Tree Search (MCTS) to select the best card to play
        based on multiple determinizations of the game state.
        """
        valid_cards = self._rule.get_valid_cards_from_obs(obs)
        valid_card_indices = np.flatnonzero(valid_cards)

        if len(valid_card_indices) == 1:
            # Only one valid card, no need for MCTS
            return valid_card_indices[0]

        # Perform multiple determinizations and MCTS simulations
        card_scores = np.zeros(len(valid_card_indices))
        
        for _ in range(self.n_determinizations):
            determinization_hands = self._create_determinization(obs)
            card_scores += self._run_mcts_for_determinization(determinization_hands, obs, valid_card_indices)
        
        # Choose the card with the best score
        best_card_index = np.argmax(card_scores)
        return valid_card_indices[best_card_index]

    def _create_determinization(self, obs: GameObservation) -> np.ndarray:
        """
        Create a determinized version of the game state by assigning random plausible hands to opponents.
        """
        # Deal random hands for opponents
        hands = deal_random_hand()
        hands_with_card_names = [convert_one_hot_encoded_cards_to_str_encoded_list(hand) for hand in hands]
        players_card_names = convert_one_hot_encoded_cards_to_str_encoded_list(obs.hand)

        print(f'players hand: {players_card_names}')
        print(f'random cards: ')
        for hand in hands_with_card_names:
            print(hand)

        # get the cards that would vanish when not reassigning
        non_matching_cards = list(filter(lambda card: card not in players_card_names, hands_with_card_names[obs.player]))
        random.shuffle(non_matching_cards)

        # check every card in players hand and replace openents card if it is the same with one of the none matching cards
        opponents_hands = hands_with_card_names.copy()
        del opponents_hands[obs.player]

        for card in players_card_names:
            for hand in opponents_hands:
                if card in hand:
                    hand.remove(card)
                    hand.append(non_matching_cards.pop())

        opponents_hands.insert(obs.player, players_card_names)
        one_hot_encoded_cards = np.asarray([get_cards_encoded_from_str(hand) for hand in opponents_hands])
        print(type(one_hot_encoded_cards))
        print(type(one_hot_encoded_cards[0]))

        # Replace the player's hand with the known hand from observation
        hands = one_hot_encoded_cards.copy()
        print(f'random cards for game simulation')
        print(f'{hands}')

        return hands

    def _run_mcts_for_determinization(self, hands: np.ndarray, obs: GameObservation, valid_card_indices: np.ndarray) -> np.ndarray:
        """
        Run multiple MCTS simulations for a given determinization and return scores for each valid card.
        """
        card_scores = np.zeros(len(valid_card_indices))
        
        for _ in range(self.n_simulations):
            # For each valid card, simulate the outcome by reinitializing the game simulation
            for i, card in enumerate(valid_card_indices):
                sim_game = GameSim(rule=self._rule)
                sim_game.init_from_cards(hands=hands, dealer=obs.dealer)

                # Set the trump if already determined
                if obs.trump != -1:
                    sim_game.action_trump(obs.trump)
                
                # Simulate playing the card
                sim_game.action_play_card(card)
                
                # Play out the rest of the game randomly
                while not sim_game.is_done():
                    valid_cards_sim = self._rule.get_valid_cards_from_obs(sim_game.get_observation())
                    
                    # Check if there are any valid cards left
                    if np.flatnonzero(valid_cards_sim).size == 0:
                        # No valid cards, break out of the loop or handle the situation
                        break
                    
                    # Randomly play a valid card 
                    sim_game.action_play_card(np.random.choice(np.flatnonzero(valid_cards_sim)))
                
                # Update score based on the points scored for the simulation
                points = sim_game.state.points[self._team(obs.player)]
                card_scores[i] += points

        return card_scores

    def _team(self, player: int) -> int:
        """
        Determine the team number for the given player.
        Players 0 and 2 are in team 0, and players 1 and 3 are in team 1.
        """
        return player % 2


In [9]:
rule = RuleSchieber()
game = GameSim(rule=rule)
agent = MCTSAgent()

np.random.seed(1)
game.init_from_cards(hands=deal_random_hand(), dealer=NORTH)
obs = game.get_observation()
cards = convert_one_hot_encoded_cards_to_str_encoded_list(obs.hand)
trump = agent.action_trump(obs)
game.action_trump(trump)

while not game.is_done():
    game.action_play_card(agent.action_play_card(game.get_observation()))

print(game.state.points)


[np.str_('DA'), np.str_('DK'), np.str_('D9'), np.str_('D6'), np.str_('HA'), np.str_('HQ'), np.str_('HJ'), np.str_('H8'), np.str_('H7')]
[[0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 0 0 1]
 [0 0 1 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0]
 [0 0 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 1 1 0 0]
 [1 1 0 0 0 0 0 0 1 0 1 0 0 1 0 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]]
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
players hand: [np.str_('DA'), np.str_('DK'), np.str_('D9'), np.str_('D6'), np.str_('HA'), np.str_('HQ'), np.str_('HJ'), np.str_('H8'), np.str_('H7')]
players position: 3
random cards: 
[np.str_('D9'), np.str_('H9'), np.str_('H7'), np.str_('S10'), np.str_('S6'), np.str_('CK'), np.str_('CJ'), np.str_('C10'), np.str_('C6')]
[np.str_('DQ'), np.str_('HA'), np.str_('HQ'), np.str_('HJ'), np.str_('SA'), np.str_('SQ'), np.str_('CA'), np.str_('CQ'), np.str_('C7')]
[np.str_('DJ'), np.str_('D10'), np.str_('D8'), np.str_('

In [None]:
from jass.arena.arena import Arena

# Assume MCTSAgent and MyAgent are already defined and implemented

# Define the number of games to simulate
num_games = 10

# Initialize the Arena
arena = Arena(nr_games_to_play=num_games)

# Set up the players: Teams 0 and 1 each have 2 players
# Here, team 0 consists of MCTSAgent, and team 1 consists of MyAgent
arena.set_players(MCTSAgent(), MyAgent(), MCTSAgent(), MyAgent())

# Play all the games
arena.play_all_games()

# Retrieve and display the points scored by each team across all games
team_0_points = arena.points_team_0.sum()
team_1_points = arena.points_team_1.sum()

print(f"Team 0 (MCTSAgent) Total Points: {team_0_points}")
print(f"Team 1 (MyAgent) Total Points: {team_1_points}")

# Optionally, you can calculate win ratios
team_0_wins = (arena.points_team_0 > arena.points_team_1).sum()
team_1_wins = (arena.points_team_1 > arena.points_team_0).sum()

print(f"Team 0 (MCTSAgent) Wins: {team_0_wins}")
print(f"Team 1 (MyAgent) Wins: {team_1_wins}")
