# Head-to-Head Matches

In [1]:
import numpy as np
import matplotlib.pyplot as plt

## Random agent

In [None]:
"""A random agent. Random agents is for running toy examples on the card games"""

class RandomAgent(object):
    """Baseline agent that picks uniformly among the currently legal actions.

    Intended for toy runs on the card games; it holds no learned parameters.
    """

    def __init__(self, num_actions):
        """Set up the random agent.

        Args:
            num_actions (int): The size of the output action space
        """
        self.num_actions = num_actions
        # Read by the surrounding framework; this agent works on encoded
        # action ids rather than raw actions.
        self.use_raw = False

    @staticmethod
    def step(state):
        """Pick an action for the current state (used when generating data).

        Args:
            state (dict): Current state; the keys of ``state['legal_actions']``
                are the candidate action ids.

        Returns:
            action (int): One of the legal action ids, chosen at random
        """
        candidates = list(state['legal_actions'].keys())
        return np.random.choice(candidates)

    def eval_step(self, state):
        """Pick an action for evaluation and report the action probabilities.

        The agent is never trained, so the chosen action comes from ``step``;
        in addition, the uniform probability of each legal action is returned.

        Args:
            state (dict): Current state; reads ``state['legal_actions']`` and
                ``state['raw_legal_actions']``.

        Returns:
            action (int): The action chosen at random by ``step``
            info (dict): ``info['probs']`` maps each raw legal action to its
                probability
        """
        legal_ids = list(state['legal_actions'].keys())

        # Dense probability vector over the whole action space: uniform mass
        # on the legal ids, zero everywhere else.
        probs = [0] * self.num_actions
        for action_id in legal_ids:
            probs[action_id] = 1 / len(legal_ids)

        # Raw actions are paired with legal ids by position.
        info = {
            'probs': {
                state['raw_legal_actions'][position]: probs[action_id]
                for position, action_id in enumerate(legal_ids)
            }
        }

        return self.step(state), info

## CFR agent against Random agent

In [None]:
from leduc_test import LeducholdemEnv

In [None]:
# Head-to-head evaluation: a RandomAgent (first agent passed to set_agents)
# plays the pre-trained Leduc Hold'em CFR agent (second agent).
# NOTE(review): `Env` and `models` are not defined or imported in the cells
# visible here -- presumably they come from rlcard or an earlier cell; confirm.
env = LeducholdemEnv(Env)
random_agent = RandomAgent(env.num_actions)
cfr_agent = models.load('leduc-holdem-cfr').agents[0]
env.set_agents([
    random_agent,
    cfr_agent,
])

print(">> Leduc Hold'em pre-trained model")

iteration = 10000

# Per-game history and running totals built from payoffs[0].
# NOTE(review): payoffs[0] presumably belongs to the first agent registered
# above (the random agent), despite the "EA_" prefix -- confirm.
EA_wealth = []
EA_wins = []
EA_cumulative_wealth = 0
EA_cumulative_wins = 0

# One game per iteration. The original wrapped the body in a
# `while True: ... break` that always ran exactly once, and its inner loop
# reused `i`, clobbering the outer loop variable; both are removed here.
for i in range(iteration):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)

    # Gather the trailing actions recorded after the last move of
    # state['current_player'], oldest first. The result is not used further
    # in this cell; the names are kept in case later cells inspect them.
    final_state = trajectories[0][-1]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for record in reversed(action_record):
        if record[0] == state['current_player']:
            break
        _action_list.insert(0, record)

    # Record the cumulative wealth after this game.
    EA_cumulative_wealth += payoffs[0]
    EA_wealth.append(EA_cumulative_wealth)

    # A strictly positive payoff counts as a win.
    if payoffs[0] > 0:
        EA_cumulative_wins += 1
    EA_wins.append(EA_cumulative_wins)



In [None]:
# Two stacked panels: cumulative wealth on top, cumulative wins below,
# both indexed by game number.
plt.figure(figsize=(12, 6))

_ea_panels = (
    (EA_wealth, 'Cumulative Wealth', 'Player A Cumulative Wealth over Iterations'),
    (EA_wins, 'Cumulative Wins', 'Player A Cumulative Wins over Iterations'),
)
for _row, (_series, _y_label, _panel_title) in enumerate(_ea_panels, start=1):
    plt.subplot(2, 1, _row)
    plt.plot(_series)
    plt.xlabel('Iteration')
    plt.ylabel(_y_label)
    plt.title(_panel_title)

plt.tight_layout()
plt.show()

## Time-average CFR agent against Ensemble-average CFR agent

In [None]:
# Head-to-head evaluation between two CFR agents on Leduc Hold'em.
# NOTE(review): `Env` and `models` are not defined or imported in the cells
# visible here -- presumably they come from rlcard or an earlier cell; confirm.
# NOTE(review): both agents are loaded from the same 'leduc-holdem-cfr'
# checkpoint, so as written the "time-average" and "ensemble-average" agents
# are the same model -- confirm this is intended.
nonICMgame_env = LeducholdemEnv(Env)
TA_cfr_agent = models.load('leduc-holdem-cfr').agents[0]
EA_cfr_agent = models.load('leduc-holdem-cfr').agents[0]
nonICMgame_env.set_agents([
    EA_cfr_agent,
    TA_cfr_agent,
])

print(">> Leduc Hold'em pre-trained model")

iteration = 10000

# Per-game history and running totals built from payoffs[0].
# NOTE(review): EA_cfr_agent is the first agent registered above, so
# payoffs[0] presumably belongs to it rather than to TA_cfr_agent, despite
# the "TA_" prefix -- confirm which player these statistics should track.
TA_wealth = []
TA_wins = []
TA_cumulative_wealth = 0
TA_cumulative_wins = 0

# One game per iteration. The original wrapped the body in a
# `while True: ... break` that always ran exactly once, and its inner loop
# reused `i`, clobbering the outer loop variable; both are removed here.
for i in range(iteration):
    print(">> Start a new game")

    trajectories, payoffs = nonICMgame_env.run(is_training=False)

    # Gather the trailing actions recorded after the last move of
    # state['current_player'], oldest first. The result is not used further
    # in this cell; the names are kept in case later cells inspect them.
    final_state = trajectories[0][-1]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for record in reversed(action_record):
        if record[0] == state['current_player']:
            break
        _action_list.insert(0, record)

    # Record the cumulative wealth after this game.
    TA_cumulative_wealth += payoffs[0]
    TA_wealth.append(TA_cumulative_wealth)

    # A strictly positive payoff counts as a win.
    if payoffs[0] > 0:
        TA_cumulative_wins += 1
    TA_wins.append(TA_cumulative_wins)

In [None]:
# Two stacked panels for the CFR-vs-CFR match: cumulative wealth on top,
# cumulative wins below, both indexed by game number.
plt.figure(figsize=(12, 6))

plt.subplot(2, 1, 1)
plt.plot(TA_wealth)
plt.xlabel('Iteration')
plt.ylabel('Cumulative Wealth')
plt.title('Player A Cumulative Wealth over Iterations')

# Plotting the cumulative wins over iterations.
# (Fixed: the original called the non-existent `plt.plott`, which raises
# AttributeError before the figure is shown.)
plt.subplot(2, 1, 2)
plt.plot(TA_wins)
plt.xlabel('Iteration')
plt.ylabel('Cumulative Wins')
plt.title('Player A Cumulative Wins over Iterations')

plt.tight_layout()
plt.show()