In [19]:
import rlcard
from rlcard.agents import RandomAgent
from rlcard.utils import set_global_seed

# Make environment
env = rlcard.make('blackjack', config={'seed': 0})
episode_num = 2

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Set a global seed
# set_global_seed(0)

# Set up agents
agent_0 = RandomAgent(action_num=env.action_num)
env.set_agents([agent_0])

for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=False)

    # Print out the trajectories
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.format(ts[0], ts[1], ts[2], ts[3], ts[4]))


Episode 0
State: {'obs': array([17, 10]), 'legal_actions': [0, 1]}, Action: 0, Reward: -1, Next State: {'obs': array([27, 12]), 'legal_actions': [0, 1]}, Done: True

Episode 1
State: {'obs': array([16,  5]), 'legal_actions': [0, 1]}, Action: 0, Reward: 0, Next State: {'obs': array([20,  5]), 'legal_actions': [0, 1]}, Done: False
State: {'obs': array([20,  5]), 'legal_actions': [0, 1]}, Action: 0, Reward: -1, Next State: {'obs': array([25, 16]), 'legal_actions': [0, 1]}, Done: True


In [21]:
import rlcard
from rlcard.agents import RandomAgent

env = rlcard.make('leduc-holdem')
env.set_agents([RandomAgent(action_num=env.action_num), RandomAgent(action_num=env.action_num)])

trajectories, payoffs = env.run(is_training=False)

In [24]:
trajectories[0]

[]

In [5]:
import rlcard
from rlcard import models
from rlcard.agents import LeducholdemHumanAgent as HumanAgent
from rlcard.utils import print_card

# Make environment
# Set 'record_action' to True because we need it to print results
env = rlcard.make('leduc-holdem', config={'record_action': True})
human_agent = HumanAgent(env.action_num)
cfr_agent = models.load('leduc-holdem-rule-v1').agents[0]
env.set_agents([human_agent, cfr_agent])

print(">> Leduc Hold'em pre-trained model")

while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print other players action
    final_state = trajectories[0][-1][-2]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for i in range(1, len(action_record)+1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses', pair[1])

    # Let's take a look at what the agent card is
    print('===============     Agent    ===============')
    print_card(env.get_perfect_information()['hand_cards'][1])

    print('===============     Result     ===============')
    if payoffs[0] > 0:
        print('You win {} chips!'.format(payoffs[0]))
    elif payoffs[0] == 0:
        print('It is a tie.')
    else:
        print('You lose {} chips!'.format(-payoffs[0]))
    print('')

    input("Press any key to continue...")

>> Leduc Hold'em pre-trained model
>> Start a new game
>> Player 1 chooses raise

┌─────────┐
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
└─────────┘
┌─────────┐
│J        │
│         │
│         │
│    ♥    │
│         │
│         │
│        J│
└─────────┘
Yours:   ++
Agent 1: ++++
0: call, 1: raise, 2: fold



>> You choose action (integer):  0


>> Player 1 chooses raise

┌─────────┐
│Q        │
│         │
│         │
│    ♠    │
│         │
│         │
│        Q│
└─────────┘
┌─────────┐
│J        │
│         │
│         │
│    ♥    │
│         │
│         │
│        J│
└─────────┘
Yours:   ++++
Agent 1: ++++++++
0: call, 1: raise, 2: fold



>> You choose action (integer):  2


>> Player 0 chooses fold
┌─────────┐
│J        │
│         │
│         │
│    ♠    │
│         │
│         │
│        J│
└─────────┘
You lose 2.0 chips!



KeyboardInterrupt: Interrupted by user

In [6]:
[2] != [1]

True