In [None]:
# test Dealer.shuffle and cards2str functions

import numpy as np
from rlcard.games.tractor.dealer import TractorDealer

# RandomState() with no seed is seeded from OS entropy, so the deck printed
# below is presumably in a different order on every run.
np_random = np.random.RandomState()
dealer = TractorDealer(np_random)
# Shuffle, then print the deck so the result can be inspected by eye.
dealer.shuffle()
print(dealer.deck)

In [None]:
# test Judger.playable_cards_from_hand function

from rlcard.games.tractor.judger import TractorJudger

# Print what the judger reports as playable from a fixed 24-card sample hand.
# NOTE(review): 'AD' occurs four times in this hand - confirm that is intended.
print(TractorJudger.playable_cards_from_hand([
    'AD', '9H', 'KC', 'KC', 'AC', 'AC', '3D', '3D',
    '7H', 'BJ', 'BJ', 'RJ', 'RJ', 'AD', '2D', '2D',
    '2H', '2H', '2S', '2S', 'AS', 'AS', 'AD', 'AD',
]))

In [None]:
# test Player.available_actions function

from rlcard.games.tractor import Game
import numpy as np

# Start a fresh game and pick out the two players this check is about.
game = Game()
game.init_game()
leader, follower = game.players[0], game.players[1]

# Player 0: show the hand and everything the judger allows to be played from it.
print(leader.current_hand)
playable_cards = game.judger.get_playable_cards(leader)
print(playable_cards)

# Have player 0 play the longest playable option (the first one on ties).
# Alternative kept from the original cell: play one specific card via
#   cards2str([game.players[0].current_hand[10]])
played_cards = max(playable_cards, key=len)
leader.played_cards = played_cards
print(leader.played_cards)

# Player 1: show the hand and the actions reported given player 0's play.
print(follower.current_hand)
available_actions = follower.available_actions(leader, game.judger, game.round)
print(available_actions)


In [None]:
''' Test for round.calc_score_in_round
'''
import numpy as np
from rlcard.games.tractor import Round


# Build a standalone Round around its own (unseeded) RNG.
# The instance is deliberately NOT called `round`: in a notebook that name
# would replace the builtin round() for every cell executed afterwards.
np_random = np.random.RandomState()
tractor_round = Round(np_random)

# Hand-written plays to score (presumably one entry per player - confirm).
tractor_round.current_round = [['5S','TS'], ['5H','5S'], ['3S'], ['KH']]
print(tractor_round.calc_score_in_round())


In [None]:
''' generate action space
'''
import numpy as np
from rlcard.games.tractor import Dealer, Judger

# Give the dealer the RandomState instance created here. The original passed
# the numpy module itself (`np`), leaving `np_random` unused; the other cells
# in this notebook hand their game objects a RandomState.
np_random = np.random.RandomState()
dealer = Dealer(np_random)

cards = dealer.deck

# Everything the judger reports as playable from the dealer's deck, sorted,
# followed by the two non-card actions.
actions = Judger.playable_cards_from_hand(cards)
actions.sort()
actions.extend(['pass', 'pass_score'])
print(actions)

# Map the comma-joined form of each action to its position in `actions`.
# NOTE(review): ','.join() applied to the plain strings 'pass' / 'pass_score'
# puts a comma between every character ('p,a,s,s'); confirm those keys are
# what the consumer of this table expects.
action_index = {','.join(action): index for index, action in enumerate(actions)}
print(action_index)
print(len(action_index))



In [None]:
''' Test Game class
'''
from rlcard.games.tractor import Game

# Fresh game; init_game() returns the opening state together with a player id.
game = Game()
state, player_id = game.init_game()
print(state, "\r\n")

# Play until the game reports it is over, always taking the first action the
# current state lists, and print every intermediate state.
while True:
    if game.is_over():
        break
    state, next_id = game.step(state['actions'][0])
    print(state, "\r\n")

# The last state reached.
print(state)



In [None]:
''' Test Game class perf
'''
from tqdm import tqdm
from rlcard.games.tractor import Game
import cProfile

def run(num_games=1000):
    ''' Play `num_games` complete games, always taking the first listed action.

    Operates on the notebook-level `game` object, which is re-initialised with
    init_game() at the start of every game. Progress is shown with tqdm.

    Args:
        num_games (int): how many games to play. Defaults to 1000, the count
            that was previously hard-coded.
    '''
    # `_` instead of `iter`: the counter is unused and `iter` is a builtin.
    for _ in tqdm(range(num_games)):
        state, _player_id = game.init_game()
        while not game.is_over():
            state, _next_id = game.step(state['actions'][0])

# One shared Game instance; run() calls init_game() on it for every game.
game = Game()
# Profile the whole batch of games and print cProfile's report.
cProfile.run('run()')





In [None]:
''' test Tractor env
'''

''' An example of learning a NFSP Agent on Tractor
'''

import tensorflow as tf
import os

import rlcard

from rlcard.agents import RandomAgent

# Make environment
env = rlcard.make('tractor', config={'seed': 0})

# Register one random agent per player before running, the same way the
# training code calls set_agents() ahead of env.run().
# NOTE(review): stock RLCard's Env.run() steps the agents registered through
# set_agents() and has no defaults - confirm this holds for the tractor env.
env.set_agents([RandomAgent(action_num=env.action_num) for _ in range(env.player_num)])

env.reset()

# Play one complete game; as the last expression of the cell, its return
# value is what the notebook displays.
env.run()


In [None]:
# Card ordering used for indexing: for each suit in turn (hearts, clubs,
# diamonds, spades) the ranks 3 through ace, then the four 2s, then the
# black and red jokers.
CARD_RANK_STR = [
    rank + suit
    for suit in 'HCDS'
    for rank in '3456789TJQKA'
] + ['2H', '2C', '2D', '2S', 'BJ', 'RJ']

# Show the card -> position lookup table derived from that ordering.
print({card: position for position, card in enumerate(CARD_RANK_STR)})

In [None]:
def train():
    ''' An example of learning a NFSP Agent on Tractor

    Creates one NFSPAgent per player, lets them play `episode_num` training
    episodes against each other, and every `evaluate_every` episodes logs the
    tournament result of agents[0] playing against random agents. Afterwards
    the learning curve is plotted and the TensorFlow session is saved under
    'models/tractor_nfsp'.

    Uses the TensorFlow 1.x session API (tf.ConfigProto / tf.Session).
    '''

    import tensorflow as tf
    import os

    from tqdm import tqdm

    import rlcard
    from rlcard.agents import NFSPAgent
    from rlcard.agents import RandomAgent
    from rlcard.utils import set_global_seed, tournament
    from rlcard.utils import Logger

    # Make the training and the evaluation environment
    env = rlcard.make('tractor', config={'seed': 0})
    eval_env = rlcard.make('tractor', config={'seed': 0})

    # Set the iterations numbers and how frequently we evaluate the performance
    evaluate_every = 1000
    evaluate_num = 1000
    # Shortened run; the original note suggests 100000 episodes for a full one
    episode_num = 2000

    # The initial memory size
    memory_init_size = 1000

    # Train the agent every X steps
    train_every = 64

    # The paths for saving the logs and learning curves
    log_dir = './experiments/tractor_nfsp_result/'

    # Set a global seed
    set_global_seed(0)

    # Mitigation for gpu memory issue: let TensorFlow grow its GPU allocation
    # on demand
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:

        # Initialize a global step (not referenced again in this function)
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # Set up the agents: one NFSP agent per player, each in its own scope
        agents = []
        for i in range(env.player_num):
            agent = NFSPAgent(sess,
                              scope='nfsp' + str(i),
                              action_num=env.action_num,
                              state_shape=env.state_shape,
                              hidden_layers_sizes=[512, 1024, 2048, 1024, 512],
                              anticipatory_param=0.5,
                              batch_size=256,
                              rl_learning_rate=0.00005,
                              sl_learning_rate=0.00001,
                              min_buffer_size_to_learn=memory_init_size,
                              q_replay_memory_size=int(1e5),
                              q_replay_memory_init_size=memory_init_size,
                              train_every=train_every,
                              q_train_every=train_every,
                              q_batch_size=256,
                              q_mlp_layers=[512, 1024, 2048, 1024, 512],
                              reservoir_buffer_capacity=int(1e3))
            agents.append(agent)
        random_agent = RandomAgent(action_num=eval_env.action_num)

        env.set_agents(agents)
        # Evaluation pits the first NFSP agent against random agents in every
        # other seat; the count follows the environment instead of being
        # hard-coded to three.
        eval_env.set_agents([agents[0]] + [random_agent] * (eval_env.player_num - 1))

        # Initialize global variables
        sess.run(tf.global_variables_initializer())

        # Init a Logger to plot the learning curve
        logger = Logger(log_dir)

        for episode in tqdm(range(episode_num)):
            # First sample a policy for the episode
            for agent in agents:
                agent.sample_episode_policy()

            # Generate data from the environment
            trajectories, _ = env.run(is_training=True)

            # Feed transitions into agent memory, and train the agent
            for i in range(env.player_num):
                for ts in trajectories[i]:
                    agents[i].feed(ts)

            # Evaluate the performance. Play with random agents.
            if episode % evaluate_every == 0:
                logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

        # Close files in the logger
        logger.close_files()

        # Plot the learning curve
        logger.plot('NFSP')

        # Save model; exist_ok makes the directory creation a single call that
        # also tolerates the directory already being there
        save_dir = 'models/tractor_nfsp'
        os.makedirs(save_dir, exist_ok=True)
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(save_dir, 'model'))
        


In [None]:
''' perf test of E2E training
'''

import cProfile

# Profile one complete train() call (setup, training loop, evaluation and
# model saving) and print cProfile's report.
cProfile.run('train()')