# Experiment - Play

In [1]:
import agent
import memory
import model
import game
import config
import numpy as np

from IPython.display import clear_output

Using TensorFlow backend.


## Load model and agent

In [2]:
# Which training pipeline and which saved episode to restore.
pipeline_id = "tictactoe_8"
episode = 9
##############################
# Network input shape and size of the action space handed to the model.
input_shape = (3, 3, 3)
num_possible_moves = 9

# Restore the stored positions for the chosen pipeline/episode.
memory_name = "position_memory_{}_ep_{}".format(pipeline_id, episode)
position_memory = memory.PositionMemory()
position_memory.load(memory_name)

# Build the model wrapper and load its saved weights.
az_model = model.AZModel(
    memory=position_memory,
    input_shape=input_shape,
    num_possible_moves=num_possible_moves,
    model_id=pipeline_id,
)
# NOTE(review): the argument 5 is hard-coded while `episode` above is 9 —
# confirm that loading a different model version than the memory is intended.
az_model.load(5)

# Agent that plays using the restored model.
az_agent = agent.AlphaZeroAgent(model=az_model)

Loaded model ttt_random_11_5


## Play against agent

In [3]:
def show_board(position, current_player, nn_policy, nn_value):
    """Print the board, the legal moves and the network's evaluation.

    Args:
        position: object exposing ``state`` (indexed as ``state[:, :, k]``;
            only planes 0 and 1 are read here) and ``legal_actions``
            (entries equal to 1 are reported as legal moves).
        current_player: when this is ``-1`` the two stone planes are swapped
            before drawing, otherwise plane 0 is drawn as "X" and plane 1
            as "O".
        nn_policy: indexable with 0..8; printed as a 3x3 grid.
        nn_value: printed verbatim after "Value: ".

    Returns:
        The indices of ``position.legal_actions`` that equal 1, as returned
        by ``np.where``.
    """
    symbols = np.array([" ", "X", "O"])
    row_template = "|{}|{}|{}|"
    border = " -----"

    # Cell code 1 -> "X", code 2 -> "O"; which plane gets which code depends
    # on whose turn it is.
    x_plane, o_plane = (1, 0) if current_player == -1 else (0, 1)
    board = position.state[:, :, x_plane] + 2 * position.state[:, :, o_plane]

    print(border)
    for row in range(3):
        print(row_template.format(*[symbols[board[row, col]] for col in range(3)]))
    print(border)

    legal_moves = np.where(position.legal_actions == 1)[0]
    print("Legal Moves: {}".format(legal_moves))
    print("Value: {}".format(nn_value))
    print("Distribution:")

    # Lay the flat policy vector out in the same 3x3 arrangement as the board.
    for start in range(0, 9, 3):
        print(row_template.format(
            nn_policy[start],
            nn_policy[start + 1],
            nn_policy[start + 2],
        ))
    return legal_moves

In [4]:
def agent_vs_user(agent):
    """Play one interactive TicTacToe game between ``agent`` and the user.

    The side to move is read from ``game_environment.current_player``: when
    it is ``-1`` the agent moves, otherwise the user is prompted for a move
    index on stdin. After every executed move the board and the network's
    evaluation of the new position are printed via ``show_board``.

    An illegal or non-numeric user input prints "Illegal move!" and prompts
    again; it does not consume one of the nine turns.

    Args:
        agent: object providing ``join_game``, ``play_move``,
            ``model.evaluate`` and ``tree_search.update_root``. Note that
            this parameter shadows the imported ``agent`` module inside the
            function.

    Returns:
        The last result reported by ``execute_move`` / ``play_move``; ``0``
        if the game ended without a winner.
    """
    max_turns = 9  # loop bound: at most nine moves are played

    game_environment = game.TicTacToe()
    game_environment.reset_game()
    agent.join_game(game_environment)

    current_player = game_environment.current_player
    winning = 0
    turn = 0
    num_simulations = config.EVALUATION['num_simulations']

    position = game_environment.get_current_position()
    nn_value, nn_policy = agent.model.evaluate(position.state)
    legal_moves = show_board(position, current_player, nn_policy, nn_value)

    # `== 0` instead of `is 0`: identity comparison against an int literal is
    # implementation-dependent and raises a SyntaxWarning on Python 3.8+.
    while winning == 0 and turn < max_turns:
        if current_player == -1:
            # agent move
            clear_output()
            winning, _ = agent.play_move(num_simulations, temperature=0)
        else:
            # user move
            try:
                move = int(input())
            except ValueError:
                move = None

            if move is None or move not in legal_moves:
                # Re-prompt without advancing the turn counter; the game
                # state has not changed.
                print("Illegal move!")
                continue

            winning = game_environment.execute_move(move)
            # Keep the agent's search tree in sync with the user's move.
            agent.tree_search.update_root(move)

        position = game_environment.get_current_position()
        nn_value, nn_policy = agent.model.evaluate(position.state)
        # NOTE(review): `current_player` still refers to the side that just
        # moved here (as in the original code) — confirm that this is the
        # perspective show_board expects.
        legal_moves = show_board(position, current_player, nn_policy, nn_value)

        current_player = game_environment.current_player
        turn += 1

    if winning == 0:
        # Loop ended because the turn budget ran out, not because of a win.
        print("Game ended in a draw after {} turns.".format(turn))
    else:
        print("Player {} won the game after {} turns.".format(winning*current_player, turn))

    return winning

In [5]:
# Start an interactive game against the loaded agent; on the user's turn,
# type the index of a move listed under "Legal Moves" and press Enter.
agent_vs_user(az_agent)

 -----
|O|X| |
|O|O| |
|X|X| |
 -----
Legal Moves: [2 5 8]
Value: 0.2976859211921692
Distribution:
|0.0005192581447772682|0.008472572080790997|0.036073166877031326|
|0.0005564227467402816|0.0010216637747362256|0.020716460421681404|
|0.0006489946972578764|0.0031845192424952984|0.9288069009780884|
5
++++++++++ SIMULATION: ILLEGAL MOVE +++++++++++
illegal move 5
simulation: legal_moves: [0 0 1 0 0 0 0 0 1]
simulation current position:
 [1] | [-1] | [0] 
 [1] | [1] | [1] 
 [-1] | [-1] | [0] 
 -----
|O|X| |
|O|O|O|
|X|X| |
 -----
Legal Moves: [2 8]
Value: 0.5535036325454712
Distribution:
|0.0004060008213855326|0.009973080828785896|0.10363472253084183|
|0.042611341923475266|0.004207355435937643|8.164860628312454e-05|
|0.004795646294951439|0.0005004603299312294|0.8337897062301636|


TypeError: 'int' object is not subscriptable