# Spieling with Open: Understanding OpenSpiel

Goals:

- Build a script that resumes training of an interrupted model, complete with Cache, Checkpoints, Actors, and Evaluator
- Build a PyTorch equivalent of the AlphaZero algorithm and connect it to the tournament machine


## Approach

Important files:
- open_spiel/python/algorithms/alpha_zero/model.py
- open_spiel/python/algorithms/alpha_zero/alpha_zero.py
- Use the command `python3 open_spiel/python/examples/mcts.py --game=tic_tac_toe --player1=human --player2=az --az_path <path to your checkpoint directory>` as the starting point

In [30]:
import pyspiel
import torch
import numpy as np

In [2]:
# Check which pyspiel build is imported; the path below is the compiled
# extension inside the pyenv "openspiel" environment.
pyspiel.__file__

'/Users/marcwenzlawski/.pyenv/versions/3.8/envs/openspiel/lib/python3.8/site-packages/pyspiel.so'

In [3]:
# Load Hex without passing any game parameters; the board printed further
# down is 11x11.
game = pyspiel.load_game("hex")

In [4]:
# List the Game object's attributes to see what the API offers
# (tensor shapes, utilities, state constructors).
dir(game)

['__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'action_to_string',
 'deserialize_state',
 'get_parameters',
 'get_type',
 'information_state_tensor_layout',
 'information_state_tensor_shape',
 'information_state_tensor_size',
 'make_observer',
 'max_chance_nodes_in_history',
 'max_chance_outcomes',
 'max_game_length',
 'max_history_length',
 'max_move_number',
 'max_utility',
 'min_utility',
 'new_initial_state',
 'new_initial_state_for_population',
 'new_initial_states',
 'num_distinct_actions',
 'num_players',
 'observation_tensor_layout',
 'observation_tensor_shape',
 'observation_tensor_size',
 'policy_tensor_shape',
 'utility_sum']

In [5]:
# Create a fresh state at the start of the game; later cells mutate `gs`
# in place via apply_action.
gs = game.new_initial_state()

In [6]:
# The state is exposed as the generic pyspiel.State type, not a
# Hex-specific Python class.
gs.__class__

pyspiel.State

In [7]:
# List the State object's attributes (apply_action, legal_actions_mask,
# is_terminal, ...).
dir(gs)

['__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'action_to_string',
 'apply_action',
 'apply_action_with_legality_check',
 'apply_actions',
 'apply_actions_with_legality_checks',
 'chance_outcomes',
 'child',
 'clone',
 'current_player',
 'distribution_support',
 'full_history',
 'get_game',
 'get_type',
 'history',
 'history_str',
 'information_state_string',
 'information_state_tensor',
 'is_chance_node',
 'is_initial_state',
 'is_mean_field_node',
 'is_player_node',
 'is_simultaneous_node',
 'is_terminal',
 'legal_actions',
 'legal_actions_mask',
 'mean_field_population',
 'move_number',
 'num_distinct_actions',
 'num_players',
 'observation_string',
 'observation_tensor',
 ...]

In [14]:
# Actions are flat cell indices on the 11x11 board: 15 = row 1 * 11 + column 4,
# which is where the `x` appears in the printed board.
# NOTE(review): the stones at indices 8 and 10 were placed by cells that are
# not shown in this export (execution counts are out of order) — confirm
# when re-running.
gs.apply_action(15)
gs

. . . . . . . . y . q 
 . . . . x . . . . . . 
  . . . . . . . . . . . 
   . . . . . . . . . . . 
    . . . . . . . . . . . 
     . . . . . . . . . . . 
      . . . . . . . . . . . 
       . . . . . . . . . . . 
        . . . . . . . . . . . 
         . . . . . . . . . . . 
          . . . . . . . . . . . 

In [11]:
# Arguments are (player, action): action 5 for player 0 is rendered as
# cell (5,0).
# NOTE(review): the letter prefix appears to encode the stone / edge-connection
# state — confirm against the Hex implementation.
gs.action_to_string(0, 5)

'y(5,0)'

In [15]:
# Length of the flat observation vector: 1089 = 9 * 11 * 11.
# NOTE(review): presumably 9 feature planes over the 11x11 board — confirm
# with game.observation_tensor_shape().
len(gs.observation_tensor())

1089

In [45]:
# Id of the player to move next (1 here).
# NOTE(review): ids look 0-based, which would make this the second player — confirm.
gs.current_player()

1

In [29]:
# 0/1 list with one entry per action (121 = 11 * 11); the zeros at indices
# 8, 10 and 15 are the cells already occupied on the board.
gs.legal_actions_mask()

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1]

In [33]:
# The observation shape is returned as a plain Python list (not a tuple or tensor).
type(game.observation_tensor_shape())

list

In [32]:
# Add a leading batch axis to the flat observation and inspect the resulting shape.
# `.shape` is required for the recorded output `(1, 1089)`; without it the cell
# would echo the whole array instead of a shape tuple.
np.expand_dims(gs.observation_tensor(), 0).shape

(1, 1089)

In [49]:
# t: random 0/1 test grid with the same 11x11 layout as the board.
# g: the legal-action mask, reshaped from its flat 121-entry list to 11x11
#    so that it lines up cell-for-cell with t.
t = torch.randint(0, 2, (11, 11))
g = torch.tensor(gs.legal_actions_mask()).view(11,11)
t, g

(tensor([[1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1],
         [0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0],
         [1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
         [1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0],
         [1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0],
         [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
         [0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0],
         [0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0],
         [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1],
         [1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1],
         [1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1]]),
 tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0],
         [1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]))

In [51]:
# gather along dim 1 uses g as *column indices*: out[i][j] = t[i][g[i][j]].
# Because g only holds 0/1, every output entry is either t[i][0] or t[i][1],
# which is why most rows below are constant.
# NOTE(review): if the intent was to zero out illegal cells, an elementwise
# mask such as `t * g` would do that instead — confirm what was wanted here.
torch.gather(t, 1, g)

tensor([[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
        [1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])