Initial game setup. All game logic is in Connect4Game.py.

In [1]:
from Connect4Game import C4Game

# Game instance shared by every training cell below: a board of height 6 and
# width 7 with a win length of 4.
g = C4Game(
    height=6,
    width=7,
    win_length=4,
)

Below is the training procedure for AlphaZero and AlphaZero-ES.
- The first cell contains the imports and the hyperparameters.
- The second cell defines the training function and runs the training.

AlphaZero code is modified from  

https://github.com/suragnair/alpha-zero-general

For original AlphaZero paper, see  

https://arxiv.org/abs/1712.01815



In [2]:
from C4_AZ import *
from mcts_c4 import MCTS
from C4_net import NNetWrapper as wrapper
import time
from utils import *

# Hyperparameters for the AlphaZero / AlphaZero-ES training code below.
args = dotdict(
    {
        # number of complete self-play games simulated in each new iteration
        'numEps': 5,
        # number of simulations MCTS runs (presumably per move -- confirm in mcts_c4)
        'numMCTSSims': 20,
        # hyperparameter for MCTS
        'cpuct': 1,
        # number of samples to take for AZ-ES (N in the paper)
        'batch_size': 8,
        # elite size for AZ-ES (K in the paper)
        'elite_size': 4,
    }
)

In [4]:
def train_AZ(g, nnet, use_gradient = True, t_max = 3600, checkpoint_name = 'temp.pth.tar'):
    """
    Train a network with AlphaZero or AlphaZero-ES for a fixed time budget.

    Hyperparameters are read from the notebook-level ``args`` object, so the
    cell defining ``args`` must have been run first.

    g: game to train on
    nnet: neural network wrapper to train; must provide ``save_checkpoint``
    use_gradient: if True, every iteration calls ``AZ.train_gradient()``
        (gradient-based / traditional AlphaZero); otherwise it calls
        ``AZ.train_es()`` (AlphaZero-ES)
    t_max: training time budget in seconds, 3600 in paper. The budget is only
        checked between iterations, so the total run time can exceed it by up
        to the duration of one iteration.
    checkpoint_name: file name passed to ``nnet.save_checkpoint`` once
        training has finished (defaults to the previously hard-coded
        'temp.pth.tar')
    """

    # set up the MCTS and AlphaZero objects around the given network
    mcts = MCTS(g, nnet, args)
    AZ = AlphaZero(g, nnet, mcts, args)

    # choose the per-iteration training step once, before entering the loop
    train_step = AZ.train_gradient if use_gradient else AZ.train_es

    # training loop; a monotonic clock is used so that system clock
    # adjustments cannot shorten or lengthen the time budget
    start = time.monotonic()
    iterations = 0

    while (time.monotonic() - start) < t_max:
        iterations += 1
        train_step()

    # print number of iterations and total training time (seconds)
    print(iterations)
    print(time.monotonic() - start)

    # save the trained network
    # (the original notebook states checkpoints are stored in folder 'checkpoint')
    # don't overwrite what is already there unless you want to train new models
    nnet.save_checkpoint(filename = checkpoint_name)

# Build a fresh network wrapper for the game and train it with the
# gradient-based variant. t_max=10 keeps this a short demonstration run
# (the train_AZ docstring quotes 3600 for the paper).
nnet = wrapper(g)
train_AZ(
    g,
    nnet,
    use_gradient=True,
    t_max=10,
)

3
12.459793090820312
Checkpoint Directory exists! 


ES methods  

Parameters (except for training time) are the same as in the paper:  
episode_count is G = 50  
batch_size is N = 8  
elite_size is K = 4  
iters is the maximum number of iterations; because we want to cap training by time instead, it is set to a very large number.


- first cell is imports
- second cell is (1+1)-ES
- third cell is CEM

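Models are saved to the folder 'checkpoint'. Both ES cells write to the same file, temp.pth.tar, so the later run overwrites the earlier one unless the file is renamed in between.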

In [7]:
from C4_es import *
import os

# Training time budget, passed as t_max to the ES runs in the cells below
# (presumably seconds, judging by the "Finished in ... seconds" output -- confirm in C4_es).
t_max = 10

In [9]:
# (1+1)-ES run. iters is set very high so that the t_max time budget, not the
# iteration count, is what ends training.
mu, _ = oneone(g, iters = 100000000, episode_count = 50, temp = 1, t_max = t_max)

# torch.save does not create missing parent directories, so make sure the
# target folder exists even if nothing has created it yet.
os.makedirs('checkpoint', exist_ok=True)
fname= os.path.join('checkpoint', 'temp.pth.tar')
torch.save(mu.state_dict(), fname)

Finished in 18.0 seconds and 2 iterations


In [10]:
# CEM run with the batch size (N = 8), elite size (K = 4) and episode count
# (G = 50) listed in the notes above; training is capped by t_max.
mu, _ = cem(g, iters = 100000, batch_size = 8, elite_size = 4, episode_count = 50, weight_type = 'log', temp = 1, t_max = t_max)

# torch.save does not create missing parent directories, so make sure the
# target folder exists even if nothing has created it yet.
os.makedirs('checkpoint', exist_ok=True)
fname= os.path.join('checkpoint', 'temp.pth.tar')
torch.save(mu.state_dict(), fname)

Finished in 49.0 seconds and 1 iterations


In [14]:
from C4_a2c import *
import os

In [15]:
# A2C run on the same game with t_max = 10 (the captured output reports
# "Finished in 10.0 seconds").
actor_critic = a2c(g, iters = 20000, t_max = 10)

# torch.save does not create missing parent directories, so make sure the
# target folder exists even if nothing has created it yet.
os.makedirs('checkpoint', exist_ok=True)
fname= os.path.join('checkpoint', 'temp.pth.tar')
torch.save(actor_critic.state_dict(), fname)

Finished in 10.0 seconds and 37 iterations
