In [1]:
import sys
sys.path.append('/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages')
from Connect4Game import C4Game
from C4_net import NNetWrapper as nn
from utils import *
import numpy as np
import matplotlib.pyplot as plt
from mcts_c4 import MCTS
from AZ_C4 import AlphaZero
import time

In [2]:
# Settings shared by every MCTS / AlphaZero object built in this notebook.
# Values are small; the recorded training sessions further down finish in
# about one to two seconds. Notes marked TODO are readings of the key names
# that this notebook alone cannot confirm.
args = dotdict({
    'numIters': 2,  # Presumably the number of training iterations (the recorded log reports "iteration 1 of 2") - TODO confirm in AZ_C4.
    'numEps': 2,              # Number of complete self-play games simulated in each new iteration.
    'tempThreshold': 15,        # Presumably the move count after which self-play switches to a lower temperature - TODO confirm in AZ_C4.
    'updateThreshold': 0.5,     # In the arena playoff, the new network is accepted if it wins at least this fraction of games.
    'maxlenOfQueue': 200,    # Maximum number of game examples kept for training the networks.
    'numMCTSSims': 2,          # Number of MCTS simulations to run per move.
    'arenaCompare': 10,         # Number of arena games played to decide whether the new network is accepted.
    'cpuct': 1,  # Presumably the exploration constant of the MCTS selection rule - TODO confirm in mcts_c4.
    'batch_size' : 20,  # Presumably the training batch size - TODO confirm which trainer reads it.
    'elite_size' : 10,  # Presumably the number of top candidates kept by the ES trainer - TODO confirm in AZ_C4.

    'checkpoint': './temp/',  # Directory setting for checkpoints (relative to the notebook's working directory).
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50','best.pth.tar'),  # (folder, filename) pair; presumably only read when load_model is True - TODO confirm.
    'numItersForTrainExamplesHistory': 20,  # Presumably how many iterations of training examples are retained - TODO confirm.

})

In [3]:
def _build_agent(game, settings):
    """Create a fresh network plus the MCTS and AlphaZero objects that wrap it.

    Returns the triple (network, mcts, alphazero); the three objects are
    constructed in exactly that order.
    """
    net = nn(game)
    search = MCTS(game, net, settings)
    return net, search, AlphaZero(game, net, search, settings)


g = C4Game()

# Two independent agents over the same game and settings: the "grad" one is
# later trained with train_session_grad, the "es" one with train_session_es.
nnet_grad, mcts_grad, AZ_grad = _build_agent(g, args)
nnet_es, mcts_es, AZ_es = _build_agent(g, args)

# Baseline opponent: a search over a freshly constructed network that no
# cell visible in this notebook ever trains.
mcts_untrained = MCTS(g, nn(g), args)

In [4]:
# Scratch check of the game API: starting from the initial board, apply action 1
# for player 1 three times in a row. The second value returned by getNextState
# (the next player, as play_game uses it) is discarded, so the side to move never
# alternates here.
# NOTE(review): `b` is not read by any later cell visible in this notebook, and
# assigning to `_` shadows IPython's last-output variable - consider removing this cell.
b = g.getInitBoard()
for k in range(3):
    b, _ = g.getNextState(b, 1, 1)
    
    

In [10]:
# Predictions for the opening position before any training (grad first, then es).
for net in (nnet_grad, nnet_es):
    print(net.predict(g.getInitBoard()))

# One short training session per agent, each called with time_cap = 10.
AZ_grad.train_session_grad(time_cap = 10)
AZ_es.train_session_es(time_cap = 10)

# Predictions for the same position after training (es first, then grad).
# NOTE(review): in the recorded output the es network's prediction is identical
# before and after train_session_es - confirm that the ES update really
# modifies nnet_es.
for net in (nnet_es, nnet_grad):
    print(net.predict(g.getInitBoard()))

# Write both networks to disk, then read them straight back from the same files.
checkpoint_names = (
    (nnet_grad, 'nnet_grad_checkpoint.pth.tar'),
    (nnet_es, 'nnet_es_checkpoint.pth.tar'),
)
for net, checkpoint_name in checkpoint_names:
    net.save_checkpoint(filename = checkpoint_name)
for net, checkpoint_name in checkpoint_names:
    net.load_checkpoint(filename = checkpoint_name)


(array([0.14367622, 0.14303078, 0.1424532 , 0.14157675, 0.1438277 ,
       0.1415706 , 0.14386474], dtype=float32), array([-0.00490259], dtype=float32))
(array([0.14285715, 0.14285715, 0.14285715, 0.14285715, 0.14285715,
       0.14285715, 0.14285715], dtype=float32), array([0.13712771], dtype=float32))
Starting first iteration
Finished iteration 1 of 2
Previous iteration took 0.48 seconds. Average iteration time is 0.48 seconds.
Finished iteration 2 of 2
Previous iteration took 0.54 seconds. Average iteration time is 0.51 seconds.
Done in 1.0 seconds.
Starting first iteration
Finished iteration 1 of 2
Previous iteration took 1.13 seconds. Average iteration time is 1.13 seconds.
Finished iteration 2 of 2
Previous iteration took 0.88 seconds. Average iteration time is 1.0 seconds.
Done in 2.0 seconds.
(array([0.14285715, 0.14285715, 0.14285715, 0.14285715, 0.14285715,
       0.14285715, 0.14285715], dtype=float32), array([0.13712771], dtype=float32))
(array([0.14395793, 0.14327794, 0.14

In [11]:
def play_game(g, player1 = 0, player2 = 0, render = False, temp = 0):
    """Play a single game on `g` and return its signed result.

    Parameters
    ----------
    g : game object providing getInitBoard, getCanonicalForm, getValidMoves,
        getNextState and getGameEnded.
    player1, player2 : the agents moving when the side to move is 1 and -1
        respectively. The value 0 selects the built-in random player, which
        samples an action with probabilities getValidMoves / sum(getValidMoves).
        Anything else must expose getActionProb(canonical_board, temp=...).
    render : when true, print every move and the board that results from it.
    temp : forwarded unchanged to getActionProb.

    Returns
    -------
    getGameEnded(board, side) * side, evaluated after the first move for which
    getGameEnded is non-zero, where `side` is the player returned by
    getNextState. Presumably +1 means player1 won and -1 means player2 won -
    confirm against C4Game.getGameEnded.
    """
    state = g.getInitBoard()
    side = 1
    moves_made = 0
    while True:
        # The canonical view is requested every turn, even for the random player.
        canonical = g.getCanonicalForm(state, side)
        if side == 1:
            mover = player1
        elif side == -1:
            mover = player2

        is_random = mover == 0
        if is_random:
            legal = g.getValidMoves(state, side)
            weights = legal/sum(legal)
        else:
            weights = mover.getActionProb(canonical, temp = temp)

        action = np.random.choice(len(weights), p = weights)
        state, side = g.getNextState(state, side, action)
        moves_made += 1
        if render:
            # `side` has already advanced; -side is presumably the player that just moved.
            print('Move {}: {} to {}'.format(moves_made,-side, action))
            print(state, '\n\n')

        result = g.getGameEnded(state, side)
        if result != 0:
            return result * side
            
        
def play_games(g, total = 100, player1 = 0, player2 = 0, temp = 0):
    """Run `total` games through play_game and count those that return 1.

    g, player1, player2 and temp are forwarded to play_game unchanged, and
    rendering is always switched off. A progress line is printed whenever more
    than 60 seconds have passed since the previous one, and a summary line is
    printed at the end.

    Returns the number of games whose play_game result equals 1.
    """
    started_at = time.time()
    last_report_at = time.time()
    wins = 0
    for games_played in range(1, total + 1):
        outcome = play_game(g, player1 = player1, player2 = player2, render = False, temp = temp)
        wins += 1 if outcome == 1 else 0

        # Emit a progress report at most about once per minute.
        if time.time() - last_report_at > 60:
            last_report_at = time.time()
            elapsed = np.round(time.time() - started_at)
            print('{} wins in {} games. {} seconds elapsed'.format(wins, games_played, elapsed))

    total_elapsed = np.round(time.time() - started_at)
    print('Won {} games out of {}. Done in {} seconds'.format(wins, total, total_elapsed))
    return wins
        

In [12]:
# Head-to-head between the two trained searchers, one call per seating order.
# Each call prints and returns how many games play_game scored as 1 (presumably a
# first-player win - confirm against C4Game.getGameEnded). Only the second call's
# return value is displayed, because it is the cell's last expression.
play_games(g, total = 2, player1 = mcts_grad, player2 = mcts_es, temp = 0)
play_games(g, total = 2, player1 = mcts_es, player2 = mcts_grad, temp = 0)

Won 2 games out of 2. Done in 0.0 seconds
Won 1 games out of 2. Done in 0.0 seconds


1

In [13]:
# Each trained searcher moves first against player2 = 0, which play_game treats as
# its built-in random player (actions drawn from the normalized getValidMoves vector).
# temp is left at play_games' default of 0.
play_games(g, total = 2, player1 = mcts_grad, player2 = 0)
play_games(g, total = 2, player1 = mcts_es, player2 = 0)

Won 2 games out of 2. Done in 0.0 seconds
Won 2 games out of 2. Done in 0.0 seconds


2

In [14]:
# Each trained searcher moves first against mcts_untrained, the MCTS built around a
# freshly constructed network that no visible cell of this notebook trains.
# NOTE(review): two games per matchup is too few to rank the agents; raise `total`
# before drawing conclusions from these counts.
play_games(g, total = 2, player1 = mcts_grad, player2 = mcts_untrained)
play_games(g, total = 2, player1 = mcts_es, player2 = mcts_untrained)


Won 0 games out of 2. Done in 0.0 seconds
Won 0 games out of 2. Done in 0.0 seconds


0