In [195]:
import math
from collections import Counter
import numpy as np

In [196]:

NUM_COLUMNS = 7
COLUMN_HEIGHT = 6
FOUR = 4
MAX_DEPTH = 4

In [197]:
def valid_moves(board):
    """Returns columns where a disc may be played"""
    return [n for n in range(NUM_COLUMNS) if board[n, COLUMN_HEIGHT - 1] == 0]


def play(board, column, player):
    """Updates `board` as `player` drops a disc in `column`"""
    (index,) = next((i for i, v in np.ndenumerate(board[column]) if v == 0))
    board[column, index] = player


def take_back(board, column):
    """Updates `board` removing top disc from `column`"""
    (index,) = [i for i, v in np.ndenumerate(board[column]) if v != 0][-1]
    board[column, index] = 0


def four_in_a_row(board, player):
    """Checks if `player` has a 4-piece line"""
    return (
        any(
            all(board[c, r] == player)
            for c in range(NUM_COLUMNS)
            for r in (list(range(n, n + FOUR)) for n in range(COLUMN_HEIGHT - FOUR + 1))
        )
        or any(
            all(board[c, r] == player)
            for r in range(COLUMN_HEIGHT)
            for c in (list(range(n, n + FOUR)) for n in range(NUM_COLUMNS - FOUR + 1))
        )
        or any(
            np.all(board[diag] == player)
            for diag in (
                (range(ro, ro + FOUR), range(co, co + FOUR))
                for ro in range(0, NUM_COLUMNS - FOUR + 1)
                for co in range(0, COLUMN_HEIGHT - FOUR + 1)
            )
        )
        or any(
            np.all(board[diag] == player)
            for diag in (
                (range(ro, ro + FOUR), range(co + FOUR - 1, co - 1, -1))
                for ro in range(0, NUM_COLUMNS - FOUR + 1)
                for co in range(0, COLUMN_HEIGHT - FOUR + 1)
            )
        )
    )


In [198]:
import random

from itertools import product


def calculate_board_hash (board):
    cnt =""
    for i,j in product(range(NUM_COLUMNS), range(COLUMN_HEIGHT)):
        if board[i][j]==1:
          cnt+="2"
        else:
            if board[i][j]==-1:
                cnt+="1"
            else: cnt+="0"
    return cnt

def _mc(board, player):
    p = -player
    while valid_moves(board):
        p = -p
        c = np.random.choice(valid_moves(board))
        play(board, c, p)
        if four_in_a_row(board, p):
            return p
    return 0


def montecarlo(board, player):
    montecarlo_samples = 100
    cnt = Counter(_mc(np.copy(board), player) for _ in range(montecarlo_samples))

    return (cnt[1] - cnt[-1]) / montecarlo_samples


def eval_board(board, player):
    if four_in_a_row(board, 1):
        # Alice won
        return 1
    elif four_in_a_row(board, -1):
        # Bob won
        return -1
    else:
        # Not terminal, let's simulate...
        d = {'key': 'value'}
        bv = float('nan')
        return minmax(np.copy(board), player,0,bv,d)


def minmax(board, player, depth,best_value, lut):
    if calculate_board_hash(board) in lut:
        return  -1,lut[calculate_board_hash(board)]
    if four_in_a_row(board, -player):
        lut[calculate_board_hash(board)] =-player
        return -1, -player
    possible_moves = valid_moves(board)
    if not possible_moves:
        lut[calculate_board_hash(board)] =0
        return -1,0
    depth+=1;
    first_eval = 1
    best_eval =float('NaN')
    ret_val=-1
    if depth > MAX_DEPTH:
        return  -1,montecarlo(np.copy(board), -player)
    for val in possible_moves:
         recur_board= np.copy(board)
         play(recur_board, val, player)

         if first_eval:
             first_eval =0
             _,values=  minmax(np.copy(recur_board), -player,depth,best_eval,lut)
             if depth==1:
                 print (val)
                 print (values)
             best_eval= values*player
             ret_val=val
         else:
             _,values= minmax(np.copy(recur_board), -player,depth,best_eval,lut)
             if depth==1:
                 print (val)
                 print (values)
             if best_eval<values*player:
                 ret_val=val
             best_eval=max(best_eval,values*player)
             if not math.isnan(best_value) and best_eval>-best_value:
                lut[calculate_board_hash(board)] =best_eval*player
                return val,best_eval*player
    lut[calculate_board_hash(board)] =best_eval
    return ret_val,best_eval*player


class Node():
    def __init__(self, board, parent = None):
        self.visits = 1
        self.reward = 0.0
        self.board = board
        self.children = []
        self.children_move = []
        self.parent = parent


def MCTS(maxIter, root, player0, factor):
    for inter in range(maxIter):
        front, player = treePolicy( root , player0 , factor )
        reward = _mc(np.copy(front.board), player)
        backup(front,reward,player)
    ans = bestChild(root,0)
    return ans
def treePolicy( node, player , factor ):
	while len(valid_moves(node.board))>0 and four_in_a_row(board,player) == 0:
		if  len(valid_moves(node.board)) >len(node.children) :
			return expand(node, player), -player
		else:
			node = bestChild ( node , factor )
			player *= -1
	return node, player

def expand( node, player ):
	tried_children_move = [m for m in node.children_move]
	possible_moves = valid_moves(node.board)

	for move in possible_moves:
		if move not in tried_children_move:
			new_state =  Node(np.copy(node.board),node)
			play(new_state.board, move, player)
			break

	node.children.append(new_state)

	node.children_move.append(move)

	return new_state

def bestChild(node,factor):
    bestscore = -10000000.0

    bestChildren = []
    for c in node.children:
        exploit = c.reward / c.visits
        explore = math.sqrt(math.log(2.0*node.visits)/float(c.visits))
        score = exploit + factor*explore
        if score == bestscore:
            bestChildren.append(c)
        if score > bestscore:
            bestChildren = [c]
            bestscore = score

    return random.choice(bestChildren)



def backup( node , reward, turn ):
	while node != None:
		node.visits += 1
		node.reward -= turn*reward
		node = node.parent
		turn *= -1
	return




In [199]:
board = board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)

play(board,3,1)
play(board,0,-1)
play(board,2,1)
play(board,0,-1)
play(board,6,1)




print(board)

player =1
move = 0
game= Node(board)
factor =0.5
while not four_in_a_row(board, player):
    player*=-1
    game= MCTS(42,game,player,factor)
    print(game.board)
    factor=max(factor-0.05,0.05)
print("The match was won by player: ")
print(player)
while False: #not four_in_a_row(board, player):
    player = -player
    print("\nStarting evaluation for player: ")
    print(player)
    print("\n")
    move, eval = eval_board(board, player)
    play(board,move,player)
    print("\n")
    print(board)
    print("Move:")
    print(move)
    print("Eval:")
    print(eval)
    print("\n")
print("The match was won by player: ")
print(player)



[[-1 -1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]]
[[-1 -1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1 -1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]]
[[-1 -1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1 -1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]]
[[-1 -1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1 -1  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]]
[[-1 -1  1  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1 -1  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]]
[[-1 -1  1  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1 -1  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1 -1  0  0  0  0]]
[[-1 -1  1  1  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1 -1  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 1  0

IndexError: list index out of range