In [1]:
import math
from collections import Counter
import numpy as np

In [2]:

NUM_COLUMNS = 7
COLUMN_HEIGHT = 6
FOUR = 4
MAX_DEPTH = 4

In [3]:
def valid_moves(board):
    """Returns columns where a disc may be played"""
    return [n for n in range(NUM_COLUMNS) if board[n, COLUMN_HEIGHT - 1] == 0]


def play(board, column, player):
    """Updates `board` as `player` drops a disc in `column`"""
    (index,) = next((i for i, v in np.ndenumerate(board[column]) if v == 0))
    board[column, index] = player


def take_back(board, column):
    """Updates `board` removing top disc from `column`"""
    (index,) = [i for i, v in np.ndenumerate(board[column]) if v != 0][-1]
    board[column, index] = 0


def four_in_a_row(board, player):
    """Checks if `player` has a 4-piece line"""
    return (
        any(
            all(board[c, r] == player)
            for c in range(NUM_COLUMNS)
            for r in (list(range(n, n + FOUR)) for n in range(COLUMN_HEIGHT - FOUR + 1))
        )
        or any(
            all(board[c, r] == player)
            for r in range(COLUMN_HEIGHT)
            for c in (list(range(n, n + FOUR)) for n in range(NUM_COLUMNS - FOUR + 1))
        )
        or any(
            np.all(board[diag] == player)
            for diag in (
                (range(ro, ro + FOUR), range(co, co + FOUR))
                for ro in range(0, NUM_COLUMNS - FOUR + 1)
                for co in range(0, COLUMN_HEIGHT - FOUR + 1)
            )
        )
        or any(
            np.all(board[diag] == player)
            for diag in (
                (range(ro, ro + FOUR), range(co + FOUR - 1, co - 1, -1))
                for ro in range(0, NUM_COLUMNS - FOUR + 1)
                for co in range(0, COLUMN_HEIGHT - FOUR + 1)
            )
        )
    )


In [4]:
from itertools import product


def calculate_board_hash (board):
    cnt =""
    for i,j in product(range(NUM_COLUMNS), range(COLUMN_HEIGHT)):
        if board[i][j]==1:
          cnt+="2"
        else:
            if board[i][j]==-1:
                cnt+="1"
            else: cnt+="0"
    return cnt

def _mc(board, player):
    p = -player
    while valid_moves(board):
        p = -p
        c = np.random.choice(valid_moves(board))
        play(board, c, p)
        if four_in_a_row(board, p):
            return p
    return 0


def montecarlo(board, player):
    montecarlo_samples = 100
    cnt = Counter(_mc(np.copy(board), player) for _ in range(montecarlo_samples))
    return (cnt[1] - cnt[-1]) / montecarlo_samples


def eval_board(board, player):
    if four_in_a_row(board, 1):
        # Alice won
        return 1
    elif four_in_a_row(board, -1):
        # Bob won
        return -1
    else:
        # Not terminal, let's simulate...
        d = {'key': 'value'}
        bv = float('nan')
        return minmax(board, player,0,bv,d)


def minmax(board, player, depth,best_value, lut):
    if calculate_board_hash(board) in lut:
        print ("LUT hit!")
        return  -1,lut[calculate_board_hash(board)]
    if four_in_a_row(board, player) == player:
        lut[calculate_board_hash(board)] =player
        return -1, player
    possible_moves = valid_moves(board)
    if not possible_moves:
        lut[calculate_board_hash(board)] =0
        return -1,0
    depth+=1;
    first_eval = 1
    best_eval =float('NaN')
    ret_val=-1
    if depth > MAX_DEPTH:
        return  -1,montecarlo(np.copy(board), -player)
    for val in possible_moves:
         play(board, val, player)
         if first_eval:
             first_eval =0
             _,values=  minmax(np.copy(board), -player,depth,best_eval,lut)
             best_eval= values*player
             ret_val=val
             if best_eval == 1:
                 lut[calculate_board_hash(board)] =player
                 return val,player
         else:
             _,values= minmax(np.copy(board), -player,depth,best_eval,lut)
             best_eval=max(best_eval,values*player)
             if best_eval==values:
                 ret_val=val
             if best_eval == 1:
                 lut[calculate_board_hash(board)] =player
                 return val,player
             if not math.isnan(best_value) and best_eval>-best_value:
                print("ALPHABETA pruning!")
                print(best_value)
                lut[calculate_board_hash(board)] =best_eval*player
                return val,best_eval*player
    lut[calculate_board_hash(board)] =player
    return ret_val,best_eval*player

In [None]:
board = board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)




play(board, 3, 1)

play(board, 0, -1)

play(board, 4, 1)

play(board, 0, -1)

play(board, 5, 1)
print(calculate_board_hash(board))
print(board)
eval_board(board, 1)

110000000000000000200000200000200000000000
[[-1 -1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
LUT hit!
LUT hit!
LUT hit!
LUT hit!
LUT hit!
LUT hit!
LUT hit!
LUT hit!
LUT hit!
