In [1]:
import numpy as np
import random as rand
import warnings
warnings.filterwarnings("ignore")

# Sentinel score for a slot that cannot be played. Cell values are drawn from
# [-9, 9] and a move multiplies three of them, so 9**3 bounds a single move's
# magnitude; the extra factor keeps the sentinel far below any real score so
# that softmax assigns it (numerically) zero probability.
NONE_MOVE_SCORE = -100000 * (9**3) 
# Look-ahead limit: get_deep_move_score stops recursing once its depth exceeds
# this value, and get_move_score skips look-ahead entirely when it is 0.
DEPTH = 1

In [2]:
def softmax(x):
    """Return the softmax of ``x``.

    The global maximum of ``x`` is subtracted before exponentiating so the
    largest exponent is 0, which avoids overflow. The normalising sum is
    taken along axis 0.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    normaliser = exponentials.sum(axis=0)
    return exponentials / normaliser

In [3]:
def reset_game(board_size=5, queue_size=3):
    """Create a random starting position.

    Args:
        board_size: number of slots on the board (default 5).
        queue_size: number of replacement values waiting in the queue
            (default 3).

    Returns:
        ``(initial_board, next_queue)`` as two lists of Python ints, each
        value drawn uniformly from the inclusive range [-9, 9].
    """
    # np.random.random_integers is deprecated; randint is its documented
    # replacement. randint's upper bound is exclusive, hence 10 to keep 9
    # reachable.
    initial_board = np.random.randint(-9, 10, size=board_size).tolist()
    next_queue = np.random.randint(-9, 10, size=queue_size).tolist()
    return initial_board, next_queue

In [4]:
def get_left_val(i, board):
    """Return the nearest non-None value to the left of slot ``i``.

    Empty (None) slots are skipped. When no filled slot exists to the left,
    including when ``i`` is the first slot, the neutral factor 1 is returned.
    """
    # Walk leftwards from the immediate neighbour down to index 0.
    for idx in range(i - 1, -1, -1):
        neighbour = board[idx]
        if neighbour is not None:
            return neighbour
    return 1

In [5]:
def get_right_val(i, board):
    """Return the nearest non-None value to the right of slot ``i``.

    Empty (None) slots are skipped. When no filled slot exists to the right,
    including when ``i`` is the last slot, the neutral factor 1 is returned.
    """
    # Walk rightwards from the immediate neighbour to the end of the board.
    for idx in range(i + 1, len(board)):
        neighbour = board[idx]
        if neighbour is not None:
            return neighbour
    return 1

In [6]:
def get_next_board_state(i, board, queue):
    """Return the position that results from playing slot ``i``.

    The head of the queue moves into slot ``i``; if the queue is empty the
    slot becomes None. ``board`` and ``queue`` are left untouched: the
    function works on, and returns, copies.

    Returns:
        ``(next_board, next_queue)``.
    """
    next_board = board.copy()
    remaining = queue.copy()
    # Refill from the front of the queue, or leave the slot empty.
    next_board[i] = remaining.pop(0) if len(remaining) > 0 else None
    return next_board, remaining

In [7]:
def get_deep_move_score(board, queue, depth):
    """Return the best net score obtainable from ``board`` by look-ahead.

    For every playable (non-None) slot the immediate value is
    ``left * slot * right`` and the net score is that value minus the best
    net score of the position the move leads to, evaluated recursively.

    Args:
        board: list of slot values; None marks an empty slot.
        queue: list of values that will refill played slots.
        depth: current recursion depth; recursion stops (returning 0) once
            it exceeds the module-level ``DEPTH``.

    Returns:
        The highest net score over all playable slots, never lower than
        ``NONE_MOVE_SCORE``. Returns 0 when there is no playable slot.
    """
    if depth > DEPTH:
        return 0

    net_scores = []
    for i, cell in enumerate(board):
        if cell is None:
            continue

        val = get_left_val(i, board) * cell * get_right_val(i, board)
        next_board, next_queue = get_next_board_state(i, board, queue)
        deep_val = get_deep_move_score(next_board, next_queue, depth + 1)
        net_scores.append(val - deep_val)

    # A finished position has nothing left to gain. Returning the sentinel
    # here would be subtracted by the caller and hugely inflate whichever
    # move ends the game once DEPTH >= 2.
    if not net_scores:
        return 0

    return max(NONE_MOVE_SCORE, max(net_scores))
    

In [8]:
def get_move_score(i, board, queue):
    """Score playing slot ``i`` on ``board``.

    The score is the immediate value ``left * board[i] * right`` minus the
    look-ahead score of the resulting position. Look-ahead is skipped when
    ``DEPTH`` is 0 or when the move ends the game.

    Returns:
        The net score, or ``NONE_MOVE_SCORE`` when slot ``i`` is empty.
    """
    # Empty slots are unplayable: report the sentinel instead of falling
    # through and multiplying by None.
    if board[i] is None:
        return NONE_MOVE_SCORE

    left_val = get_left_val(i, board)
    right_val = get_right_val(i, board)
    val = left_val * board[i] * right_val
    deep_val = 0

    if DEPTH > 0:
        next_board, next_queue = get_next_board_state(i, board, queue)
        # Only look ahead when something is left to play.
        if not is_game_over(next_board):
            deep_val = get_deep_move_score(next_board, next_queue, 1)

    return val - deep_val
    

In [9]:
def is_game_over(board):
    """Return True when every slot of ``board`` is None.

    An empty board counts as finished. The result is a plain ``bool``.
    """
    # Identity checks on each slot avoid building an array and relying on
    # NumPy's elementwise comparison with None.
    return all(cell is None for cell in board)

In [10]:
def play_move(i, board, queue):
    """Attempt to play slot ``i``.

    A move is rejected when the game is already over, when ``i`` is outside
    the board, or when the slot is empty.

    Returns:
        ``(valid_move, board, queue, move_score)``. For a rejected move the
        inputs are handed back unchanged and ``move_score`` is None; for an
        accepted move the board and queue are the successor position.
    """
    if is_game_over(board) or not (0 <= i < len(board)) or board[i] is None:
        return False, board, queue, None

    score = get_move_score(i, board, queue)
    successor_board, successor_queue = get_next_board_state(i, board, queue)
    return True, successor_board, successor_queue, score
    

In [11]:
def play_game(board, queue):
    """Play one game to completion, sampling each move from a softmax policy.

    On every turn each slot is scored with ``play_move`` (unplayable slots
    get ``NONE_MOVE_SCORE``), the scores are turned into probabilities with
    ``softmax``, and a slot is sampled from that distribution.

    Args:
        board: starting board (list; None marks an empty slot).
        queue: starting queue of replacement values.

    Returns:
        ``(history, game_score)`` where ``history`` is a list of
        ``(board_before_move, move_probabilities)`` tuples, one per turn,
        and ``game_score`` is the sum of the scores of the moves played.
        A board that is already finished yields ``([], 0)``.
    """
    board_queue_moves_score_map = []
    game_score = 0

    # Test for game-over before each turn: on a finished board no slot is
    # playable, so the sampling loop below would never terminate.
    while not is_game_over(board):
        # Evaluate every slot once and keep the outcome so the sampled move
        # does not have to be recomputed (play_move does not mutate inputs).
        outcomes = [play_move(i, board, queue) for i in range(len(board))]
        raw_moves_scores = [
            move_score if valid_move else NONE_MOVE_SCORE
            for valid_move, _, _, move_score in outcomes
        ]

        moves_score = softmax(raw_moves_scores)
        board_queue_moves_score_map.append((board, moves_score))

        # Resample in the unlikely event an unplayable slot is drawn.
        valid_move = False
        while not valid_move:
            i = np.random.choice(len(moves_score), p=moves_score)
            valid_move, next_board, next_queue, raw_move_score = outcomes[i]

        board, queue = next_board, next_queue
        game_score += raw_move_score

    return board_queue_moves_score_map, game_score

In [12]:
# Draw a random starting board and queue. No RNG seed is set in the visible
# cells, so the values shown below will differ from run to run.
board, queue = reset_game()

In [13]:
board

[8, -3, -6, -9, 9]

In [14]:
queue

[1, 2, -9]

In [15]:
# Play one full game from the starting position: the first result is the
# per-turn list of (board, move probabilities), the second the total score.
board_moves_scores_map, game_score = play_game(board, queue)

In [16]:
board_moves_scores_map

[([8, -3, -6, -9, 9],
  array([0.00000000e+000, 8.76141755e-298, 2.45261912e-231, 1.00000000e+000,
         5.68890604e-247])),
 ([8, -3, -6, 1, 9],
  array([2.25235791e-082, 1.00000000e+000, 2.57220937e-056, 2.63026133e-142,
         6.03324914e-115])),
 ([8, 2, -6, 1, 9],
  array([1.60381082e-028, 7.12457611e-218, 9.99999959e-001, 4.37749086e-223,
         4.13993755e-008])),
 ([8, 2, -9, 1, 9],
  array([9.99999168e-01, 3.25748582e-70, 2.11512928e-19, 6.81355116e-46,
         8.31528028e-07])),
 ([None, 2, -9, 1, 9],
  array([0.00000000e+00, 2.86251858e-20, 3.53262857e-24, 6.63967720e-36,
         1.00000000e+00])),
 ([None, 2, -9, 1, None],
  array([0.00000000e+00, 1.52299795e-08, 2.54366561e-13, 9.99999985e-01,
         0.00000000e+00])),
 ([None, 2, -9, None, None],
  array([0.00000000e+00, 9.99983299e-01, 1.67014218e-05, 0.00000000e+00,
         0.00000000e+00])),
 ([None, None, -9, None, None], array([0., 0., 1., 0., 0.]))]

In [17]:
game_score

458