# Assignment: Minimax, Alpha–Beta, Ordering, Simulation, Custom Heuristic

In [19]:
# Engine imports
import os, sys, pathlib, copy, math, time, random
from typing import Dict, List, Tuple, Optional, Callable
REPO_ROOT = pathlib.Path().resolve().parents[1]  # .../tutorials/minimax_alpha/ -> repo root
SRC = REPO_ROOT / "src"
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))
from mancala_ai.engine.core import new_game, legal_actions, step, evaluate

def score_for(state: Dict, root_idx: int) -> float:
    s = copy.deepcopy(state); s["current_player"] = root_idx
    return float(evaluate(s))

def print_state(s):
    p0, p1 = s["pits"][0], s["pits"][1]
    st0, st1 = s["stores"]
    turn = s["current_player"]
    # Top row is player 1 (reverse display for board-like view)
    print("+----- Mancala -----+")
    print("P1 store:", st1)
    print("P1 pits: ", list(reversed(p1)))
    print("P0 pits: ", p0)
    print("P0 store:", st0)
    print("Turn: P", turn, sep="")
    print("+-------------------+")
    
class Stats:
    def __init__(self): self.visits = 0


## Minimax

In [3]:
def minimax(state: Dict, depth: int, root_idx: int, stats: Stats):
    stats.visits += 1
    if depth == 0 or sum(state["pits"][0]) == 0 or sum(state["pits"][1]) == 0:
        return score_for(state, root_idx), None
    acts = legal_actions(state)
    if not acts: return score_for(state, root_idx), None
    is_max = (state["current_player"] == root_idx)
    best_move = acts[0]
    if is_max:
        best = -math.inf
        for a in acts:
            ns, _, _ = step(state, a)
            reduce = 0 if ns["current_player"] == state["current_player"] else 1
            v, _ = minimax(ns, depth - reduce, root_idx, stats)
            if v > best: best, best_move = v, a
        return best, best_move
    else:
        best = math.inf
        for a in acts:
            ns, _, _ = step(state, a)
            reduce = 0 if ns["current_player"] == state["current_player"] else 1
            v, _ = minimax(ns, depth - reduce, root_idx, stats)
            if v < best: best, best_move = v, a
        return best, best_move

def choose_move_minimax(state: Dict, depth: int = 5):
    st = Stats()
    _, mv = minimax(state, depth, state["current_player"], st)
    if mv is None:
        acts = legal_actions(state); mv = int(acts[0]) if acts else 0
    return int(mv), st


## Alpha–Beta (reference, with optional ordering)

In [4]:
def alphabeta(state: Dict, depth: int, alpha: float, beta: float, root_idx: int,
              stats: Stats, ordering_fn: Optional[Callable[[Dict, int, List[int]], List[int]]] = None):
    stats.visits += 1
    if depth == 0 or sum(state["pits"][0]) == 0 or sum(state["pits"][1]) == 0:
        return score_for(state, root_idx), None
    acts = legal_actions(state)
    if not acts: return score_for(state, root_idx), None
    ordered = ordering_fn(state, root_idx, acts) if ordering_fn else acts
    is_max = (state["current_player"] == root_idx)
    best_move = ordered[0]
    if is_max:
        val = -math.inf; a = alpha
        for mv in ordered:
            ns, _, _ = step(state, mv)
            reduce = 0 if ns["current_player"] == state["current_player"] else 1
            child_v, _ = alphabeta(ns, depth - reduce, a, beta, root_idx, stats, ordering_fn)
            if child_v > val: val, best_move = child_v, mv
            a = max(a, val)
            if beta <= a: break
        return val, best_move
    else:
        val = math.inf; b = beta
        for mv in ordered:
            ns, _, _ = step(state, mv)
            reduce = 0 if ns["current_player"] == state["current_player"] else 1
            child_v, _ = alphabeta(ns, depth - reduce, alpha, b, root_idx, stats, ordering_fn)
            if child_v < val: val, best_move = child_v, mv
            b = min(b, val)
            if b <= alpha: break
        return val, best_move

def choose_move_alphabeta(state: Dict, depth: int = 7, ordering_fn=None):
    st = Stats()
    _, mv = alphabeta(state, depth, -1e9, 1e9, state["current_player"], st, ordering_fn)
    if mv is None:
        acts = legal_actions(state); mv = int(acts[0]) if acts else 0
    return int(mv), st


## Part A — Implement simple move ordering

In [5]:
def order_moves_by_one_ply_score(state: Dict, root_idx: int, acts: List[int]) -> List[int]:
    scored = []
    for a in acts:
        ns, _, _ = step(state, a)
        scored.append((score_for(ns, root_idx), a))
    scored.sort(reverse=True)
    return [a for _, a in scored]


## Part B — Compare visits with vs without ordering

In [13]:
CHALLENGE_STATE = {
    "pits":   [[0, 2, 0, 5, 2, 7], [4, 0, 6, 0, 2, 5]],
    "stores": [5, 10],
    "current_player": 0,
}
print_state(CHALLENGE_STATE)

def compare_visits_on_state(state: Dict, depth: int = 7):
    s1 = copy.deepcopy(state); s2 = copy.deepcopy(state)
    mv_no, st_no   = choose_move_alphabeta(s1, depth=depth, ordering_fn=None)
    mv_ord, st_ord = choose_move_alphabeta(s2, depth=depth, ordering_fn=order_moves_by_one_ply_score)
    print(f"Alpha–Beta d={depth} WITHOUT ordering: move={mv_no}, nodes={st_no.visits}")
    print(f"Alpha–Beta d={depth} WITH    ordering: move={mv_ord}, nodes={st_ord.visits}")

compare_visits_on_state(CHALLENGE_STATE, depth=7)

+----- Mancala -----+
P1 store: 10
P1 pits:  [5, 2, 0, 6, 0, 4]
P0 pits:  [0, 2, 0, 5, 2, 7]
P0 store: 5
Turn: P0
+-------------------+
Alpha–Beta d=7 WITHOUT ordering: move=4, nodes=29389
Alpha–Beta d=7 WITH    ordering: move=4, nodes=22161


## Part C — 100-game simulations & metrics

In [14]:
def random_agent():
    def _fn(state: Dict):
        acts = legal_actions(state); mv = random.choice(acts) if acts else 0
        st = Stats(); return mv, st
    return _fn

def minimax_agent(depth: int):
    def _fn(state: Dict):
        return choose_move_minimax(state, depth=depth)
    return _fn

def alphabeta_agent(depth: int, ordering: bool = False):
    def _fn(state: Dict):
        order = order_moves_by_one_ply_score if ordering else None
        return choose_move_alphabeta(state, depth=depth, ordering_fn=order)
    return _fn

def play_game(agent0, agent1, max_plies=500):
    s = new_game(); moves = 0
    times_0, times_1, nodes_0, nodes_1 = [], [], [], []
    while sum(s["pits"][0]) > 0 and sum(s["pits"][1]) > 0 and moves < max_plies:
        turn = s["current_player"]
        agent = agent0 if turn == 0 else agent1
        t0 = time.perf_counter()
        mv, st = agent(copy.deepcopy(s))
        dt = time.perf_counter() - t0
        s, _, _ = step(s, mv)
        moves += 1
        if turn == 0:
            times_0.append(dt); nodes_0.append(st.visits)
        else:
            times_1.append(dt); nodes_1.append(st.visits)
    st0, st1 = s["stores"]
    winner = 0 if st0 > st1 else 1 if st1 > st0 else -1
    return {"winner": winner, "moves": moves,
            "times_0": times_0, "times_1": times_1,
            "nodes_0": nodes_0, "nodes_1": nodes_1}

def run_series(agentA, agentB, n_games=100, seed=42):
    random.seed(seed)
    wins_A = wins_B = draws = 0
    moves_list = []; tA=[]; tB=[]; nA=[]; nB=[]
    for g in range(n_games):
        if g % 2 == 0:
            res = play_game(agentA, agentB)
            if   res["winner"] == 0: wins_A += 1
            elif res["winner"] == 1: wins_B += 1
            else: draws += 1
            tA += res["times_0"]; tB += res["times_1"]
            nA += res["nodes_0"]; nB += res["nodes_1"]
        else:
            res = play_game(agentB, agentA)
            if   res["winner"] == 0: wins_B += 1
            elif res["winner"] == 1: wins_A += 1
            else: draws += 1
            tA += res["times_1"]; tB += res["times_0"]
            nA += res["nodes_1"]; nB += res["nodes_0"]
        moves_list.append(res["moves"])
    def _avg(xs): return (sum(xs)/len(xs)) if xs else 0.0
    return {
        "games": n_games,
        "wins_A": wins_A, "wins_B": wins_B, "draws": draws,
        "win_rate_A": wins_A/n_games, "win_rate_B": wins_B/n_games,
        "avg_moves_per_game": _avg(moves_list),
        "avg_time_per_move_A": _avg(tA), "avg_time_per_move_B": _avg(tB),
        "avg_nodes_per_move_A": _avg(nA), "avg_nodes_per_move_B": _avg(nB),
    }

### 6.1) Run: Minimax vs Alpha-Beta (after you implement Minimax) and comment

In [25]:
summary = run_series(random_agent(), random_agent(), n_games=100, seed=123)
summary

{'games': 100,
 'wins_A': 53,
 'wins_B': 41,
 'draws': 6,
 'win_rate_A': 0.53,
 'win_rate_B': 0.41,
 'avg_moves_per_game': 41.47,
 'avg_time_per_move_A': 7.82604387516726e-06,
 'avg_time_per_move_B': 8.660614305950896e-06,
 'avg_nodes_per_move_A': 0.0,
 'avg_nodes_per_move_B': 0.0}

In [27]:
print("Alpha–Beta (no ordering) vs Random")
summary = run_series(alphabeta_agent(depth=5, ordering=False), minimax_agent(depth=5), n_games=10, seed=123)
summary

Alpha–Beta (no ordering) vs Random


{'games': 10,
 'wins_A': 5,
 'wins_B': 5,
 'draws': 0,
 'win_rate_A': 0.5,
 'win_rate_B': 0.5,
 'avg_moves_per_game': 57.0,
 'avg_time_per_move_A': 0.09040524237916862,
 'avg_time_per_move_B': 0.8655028674280753,
 'avg_nodes_per_move_A': 5635.350877192983,
 'avg_nodes_per_move_B': 52738.36842105263}

## Part D — Your own heuristic Minimax

In [None]:
def heuristic_score_for(state: Dict, root_idx: int) -> float:
    # TODO: replace with your own heuristic formula
    return score_for(state, root_idx)

def minimax_with_heuristic(state: Dict, depth: int, root_idx: int, stats: Stats):
    stats.visits += 1
    if depth == 0 or sum(state["pits"][0]) == 0 or sum(state["pits"][1]) == 0:
        return heuristic_score_for(state, root_idx), None
    acts = legal_actions(state)
    if not acts: return heuristic_score_for(state, root_idx), None
    is_max = (state["current_player"] == root_idx)
    best_move = acts[0]
    if is_max:
        best = -math.inf
        for a in acts:
            ns, _, _ = step(state, a)
            reduce = 0 if ns["current_player"] == state["current_player"] else 1
            v, _ = minimax_with_heuristic(ns, depth - reduce, root_idx, stats)
            if v > best: best, best_move = v, a
        return best, best_move
    else:
        best = math.inf
        for a in acts:
            ns, _, _ = step(state, a)
            reduce = 0 if ns["current_player"] == state["current_player"] else 1
            v, _ = minimax_with_heuristic(ns, depth - reduce, root_idx, stats)
            if v < best: best, best_move = v, a
        return best, best_move

def choose_move_minimax_heuristic(state: Dict, depth: int = 5):
    st = Stats()
    _, mv = minimax_with_heuristic(state, depth, state["current_player"], st)
    if mv is None:
        acts = legal_actions(state); mv = int(acts[0]) if acts else 0
    return int(mv), st


In [None]:
print("Heuristic Minimax vs Random")
def heur_agent(depth=5):
    def _fn(state):
        return choose_move_minimax_heuristic(state, depth=depth)
    return _fn

summary = run_series(heur_agent(depth=5), random_agent(), n_games=100, seed=123)
summary