In [1]:
# ticcat_bot.py
# A tiny tic-tac-toe bot trained on interpretable, hand-engineered features.
# It learns a linear value function from perfect-play labels generated by minimax.

from __future__ import annotations
import itertools
import math
import random
import numpy as np

EMPTY, X, O = 0, 1, 2
WIN_LINES = [(0,1,2),(3,4,5),(6,7,8),(0,3,6),(1,4,7),(2,5,8),(0,4,8),(2,4,6)]

# ---------- Basic game logic ----------
def next_player(board):
    x = sum(v == X for v in board); o = sum(v == O for v in board)
    return X if x == o else O

def legal_moves(board):
    return [i for i,v in enumerate(board) if v == EMPTY]

def winner(board):
    for a,b,c in WIN_LINES:
        if board[a] != EMPTY and board[a] == board[b] == board[c]:
            return board[a]
    return None

def terminal_value(board):
    """Return +1 if X wins, -1 if O wins, 0 draw/ongoing."""
    w = winner(board)
    if w == X: return 1
    if w == O: return -1
    if not legal_moves(board): return 0
    return None  # non-terminal

def play(board, i, p):
    nb = board[:]; nb[i] = p; return nb

# ---------- Interpretable feature vector ----------
def line_counts(board, player):
    """Return counts of lines with k of 'player' and 3-k empties; and lines open to player."""
    counts = {0:0, 1:0, 2:0, 3:0}
    open_lines = 0
    for a,b,c in WIN_LINES:
        line = [board[a], board[b], board[c]]
        if player in line and (3-player) in line:
            continue  # blocked
        # open to this player (contains only player's marks and empties)
        open_lines += 1
        k = sum(v == player for v in line)
        counts[k] += 1
    return counts, open_lines

def feature_vector(board, p_to_move):
    """
    Interpretable features:
      - open lines for X/O
      - #lines with exactly 2 X and 1 empty (X_twos), etc.
      - #lines with exactly 1 mark
      - center/corner/edge control
      - next player (binary)
    Vector is always from X's perspective; we’ll flip sign at use-time for O.
    """
    # counts and openness
    x_counts, x_open = line_counts(board, X)
    o_counts, o_open = line_counts(board, O)

    # simple positional features
    center = 1 if board[4] == X else (-1 if board[4] == O else 0)
    corners_idx = [0,2,6,8]
    edges_idx   = [1,3,5,7]
    corners = sum(1 if board[i]==X else (-1 if board[i]==O else 0) for i in corners_idx)
    edges   = sum(1 if board[i]==X else (-1 if board[i]==O else 0) for i in edges_idx)

    # next player feature (X=1, O=0) — we’ll pass p_to_move in
    next_is_x = 1.0 if p_to_move == X else 0.0

    phi = np.array([
        # "material" / control of space
        corners, edges, center,
        # openness
        x_open, o_open,
        # lines with 1 mark (potential building)
        x_counts[1], o_counts[1],
        # immediate threats/opps
        x_counts[2], o_counts[2],
        # completed (should correlate with terminal, but present in midgame lines too)
        x_counts[3], o_counts[3],
        # mobility proxy
        len(legal_moves(board)),
        # meta
        next_is_x,
        1.0  # bias
    ], dtype=float)
    return phi

# ---------- Perfect labels via small minimax ----------
def minimax_value(board):
    """Perfect value from X's perspective: +1 win for X, -1 win for O, 0 draw."""
    tv = terminal_value(board)
    if tv is not None:
        return tv
    p = next_player(board)
    vals = [minimax_value(play(board, m, p)) for m in legal_moves(board)]
    return (max(vals) if p == X else min(vals))

# ---------- Dataset generation ----------
def all_reachable_states():
    """Enumerate all legal states reachable from empty by alternating play."""
    start = [EMPTY]*9
    seen = set()
    stack = [tuple(start)]
    out = []
    while stack:
        st = stack.pop()
        if st in seen: continue
        seen.add(st)
        board = list(st)
        out.append(board)
        if terminal_value(board) is not None: 
            continue
        p = next_player(board)
        for m in legal_moves(board):
            stack.append(tuple(play(board, m, p)))
    return out

def training_examples():
    """
    For each non-terminal state and legal action, create an example:
    - x: features of the *resulting* state after the move
    - y: perfect minimax value of that resulting state (from X's perspective)
    """
    Xs, Ys = [], []
    for board in all_reachable_states():
        if terminal_value(board) is not None: 
            continue
        p = next_player(board)
        for m in legal_moves(board):
            after = play(board, m, p)
            # features of the *result* with next player updated
            phi = feature_vector(after, next_player(after))
            y = minimax_value(after)  # +1/-1/0 from X perspective
            Xs.append(phi); Ys.append(y)
    return np.vstack(Xs), np.array(Ys, dtype=float)

# ---------- Linear value function (ridge regression) ----------
class LinearValue:
    def __init__(self, w):
        self.w = w

    @classmethod
    def fit(cls, X, y, l2=1e-2):
        # Solve (X^T X + λI) w = X^T y
        XT = X.T
        A = XT @ X + l2 * np.eye(X.shape[1])
        b = XT @ y
        w = np.linalg.solve(A, b)
        return cls(w)

    def predict(self, phi):
        return float(phi @ self.w)

# ---------- Bot policy ----------
def score_move(board, move, model):
    p = next_player(board)
    after = play(board, move, p)
    # model predicts value from X's perspective; flip if O is the current player
    val_x = model.predict(feature_vector(after, next_player(after)))
    return val_x if p == X else -val_x

def choose_move(board, model, epsilon=0.0):
    moves = legal_moves(board)
    if not moves: return None
    if random.random() < epsilon:
        return random.choice(moves)
    scored = [(m, score_move(board, m, model)) for m in moves]
    # break ties randomly among max
    best_val = max(v for _,v in scored)
    best_moves = [m for m,v in scored if abs(v - best_val) < 1e-9]
    return random.choice(best_moves)

# ---------- Tiny REPL for play ----------
def print_board(b):
    sym = {EMPTY:'.', X:'X', O:'O'}
    for r in range(3):
        print(' '.join(sym[b[3*r+c]] for c in range(3)))
    print()

def train_model(seed=0):
    random.seed(seed); np.random.seed(seed)
    Xmat, y = training_examples()
    model = LinearValue.fit(Xmat, y, l2=1e-1)
    return model

def play_human_vs_bot(model, human_is=X):
    board = [EMPTY]*9
    while True:
        print_board(board)
        tv = terminal_value(board)
        if tv is not None:
            if tv == 1: print("X wins!")
            elif tv == -1: print("O wins!")
            else: print("Draw.")
            break
        p = next_player(board)
        if p == human_is:
            mv = None
            while mv not in legal_moves(board):
                try:
                    mv = int(input(f"Your move (0-8): "))
                except: mv = None
            board = play(board, mv, p)
        else:
            mv = choose_move(board, model)
            print(f"Bot plays {mv}")
            board = play(board, mv, p)

if __name__ == "__main__":
    model = train_model()
    # Quick self-test: bot vs bot
    b = [EMPTY]*9
    while terminal_value(b) is None:
        m = choose_move(b, model)
        b = play(b, m, next_player(b))
    print("Self-test terminal:", terminal_value(b))  # expect perfect/draw
    # Uncomment to play:
    # play_human_vs_bot(model, human_is=X)

Self-test terminal: -1


  A = XT @ X + l2 * np.eye(X.shape[1])
  A = XT @ X + l2 * np.eye(X.shape[1])
  A = XT @ X + l2 * np.eye(X.shape[1])
  b = XT @ y
  b = XT @ y
  b = XT @ y
