In [1]:
from abc import ABCMeta, abstractmethod

# Minimax Algorithm implementation

<img src="minimax.png" width="400"/>

In [2]:
class Minimax(metaclass=ABCMeta):
    """
    Abstract Minimax search for two-player, turn-based games.

    Subclasses describe a concrete game by implementing the four abstract
    methods below. The search alternates between maximizing levels (moves
    produced by ``player_moves``) and minimizing levels (moves produced by
    ``opponent_moves``) until ``terminal_test`` reports a terminal state,
    whose value is given by ``utility``.

    Methods
    -------
    search(state)
        Runs Minimax, and returns the best move for the current player.
    player_moves(state)
        Finds the list of successors reachable by the (maximizing) player.
    opponent_moves(state)
        Finds the list of successors reachable by the (minimizing) opponent.
    terminal_test(state)
        Checks if the given state is a terminal state.
    utility(state)
        Returns the utility value for the given state.
    """

    def __max_node(self, state):
        """
        Evaluates a state in which the maximizing player is to move.

        Returns
        -------
        tuple
            (utility, action, num_expanded_states), where action is None for
            a terminal state or a state without successors.
        """
        num_expanded_states = 1  # counts this node itself
        if self.terminal_test(state):
            return self.utility(state), None, num_expanded_states

        best = None  # (utility, action) of the best child seen so far
        for action, child in self.player_moves(state):
            utility, _, expanded_states = self.__min_node(child)
            num_expanded_states += expanded_states
            # Strict comparison: on ties the first action found is kept.
            if best is None or utility > best[0]:
                best = (utility, action)

        if best is None:
            # Non-terminal state without successors: evaluate it as a leaf
            # instead of failing on an empty move list.
            return self.utility(state), None, num_expanded_states
        return best[0], best[1], num_expanded_states

    def __min_node(self, state):
        """
        Evaluates a state in which the minimizing opponent is to move.

        Returns
        -------
        tuple
            (utility, action, num_expanded_states), where action is None for
            a terminal state or a state without successors.
        """
        num_expanded_states = 1  # counts this node itself
        if self.terminal_test(state):
            return self.utility(state), None, num_expanded_states

        best = None  # (utility, action) of the best child seen so far
        for action, child in self.opponent_moves(state):
            utility, _, expanded_states = self.__max_node(child)
            num_expanded_states += expanded_states
            # Strict comparison: on ties the first action found is kept.
            if best is None or utility < best[0]:
                best = (utility, action)

        if best is None:
            # Non-terminal state without successors: evaluate it as a leaf
            # instead of failing on an empty move list.
            return self.utility(state), None, num_expanded_states
        return best[0], best[1], num_expanded_states

    def search(self, state):
        """
        Runs Minimax, and returns the best move for the current player.

        The given state is treated as a maximizing node, i.e. it is the
        player's turn to move.

        Parameters
        ----------
        state
            A tuple describing a unique world configuration.

        Returns
        -------
        tuple
            A tuple describing the action to be taken, or None if the state
            is terminal or has no successors.
        int
            The number of states expanded during the search.
        """
        _, action, expanded_states = self.__max_node(state)
        return action, expanded_states

    @abstractmethod
    def player_moves(self, state):
        """
        Finds the list of successors the (maximizing) player can reach.

        Parameters
        ----------
        state
            A tuple describing a unique world configuration.

        Returns
        -------
        list
            A list of pairs (action,state) with all states that can be reached from the given state with a single action.
        """
        pass

    @abstractmethod
    def opponent_moves(self, state):
        """
        Finds the list of successors the (minimizing) opponent can reach.

        Parameters
        ----------
        state
            A tuple describing a unique world configuration.

        Returns
        -------
        list
            A list of pairs (action,state) with all states that can be reached from the given state with a single action.
        """
        pass

    @abstractmethod
    def terminal_test(self, state):
        """
        Checks if the given state is a terminal state.

        Parameters
        ----------
        state
            A tuple describing a unique world configuration.

        Returns
        -------
        bool
             True if the given state is a terminal state, and False otherwise.
        """
        pass

    @abstractmethod
    def utility(self, state):
        """
        Returns the utility value for the given state.

        Higher values are better for the player and lower values are better
        for the opponent: maximizing levels pick the largest value and
        minimizing levels pick the smallest.

        Parameters
        ----------
        state
            A tuple describing a unique world configuration.

        Returns
        -------
        int
             The utility value for the given state.
        """
        pass

# Tic-Tac-Toe

- A $3\times3$ grid on which two players take turns placing their pieces (**X** or **O**). Each player's goal is to fill one row, column, or diagonal with their own pieces.

<img src="tictactoe.png" width="400"/>

In [3]:
class TicTacToe(Minimax):
    """
    TicTacToe solution using Minimax.

    A state is a 3x3 tuple of tuples whose cells hold 'X' (the Minimax
    player), 'O' (the opponent) or ' ' (an empty cell). An action is a pair
    (row, column) of 0-based indices.

    Methods
    -------
    initial_state()
        Returns the empty board.
    show(state)
        Visualize a given state.
    player_move(state, action)
        Uses a given action to update a state of the board.
    opponent_move(state, action)
        Uses a given action to update a state of the board.
    player_moves(state)
        Finds the list of successors for a given state.
    opponent_moves(state)
        Finds the list of successors for a given state.
    terminal_test(state)
        Checks if the given state is a terminal state.
    utility(state)
        Returns the utility value for the given state.
    """

    def initial_state(self):
        """
        Returns the empty board: a 3x3 tuple of tuples filled with ' '.
        """
        return ((' ',' ',' '),(' ',' ',' '),(' ',' ',' '))

    def show(self, state):
        """
        Visualize a given state.
        """
        for i in range(3):
            # Leading space so every cell is padded on both sides.
            print('', end=' ')
            for j in range(3):
                print(state[i][j], end=' ')
                if j < 2:
                    print('║', end=' ')
            print()
            if i < 2:
                print('═══╬═══╬═══')
        print()

    def __place(self, state, action, mark):
        """
        Returns a copy of the board with `mark` written at `action`.

        States are immutable tuples, so the board is copied into lists,
        updated, and frozen back into tuples. The cell is overwritten
        without checking that it is empty.
        """
        row, col = action
        grid = [list(cells) for cells in state]
        grid[row][col] = mark
        return tuple(tuple(cells) for cells in grid)

    def player_move(self, state, action):
        """
        Places an 'X' at the given (row, column) and returns the new state.
        Assumes the action is valid.
        """
        return self.__place(state, action, 'X')

    def opponent_move(self, state, action):
        """
        Places an 'O' at the given (row, column) and returns the new state.
        Assumes the action is valid.
        """
        return self.__place(state, action, 'O')

    def __successor(self, state, actor):
        """
        Lists ((row, column), new_state) for every empty cell, in row-major
        order, with `actor` placed in that cell.
        """
        return [
            ((i, j), self.__place(state, (i, j), actor))
            for i in range(3)
            for j in range(3)
            if state[i][j] == ' '
        ]

    def player_moves(self, state):
        """
        Finds the list of successors reachable by placing an 'X'.

        Parameters
        ----------
        state
            A tuple describing a unique world configuration.

        Returns
        -------
        list
            A list of pairs (action,state) with all states that can be reached from the given state with a single action.
        """
        return self.__successor(state, 'X')

    def opponent_moves(self, state):
        """
        Finds the list of successors reachable by placing an 'O'.

        Parameters
        ----------
        state
            A tuple describing a unique world configuration.

        Returns
        -------
        list
            A list of pairs (action,state) with all states that can be reached from the given state with a single action.
        """
        return self.__successor(state, 'O')

    def terminal_test(self, state):
        """
        Checks if the given state is a terminal state.

        A state is terminal when the board has no empty cell or when one of
        the sides has completed a line (non-zero utility).

        Parameters
        ----------
        state
            A tuple describing a unique world configuration.

        Returns
        -------
        bool
             True if the given state is a terminal state, and False otherwise.
        """
        board_full = all(state[i][j] != ' ' for i in range(3) for j in range(3))
        return board_full or self.utility(state) != 0

    def utility(self, state):
        """
        Returns the utility value for the given state.

        Parameters
        ----------
        state
            A tuple describing a unique world configuration.

        Returns
        -------
        int
             1 if 'X' has completed a row, column or diagonal, -1 if 'O'
             has, and 0 otherwise.
        """
        # Row i and column i are checked together, then the two diagonals.
        lines = []
        for i in range(3):
            lines.append((state[i][0], state[i][1], state[i][2]))
            lines.append((state[0][i], state[1][i], state[2][i]))
        lines.append((state[0][0], state[1][1], state[2][2]))
        lines.append((state[2][0], state[1][1], state[0][2]))

        for first, second, third in lines:
            if first == second and second == third:
                if first == 'X':
                    return 1
                elif first == 'O':
                    return -1
                # A uniform line of empty cells is not a win; keep looking.
        return 0

In [5]:
board = TicTacToe()
state = board.initial_state()
board.show(state)

# Alternate turns until the game is over. A fixed count of four rounds only
# allows eight plies, which would leave the ninth cell empty in a drawn game.
while not board.terminal_test(state):
    # Computer ('X') picks its move with Minimax.
    action, num_expanded_states = board.search(state)
    state = board.player_move(state, action)
    board.show(state)
    if board.terminal_test(state):
        break

    # Human ('O') enters "row col" using 0-based indices. Re-prompt until the
    # input names an empty cell on the board, because opponent_move itself
    # does not validate the action.
    while True:
        try:
            row, col = [int(x) for x in input().split()]
        except ValueError:
            print('Please enter two integers: row col')
            continue
        if 0 <= row < 3 and 0 <= col < 3 and state[row][col] == ' ':
            break
        print('Please choose an empty cell with row and col between 0 and 2')
    state = board.opponent_move(state, (row,col))
    board.show(state)

   ║   ║   
═══╬═══╬═══
   ║   ║   
═══╬═══╬═══
   ║   ║   

 X ║   ║   
═══╬═══╬═══
   ║   ║   
═══╬═══╬═══
   ║   ║   



 0 1


 X ║ O ║   
═══╬═══╬═══
   ║   ║   
═══╬═══╬═══
   ║   ║   

 X ║ O ║   
═══╬═══╬═══
 X ║   ║   
═══╬═══╬═══
   ║   ║   



 2 0


 X ║ O ║   
═══╬═══╬═══
 X ║   ║   
═══╬═══╬═══
 O ║   ║   

 X ║ O ║   
═══╬═══╬═══
 X ║ X ║   
═══╬═══╬═══
 O ║   ║   



 1 2


 X ║ O ║   
═══╬═══╬═══
 X ║ X ║ O 
═══╬═══╬═══
 O ║   ║   

 X ║ O ║   
═══╬═══╬═══
 X ║ X ║ O 
═══╬═══╬═══
 O ║   ║ X 

