
In this notebook we will encode TIC TAC TOE problem and try to solve it with MINIMAX Algorithm



## PROBLEM FORMULATION
The problem was formulated as a Python class, changing some of the features that we considered for previous exercises. In games we need to consider:


* Initial State: initial configuration
* Terminal Test(s): function that returns true on a state s where the game is over and false otherwise. 
* Terminal States: States where the game has ended
* Actions(s): Set of legal moves on game state s.
* Effect(s, a): Determines the result of an action on a state.
* UtilityFunction(s, p): Applied on a terminal states to determine its numerical value for a player p.





## INITIAL STATE:
an empty board (0s)

## TERMINAL TEST:
* the board is full (with a winner or not)
* there is a winner (diagonal, vertical, horizontal line)

## ACTIONS:
* A (MAX) places "o" in empty (0) space
* B (Min) places "x" in empty (0) space

## EFFECT:
a square is filled with the players move

## UTILITY
* A WINNER = 1
* B WINNER = -1
* no winner = 0



# MINIMAX ALGORITHM



```
1. minimax-value = MAX-VALUE(state)
2. bestSuccessorFound = false
3. WHILE !bestSuccessorFound DO
  3.1 successor = next state’s successor
  3.2 IF MinimaxValue(successor) == minimax-value THEN
    3.2.1 bestSuccessorFound = true
    3.2.2 action = successor’s action
4. return action

```





# MAX_VALUE FUNCTION


```
1 IF Terminal-Test(state) THEN
  1.1 minimax-value = UtilityValue(state, MAX)
  1.2return minimax-value
2. ELSE
  2.1 minimax-value = -∞
  2.2 FOR EACH successor of state DO
    2.2.1 minimax-value = MAX(minimax-value, MIN-VALUE(successor))
  2.3 assign minimax-value to state
  2.4 return minimax-value

```



# MIN_VALUE FUNCTION


```
1 IF Terminal-Test(state) THEN
  1.1 minimax-value = UtilityValue(state, MAX)
  1.2return minimax-value
2. ELSE
  2.1 minimax-value = ∞
  2.2 FOR EACH successor of state DO
    2.2.1 minimax-value = MIN(minimax-value, MIN-VALUE(successor))
  2.3 assign minimax-value to state
  2.4 return minimax-value

```



In [1]:
# MAX...>computer, plays with x
# MIN: PLAYER, plays with o
import sys
from random import choice
import numpy as np
sys.setrecursionlimit(15000)

initial_state = [[" "," "," "],[" "," "," "],[" "," "," "]]


def draw(state):
  for i in range (3):
    print(state[i][0]+'|'+state[i][1]+'|'+state[i][2])
    if i<2:
      print('-+-+-')
  print('\n')  


def player_has_horizontal(state, symbol):
  for i in range(3):
    if state[i]==[symbol]*3:
      return True
  return False

def player_has_diagonal(state, symbol):
  if state[1][1]==symbol:
    if state[0][0]==state[2][2]==symbol or state[0][2]==state[2][0]==symbol:
      return True
    else: 
      return False
  else: 
    return False

def player_has_vertical( state, symbol):
  for i in range(3):
    if state[0][i]==state[1][i]==state[2][i]==symbol:
      return True
  return False

def is_player_winner ( state, is_max):
  symbol="x"
  if not is_max:
    symbol="o"
  return player_has_horizontal(state,symbol) or player_has_vertical(state, symbol) or player_has_diagonal(state, symbol)

def is_terminal_state(state):
  if " " not in  [item for sublist in state for item in sublist]:
    return True
  if is_player_winner(state, True) or is_player_winner(state, False):
    return True
  return False  

def utility_value (state, is_max):
  if is_player_winner (state, True):
    return 1
  elif is_player_winner(state, False):
    return -1
  else:
    return 0  



def get_successors(state, is_max):
  
  successors = []
  for i in range(3):
    for j in range(3):
      if state[i][j]==" ":
        new_state = [x[:] for x in state]
        if is_max:
          new_state[i][j]="x"  
        else:
          new_state[i][j]="o"
        successor ={"state": new_state.copy(), "action":[i,j] }
        successors.append(successor)
  return successors



def MINIMAX(state, is_max):

  #take the maximum minimax we can get from this state
  if is_max:
    minimax_value = MAX_VALUE(state, is_max)
  else:
    minimax_value = MIN_VALUE(state, is_max)
  if is_terminal_state(state):
    return minimax_value, None
  # we need to know the action that lead to that state. we check the successors to evaluate that
  successors = get_successors(state, is_max)
  action = [None,None]
  for s in successors:
    value_successor = MINIMAX(s["state"], not is_max)[0] 
    if value_successor==minimax_value:
      action = s["action"]
      break
  return minimax_value, action

def MAX_VALUE(state, is_max):
  if is_terminal_state(state):
    return utility_value (state, is_max)
  else:
    minimax_value = -999999
    successors = get_successors(state, is_max)
    for s in successors:
      minimax_value = max(minimax_value, MIN_VALUE(s["state"], not is_max))
  return minimax_value

def MIN_VALUE(state, is_max):
  if is_terminal_state(state):
    return utility_value (state, is_max)
  else:
    minimax_value = 999999
    successors = get_successors(state, is_max)
    for s in successors:
      minimax_value = min(minimax_value, MAX_VALUE(s["state"], not is_max))
  return minimax_value


def human_play(state):
  row=int(input('Enter new position row: '))
  col=int(input('Enter new position col: '))

  if state[row][col] == " ":
    state[row][col]="o"
    print ("New Board: ")
    draw(state)
    if check_final(state):
      return "END"
    return state  
  else:
    print('Position taken, please pick a different position.')
    human_play(state)

def select_random_position(state):
  return list(choice(np.argwhere(np.array(state)==" ")))

def computer_play(state):
  print ("computer is thinking...")
  next_action = MINIMAX(state, True)[1]
  
  print (next_action)
  #### in the first stages all successors of a node can be winners, so the check will return an empty list.

  # if next_action == [None, None]:
  #   next_action = select_random_position(state)
  state[next_action[0]][next_action[1]]="x"
  draw(state)
  if check_final(state):
    return "END"
  return state
  
def check_final(state):
  if is_terminal_state(state):
    if is_player_winner(state, True):
      print ("BOT WINS!")
    elif is_player_winner(state, False):
      print ("YOU WIN")
    else:
      print ("DRAW!")
    return True
  else:
    return False


state= [[" "," "," "],[" "," "," "],[" "," "," "]]

while state!="END":
  state=computer_play(state)
  if state!="END":
    state=human_play(state)

   
# for s in get_successors(state, True):
#   print (s)
#   print (MAX_VALUE(s["state"], True))
#   print (MINIMAX(s["state"], False))

computer is thinking...
[0, 0]
x| | 
-+-+-
 | | 
-+-+-
 | | 


New Board: 
x| | 
-+-+-
 |o| 
-+-+-
 | | 


computer is thinking...
[0, 1]
x|x| 
-+-+-
 |o| 
-+-+-
 | | 


New Board: 
x|x| 
-+-+-
 |o| 
-+-+-
 | |o


computer is thinking...
[0, 2]
x|x|x
-+-+-
 |o| 
-+-+-
 | |o


BOT WINS!


In [2]:
from os import truncate
state= [["x","x","o"],[" ","o"," "],[" "," "," "]]

minimax_value = MAX_VALUE(state, True)
print (minimax_value)

successors = get_successors(state, truncate)
action = [None,None]
for s in successors:
  print (s)
  value_successor = MINIMAX(s["state"], not False)[0]
  print (value_successor)

next_action = MINIMAX(state, True)[1]
print (next_action)


0
{'state': [['x', 'x', 'o'], ['x', 'o', ' '], [' ', ' ', ' ']], 'action': [1, 0]}
1
{'state': [['x', 'x', 'o'], [' ', 'o', 'x'], [' ', ' ', ' ']], 'action': [1, 2]}
0
{'state': [['x', 'x', 'o'], [' ', 'o', ' '], ['x', ' ', ' ']], 'action': [2, 0]}
1
{'state': [['x', 'x', 'o'], [' ', 'o', ' '], [' ', 'x', ' ']], 'action': [2, 1]}
1
{'state': [['x', 'x', 'o'], [' ', 'o', ' '], [' ', ' ', 'x']], 'action': [2, 2]}
1
[2, 0]
