<a href="https://colab.research.google.com/github/xsparc/super-octo-eureka/blob/master/TTT_RL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Create a Tic Tac Toe game where an AI uses Reinforcement Learning

In [0]:
import numpy as np

class Agent:
  """A player that places one fixed symbol on the environment it is attached to."""

  def __init__(self, sym):
    """Create an agent that plays the given symbol.

    Args:
      sym: Symbol this agent passes to the environment on every move.
    """
    self.sym = sym
    # Defined up front so the attribute always exists; set by attach_to_env().
    self.env = None

  def attach_to_env(self, env):
    """Remember the environment that subsequent moves are sent to.

    Args:
      env: Object exposing ``place_move(sym, position)``.
    """
    self.env = env

  def take_action(self, position):
    """Ask the attached environment to place this agent's symbol.

    Args:
      position: Cell to play, forwarded unchanged to ``env.place_move``.

    Returns:
      Whatever the environment's ``place_move`` returns.

    Raises:
      AttributeError: If no environment has been attached yet.
    """
    if self.env is None:
      # Same exception type as the missing-attribute failure this replaces,
      # but with a message that says what to do about it.
      raise AttributeError(
          "Agent %r is not attached to an environment; "
          "call attach_to_env() first" % (self.sym,))
    return self.env.place_move(self.sym, position)
    
class Environment:
  """Tic-tac-toe board that stores cell ownership and the moves played.

  The board is a 3x3 NumPy array in which -1 marks an "X", 0 an empty cell
  and 1 an "O".
  """

  def __init__(self, name):
    """Create an empty board.

    Args:
      name: Label stored on the environment as ``self.name``.
    """
    self.name = name
    self.board_dim = (3,3)
    self.board_grid = np.zeros(self.board_dim)
    # (sym, position) tuples, in the order the moves were accepted.
    self.move_history = []
    # Number of distinct cell states (X, empty, O); base of the state hash.
    self.num_of_sym = 3

  def register_players(self, players):
    """Attach every player to this environment.

    Args:
      players: Iterable of objects exposing ``attach_to_env(env)``.
    """
    for player in players:
      player.attach_to_env(self)

  def get_winner(self):
    """Report which symbol, if any, owns a complete line.

    Returns:
      A one-element list (``["O"]`` or ``["X"]``) when exactly one symbol has
      a full row, column or diagonal. Otherwise the string ``"None"``, which
      covers both "no complete line" and "both symbols have one".
    """
    line_sums = []
    line_sums.extend(self.board_grid.sum(0))  # one total per column
    line_sums.extend(self.board_grid.sum(1))  # one total per row
    line_sums.append(self.board_grid.diagonal().sum())  # top-left to bottom-right
    line_sums.append(np.fliplr(self.board_grid).diagonal().sum())  # top-right to bottom-left

    # A line is complete when all of its cells hold the same non-zero value,
    # i.e. its total is +/- the side length of the (square) board.
    full_line = self.board_dim[0]

    winner = []
    if full_line in line_sums:
      winner.append("O")
    if -full_line in line_sums:
      winner.append("X")

    if len(winner) == 1:
      return winner
    return "None"

  def num_to_sym(self, num):
    """Translate a cell value into its symbol.

    Args:
      num: -1, 0 or 1 (int or an equal float).

    Returns:
      "X", " " or "O" respectively; the string "invalid" for anything else.
    """
    switcher = {
        -1:"X",
        0: " ",
        1: "O"
    }
    return switcher.get(num, "invalid")

  def sym_to_num(self, sym):
    """Translate a symbol into its cell value.

    Args:
      sym: "X", " " or "O".

    Returns:
      -1, 0 or 1 respectively; the string "invalid" for anything else.
    """
    switcher = {
        "X":-1,
        " ":0 ,
        "O":1
    }
    return switcher.get(sym, "invalid")

  def place_move(self, sym, position):
    """Place a player's symbol on an empty cell and log the move.

    Args:
      sym: Player symbol, "X" or "O".
      position: ``(row, column)`` pair of the target cell.

    Returns:
      True if the symbol was placed; False if the cell was already occupied,
      in which case the board and the move history are left untouched.

    Raises:
      ValueError: If ``sym`` is not a player symbol. The blank symbol " " is
        rejected too: it would write 0 into the cell and log a move that
        changed nothing.
    """
    value = self.sym_to_num(sym)
    if value not in (-1, 1):
      raise ValueError("sym must be 'X' or 'O', got %r" % (sym,))

    i,j = position
    if self.board_grid[i,j] != 0:
      return False

    self.board_grid[i,j] = value
    self.move_history.append((sym, position))
    return True

  def check_cell_if_empty(self, position):
    """Tell whether a cell is still free.

    Args:
      position: ``(row, column)`` pair of the cell to inspect.

    Returns:
      True if the cell holds 0, False otherwise.
    """
    i,j = position
    # bool() turns the NumPy comparison result into a plain Python bool.
    return bool(self.board_grid[i,j] == 0)

  def get_empty_cells(self):
    """List the free cells in row-major order.

    Returns:
      List of ``(row, column)`` tuples for every cell that holds 0.
    """
    return [
        (i, j)
        for i in range(self.board_dim[0])
        for j in range(self.board_dim[1])
        if self.check_cell_if_empty((i, j))
    ]

  def print_move_history(self):
    """Print one ``index:(sym, position)`` line per accepted move."""
    for i, move in enumerate(self.move_history):
      print(str(i) + ":" + str(move))

  def draw_board_in_sym(self):
    """Print the board with symbols, one ``|s|`` group per cell."""
    # Each cell renders as three characters, so the rule spans 3 * columns.
    separator = "-" * (3 * self.board_dim[1])
    print(separator)
    for i in range(self.board_dim[0]):
      print("".join(
          "|" + self.num_to_sym(self.board_grid[i,j]) + "|"
          for j in range(self.board_dim[1])))
      print(separator)

  def draw_board_in_num(self):
    """Print the raw numeric board array."""
    print(self.board_grid)

  # Creates a hash from a given state.
  # Based on reading the board as a base-3 number and converting it to decimal.
  def get_state_in_hash(self):
    """Encode the current board as a single integer.

    Cell values are shifted to base-3 digits (-1 -> 0, 0 -> 1, 1 -> 2). The
    cell at flattened (row-major) index k contributes ``digit * 3**k``.

    Returns:
      NumPy integer that identifies the board configuration.
    """
    # Integer digits keep the whole computation in exact integer arithmetic.
    digits = np.ravel(self.board_grid + 1).astype(int)

    # Place value of each cell: num_of_sym ** flattened index.
    place_values = self.num_of_sym ** np.arange(digits.size)

    return np.sum(digits * place_values)
       

In [42]:
# One agent per symbol: "O" and "X".
pO = Agent("O")
pX = Agent("X")

# Build the board and give both agents a reference to it.
env = Environment("TTT")
env.register_players((pO, pX))

# Scripted moves used to exercise the environment; the players do not
# strictly alternate in this sequence.
scripted_moves = [
    (pO, (1, 1)),
    (pX, (2, 0)),
    (pO, (0, 0)),
    (pO, (0, 1)),
    (pO, (0, 2)),
    (pX, (2, 1)),
    (pO, (2, 2)),
]
for player, cell in scripted_moves:
    player.take_action(cell)

# Show the resulting board, then the value returned by each query method.
print("Illustration of the board")
env.draw_board_in_sym()
print("Move list:")
env.print_move_history()
print("Output from get_winner()")
print(env.get_winner())
print("Output from get_empty_cells()")
print(env.get_empty_cells())
print("Output from get_state_in_hash()")
print(env.get_state_in_hash())

Illustration of the board
---------
|O||O||O|
---------
| ||O|| |
---------
|X||X||O|
---------
Move list:
0:('O', (1, 1))
1:('X', (2, 0))
2:('O', (0, 0))
3:('O', (0, 1))
4:('O', (0, 2))
5:('X', (2, 1))
6:('O', (2, 2))
Output from get_winner()
['O']
Output from get_empty_cells()
[(1, 0), (1, 2)]
Output from get_state_in_hash()
13580
