In [1]:
import random

In [2]:
class TicTacToe:
    """Tic-tac-toe game loop for two player objects.

    The board is a flat list of 9 cells indexed 0-8 row by row; an empty
    cell holds a single space ``' '``.

    Each player object must expose ``name``, ``char`` and ``player_type``
    attributes plus the methods ``new_game()``, ``make_move(board)``,
    ``winner()`` and ``reward(value, board)``.
    """

    def __init__(self, player1, player2):
        self.player1 = player1
        self.player2 = player2
        self.board = [' '] * 9
        # The side that moves first is drawn once, at construction time.
        self.player_1_turn = random.choice([True, False])

    def check_full(self, ):
        """Return True when the board has no empty cell left."""
        return ' ' not in self.board

    def print_board(self, ):
        """Print the current board as a 3x3 grid."""
        b = self.board

        grid = f'''
        {b[0]} | {b[1]} | {b[2]}
        ---------
        {b[3]} | {b[4]} | {b[5]}
        ---------
        {b[6]} | {b[7]} | {b[8]}
        '''
        print(grid)

    def check_winner(self, char):
        """Return True when ``char`` occupies a full row, column or diagonal."""
        b = self.board
        conditions = (
            (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
            (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
            (0, 4, 8), (2, 4, 6),             # diagonals
        )
        return any(char == b[i1] == b[i2] == b[i3] for i1, i2, i3 in conditions)

    def play(self, ):
        """Play one complete game on a fresh board.

        Players alternate until one wins or the board fills up. Rewards
        handed to the players:

        * ``-0.25`` for an invalid move (out of range, occupied cell, or a
          human entering something that is not an integer); the same player
          is asked again.
        * ``0.25`` for every valid, non-terminal move.
        * ``5`` / ``-5`` to winner / loser, ``0.5`` to both on a draw.
        """
        # Always start from an empty board so the same instance can be
        # replayed; without this a second call starts from the finished
        # position of the previous game.
        self.board = [' '] * 9
        self.player1.new_game()
        self.player2.new_game()
        while True:
            # Check Turns
            if self.player_1_turn:
                player, other_player = self.player1, self.player2
            else:
                player, other_player = self.player2, self.player1
            # Ask for move
            if player.player_type == 'human':
                print(f'{player.name} Turn')
                self.print_board()
            try:
                move = player.make_move(self.board)
            except (IndexError, ValueError) as exc:
                if isinstance(exc, ValueError) and player.player_type == 'human':
                    # A typo at the prompt is an invalid move, not the end
                    # of the game: penalise and ask again.
                    print('Invalid move, Not a number!!')
                    player.reward(-0.25, self.board)
                    continue
                # A player that cannot produce any move ends the game.
                print('Game Ended with Draw')
                break
            # Check Valid
            if move < 0 or move > 8:
                print('Invalid move, Out of Range!!')
                player.reward(-0.25, self.board)
                continue
            if self.board[move] != ' ':
                print('Space is already filled out!!')
                player.reward(-0.25, self.board)
                continue
            # Put Char
            self.board[move] = player.char
            # Check Winner
            if self.check_winner(player.char):
                player.winner()
                player.reward(5, self.board)
                other_player.reward(-5, self.board)
                self.print_board()
                break
            # Check full(draw)
            if self.check_full():
                print('Game ended with draw')
                player.reward(0.5, self.board)
                other_player.reward(0.5, self.board)
                self.print_board()
                break
            player.reward(0.25, self.board)
            # Swap Turn
            self.player_1_turn = not self.player_1_turn

In [3]:
class Player:
    """Participant who enters moves at the keyboard.

    Stores a display ``name``, the mark placed on the board (``char``) and a
    ``player_type`` tag, which is ``'human'`` unless overridden.
    """

    def __init__(self, name, char, player_type='human'):
        self.name, self.char = name, char
        self.player_type = player_type

    def make_move(self, board=None):
        """Prompt for a move and return it as an int.

        ``board`` is accepted but not used here. ``int`` raises
        ``ValueError`` when the typed text is not an integer.
        """
        return int(input('Make your move: '))

    def new_game(self):
        """Print which mark this player uses."""
        announcement = f'{self.name} is {self.char}'
        print(announcement)

    def winner(self):
        """Print that this player has won."""
        announcement = f'{self.name} is winner'
        print(announcement)

    def reward(self, reward_value, board=None):
        """Print the reward received; ``board`` is not used here."""
        message = f'{self.name} gets reward {reward_value}'
        print(message)

In [4]:
class AI(Player):
    """Tabular Q-learning player with epsilon-greedy move selection.

    States are stored as tuples of the 9 board cells so that they can be
    used as dictionary keys; actions are cell indices 0-8.
    """

    def __init__(self, name, char, alpha=0.01, epsilon=1, gamma=0.9):
        super().__init__(name, char, player_type='AI')
        self.epsilon = epsilon  # Exploration probability (epsilon-greedy)
        self.gamma = gamma  # Discount factor
        self.alpha = alpha  # Learning rate
        self.q_table = {}
        # Last (state, action) pair chosen by make_move; None until the
        # first move so reward() can tell there is nothing to update yet.
        self.prev_board = None
        self.prev_action = None

    def get_Q(self, state, action):
        """Return Q(state, action), creating the entry with value 5 if absent.

        ``state`` may be any sequence of cells; it is converted to a tuple
        because a list cannot be part of a dictionary key.
        """
        return self.q_table.setdefault((tuple(state), action), 5)

    def available_moves(self, board):
        """Return the indices of all empty cells on ``board``."""
        return [x for x in range(9) if board[x] == ' ']

    def q_learn(self, state, action, reward, new_state):
        '''
        new_q_value = prev_q_value + alpha * (reward + gamma * max_q - prev_q_value)

        max_q is the largest Q value over the moves still open in
        ``new_state``; it is 0 when ``new_state`` has no empty cell.
        '''
        state = tuple(state)
        new_state = tuple(new_state)
        prev_q_value = self.get_Q(state, action)
        # The bootstrap term must range over actions legal in the NEW state,
        # not the old one (which still lists the cell that was just filled).
        next_actions = self.available_moves(new_state)
        max_q = max((self.get_Q(new_state, a) for a in next_actions), default=0)
        self.q_table[(state, action)] = prev_q_value + self.alpha * (
            reward + self.gamma * max_q - prev_q_value
        )

    def reward(self, reward_value, board):
        """Print the reward and apply it to the last (state, action) taken."""
        print(f'{self.name} gets reward {reward_value}')
        if self.prev_board is None or self.prev_action is None:
            # No move has been made yet, so there is nothing to learn from.
            return
        self.q_learn(self.prev_board, self.prev_action, reward_value, tuple(board))

    def make_move(self, board):
        """Choose a cell with the epsilon-greedy rule and remember the choice.

        With probability ``epsilon`` a random empty cell is returned (and
        epsilon is decayed); otherwise the empty cell with the highest Q
        value is returned, ties going to the lowest index. Raises
        ``IndexError`` or ``ValueError`` when the board has no empty cell.
        """
        # save old state
        state = tuple(board)
        self.prev_board = state
        candidates = self.available_moves(state)
        if random.random() < self.epsilon:  # Epsilon Greedy method
            # Selecting Random Action(Exploration)
            action_taken = random.choice(candidates)
            self.epsilon *= 0.9999999  # Epsilon decay
        else:
            # Selecting actions with max-Q value(Exploitation)
            action_taken = max(candidates, key=lambda a: self.get_Q(state, a))
        # Record the action on BOTH paths so reward() updates the right entry.
        self.prev_action = action_taken
        return action_taken

In [5]:
# Learning agent plays 'X'; epsilon=1 means it starts out choosing moves at random.
p1 = AI('Dall-E', 'X', alpha=0.01, epsilon=1, gamma=0.85)
# Keyboard-driven opponent plays 'O'.
p2 = Player('Sagar', 'O')

In [6]:
# Pair the two players; the side that moves first is picked at random inside TicTacToe.
game = TicTacToe(player1=p1, player2=p2)

In [7]:
# Run one interactive game; the human player is prompted for each move via input().
game.play()

Dall-E is X
Sagar is O
Sagar Turn

          |   |  
        ---------
          |   |  
        ---------
          |   |  
        


Make your move:  4


Sagar gets reward 0.25
Dall-E gets reward 0.25
Sagar Turn

          |   |  
        ---------
          | O | X
        ---------
          |   |  
        


Make your move:  1


Sagar gets reward 0.25
Dall-E gets reward 0.25
Sagar Turn

          | O |  
        ---------
        X | O | X
        ---------
          |   |  
        


Make your move:  8


Sagar gets reward 0.25
Dall-E gets reward 0.25
Sagar Turn

        X | O |  
        ---------
        X | O | X
        ---------
          |   | O
        


Make your move:  4


Space is already filled out!!
Sagar gets reward -0.25
Sagar Turn

        X | O |  
        ---------
        X | O | X
        ---------
          |   | O
        


Make your move:  2


Sagar gets reward 0.25
Dall-E is winner
Dall-E gets reward 5
Sagar gets reward -5

        X | O | O
        ---------
        X | O | X
        ---------
        X |   | O
        
