In [None]:
import numpy as np
import seaborn as sns
from time import time
import gc

In [None]:
# Empty 3x3 board: 0 marks a free cell.
init_state = np.zeros((3, 3), dtype=int)

In [None]:
class Game:
    """A tic-tac-toe position on a 3x3 board.

    Cells hold 0 (free) or a player id (1 or 2). Moves are addressed by the
    flat index ``3 * row + col``. ``update`` returns a new ``Game`` and leaves
    the current one untouched.
    """

    # The eight winning lines, each given as three (row, col) pairs.
    _LINES = (
        ((0, 0), (0, 1), (0, 2)),
        ((1, 0), (1, 1), (1, 2)),
        ((2, 0), (2, 1), (2, 2)),
        ((0, 0), (1, 0), (2, 0)),
        ((0, 1), (1, 1), (2, 1)),
        ((0, 2), (1, 2), (2, 2)),
        ((0, 0), (1, 1), (2, 2)),
        ((0, 2), (1, 1), (2, 0)),
    )

    def __init__(self, state, first_player=None):
        """Wrap a board.

        Args:
            state: 3x3 array of 0/1/2 (must support ``.copy()`` for ``update``).
            first_player: id (1 or 2) of the player who opens the game. When
                omitted, the notebook-level ``FIRST`` is read, as before.
        """
        self.state = state
        self.empty = self.make_empty(state)
        # FIRST is only looked up when no explicit opener is supplied.
        self.first_player = FIRST if first_player is None else first_player

    def make_empty(self, state):
        """Return the flat indices of all free cells, in row-major order."""
        return [3 * i + j for i in range(3) for j in range(3) if state[i][j] == 0]

    def is_lose(self, a):
        """Return 1 if the opponent of player ``a`` owns a full line, else 0."""
        # Delegating to is_win covers all eight lines, including the
        # anti-diagonal.
        return self.is_win(3 - a)

    def is_win(self, a):
        """Return 1 if player ``a`` owns a full row, column or diagonal, else 0."""
        for line in self._LINES:
            if all(self.state[r][c] == a for r, c in line):
                return 1
        return 0

    def is_draw(self, a=None):
        """Return 1 if the board is full and nobody has won, else 0.

        ``a`` is accepted for backward compatibility and ignored: a draw is a
        property of the board, not of one player.
        """
        if self.is_win(1) or self.is_win(2):
            return 0
        return 1 if np.all(self.state) else 0

    def is_done(self):
        """Return 1 if the game is over (a win for either side or a draw), else 0."""
        if self.is_win(1) or self.is_win(2) or self.is_draw():
            return 1
        return 0

    def update(self, target):
        """Return a new ``Game`` in which the player to move has taken ``target``.

        Args:
            target: flat cell index in ``range(9)``.
        """
        board = self.state.copy()
        row, col = divmod(target, 3)
        board[row][col] = self.next_opp()
        # Carry the opener along so the child agrees on whose turn it is.
        return Game(board, self.first_player)

    def next_opp(self):
        """Return the id of the player whose turn it is.

        The opener moves whenever both players have the same number of marks;
        otherwise it is the other player's turn.
        """
        opener_marks = other_marks = 0
        for row in self.state:
            for cell in row:
                if cell == self.first_player:
                    opener_marks += 1
                elif cell != 0:
                    other_marks += 1

        if opener_marks == other_marks:
            return self.first_player
        # Player ids are 1 and 2, so the other id is 3 - id.
        return 3 - self.first_player

In [None]:
class Random:
    """Baseline agent that picks one of the free cells at random."""

    def __init__(self, status):
        # Player id this agent plays as.
        self.status = status

    def action(self, game):
        """Return a move drawn with ``np.random.choice`` from ``game.empty``."""
        legal_moves = game.empty
        picked = np.random.choice(legal_moves)
        return picked

In [None]:
class MinMax:
    """Tic-tac-toe agent that searches the full game tree with minimax.

    Scores are always from this agent's point of view: 1 for a win, 0 for a
    draw, -1 for a loss. On its own turns the agent takes the best child
    score, on the opponent's turns the worst one.
    """

    def __init__(self, status):
        # Player id (1 or 2) this agent plays as.
        self.status = status
        # Memo of already evaluated positions, keyed by (opener, flat board).
        self._cache = {}

    def value(self, game):
        """Return the minimax score of ``game`` for this agent (-1, 0 or 1)."""
        me = self.status
        rival = 3 - me  # player ids are 1 and 2

        # Terminal positions.
        if game.is_win(rival):
            return -1
        if game.is_win(me):
            return 1
        if not game.empty:
            return 0

        # Whose turn it is depends on the opener as well as on the board, so
        # both go into the key.
        key = (game.first_player, tuple(int(c) for row in game.state for c in row))
        if key in self._cache:
            return self._cache[key]

        outcomes = [self.value(game.update(cell)) for cell in game.empty]
        # Maximise on our own move, minimise on the opponent's.
        result = max(outcomes) if game.next_opp() == me else min(outcomes)
        self._cache[key] = result
        return result

    def action(self, game):
        """Return the flat index of the move with the best minimax score.

        Ties are broken in favour of the lowest cell index. On an empty board
        that this agent opens, the centre (4) is taken without searching.
        """
        if game.first_player == self.status and len(game.empty) == 9:
            return 4

        best_action = 0
        best_score = -float('inf')

        for cell in game.empty:
            cell_score = self.value(game.update(cell))
            if cell_score > best_score:
                best_action = cell
                best_score = cell_score
        return best_action

    

In [None]:
def play(game, m1, m2, scoreboard=None):
    """Play one game to the end and record the outcome.

    ``m1`` moves first, then the agents alternate. After every move the
    position is checked for a win by the mover and then for a draw.

    Args:
        game: starting position; must provide ``update``, ``is_win`` and
            ``is_draw``.
        m1, m2: agents exposing ``status`` (player id 1 or 2) and
            ``action(game)``.
        scoreboard: mutable 3-slot sequence
            ``[wins of player 1, wins of player 2, draws]`` to increment.
            Defaults to the notebook-level ``score`` list.

    Returns:
        The index of the scoreboard slot that was incremented
        (``status - 1`` of the winner, or 2 for a draw).
    """
    # The global is only looked up when no explicit scoreboard is given.
    tally = score if scoreboard is None else scoreboard
    movers = (m1, m2)
    turn = 0
    while True:
        mover = movers[turn % 2]
        game = game.update(mover.action(game))
        if game.is_win(mover.status):
            slot = mover.status - 1
        elif game.is_draw(mover.status):
            slot = 2
        else:
            turn += 1
            continue
        tally[slot] += 1
        return slot
        

In [None]:
# Id of the player who makes the opening move; Game reads it on construction.
FIRST = 1

# Agents are created with the player id they play as.
m1, m2 = Random(1), MinMax(2)

# Starting position shared by every game of the first batch.
game = Game(init_state)

In [None]:
%%time
# Batch 1: m1 (Random, player 1) moves first against m2 (MinMax, player 2).
# play() increments the slots of `score`:
# [wins of player 1, wins of player 2, draws].
score = [0, 0, 0]
for _ in range(100):
    print(_)  # progress counter
    play(game, m1, m2)
    
gc.collect()

# Batch 2: player 2 now opens. FIRST has to change *before* the game is
# rebuilt, because Game reads it when it is constructed.
FIRST = 2
game = Game(init_state)
# Snapshot of the batch-1 tally. `score` itself keeps accumulating below, so
# after this cell it holds the totals of both batches.
score1 = score.copy()
for _ in range(100):
    print(_)  # progress counter
    play(game, m2, m1)

In [None]:
# Tally over both batches: [wins of player 1, wins of player 2, draws].
score

In [None]:
# Tally of the first batch only (copied before the second batch was played).
score1

In [None]:
# Wins per player id over all games played; draws (score[2]) are not plotted.
ax = sns.barplot(x=[1, 2], y=score[:2])
# Label the figure so it can be read on its own; the trailing ';' keeps the
# return value of set() out of the cell output.
ax.set(xlabel="player id", ylabel="games won", title="Wins per player");

In [None]:
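# Results log, one line per experiment version (v1..v6).
# NOTE(review): the format looks like "<player-1 wins>vs<player-2 wins>", with
# the pair after "//" presumably being the first-batch tally (score1) and the
# trailing "32min" the run time — confirm with the author.
# v1: 75vs120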
# v2: 57vs134
# v3: 61vs128
# v4: 52vs131 // 33vs56
# v5: 47vs144 // 29vs65
# v6: 21vs153 // 20vs70 32min