In [None]:
import numpy as np
import tensorflow as tf
from keras import layers, models
import keras.backend as K

import seaborn as sns
from time import time
import gc

# Give TensorFlow an empty list of visible GPU devices, i.e. hide every GPU from it.
tf.config.experimental.set_visible_devices([], 'GPU')

In [None]:
# Empty 3x3 board: every cell is 0 (no mark placed yet).
init_state = np.zeros((3, 3), dtype=int)

In [None]:
class Game:
    """Tic-tac-toe position on a 3x3 board.

    Board cells hold 0 for an empty square or a player's mark. The side to
    move is not stored; `next_opp` derives it from how many marks each side
    has placed. Moves are flat indices ``3 * row + col``.

    Attributes:
        state: the 3x3 board passed to the constructor (kept by reference).
        empty: flat indices of the empty cells, in row-major order.
        first_player: mark of the player who makes the first move.
    """

    # Flat-index triples for the 3 rows, 3 columns and 2 diagonals.
    _LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self, state, FIRST=1):
        """Wrap `state` as a position in a game whose first mover uses mark `FIRST`."""
        self.state = state
        self.empty = self.make_empty(state)
        self.first_player = FIRST

    def make_empty(self, state):
        """Return the flat indices (3*row + col) of all cells of `state` equal to 0."""
        return [3 * i + j for i in range(3) for j in range(3) if state[i][j] == 0]

    def is_lose(self):
        """Return True if some row, column or diagonal holds three equal non-zero marks.

        The check does not look at whose marks they are. The agents in this
        file call it right after a move and score a True result as -1 for the
        side that is now to move.
        """
        cells = [self.state[i][j] for i in range(3) for j in range(3)]
        return any(
            cells[a] != 0 and cells[a] == cells[b] == cells[c]
            for a, b, c in self._LINES
        )

    def is_draw(self):
        """Return 1 if the board is full and no line is completed, else 0."""
        if self.is_lose():
            return 0
        return 1 if np.all(self.state) else 0

    def is_done(self):
        """Return 1 if the game is over (completed line or full board), else 0."""
        # "lose or draw" reduces to "line completed or board full".
        return 1 if self.is_lose() or np.all(self.state) else 0

    def update(self, target):
        """Return a new Game with the side to move placing its mark on `target`.

        `target` is a flat index (3*row + col). This position is left
        untouched: the board is copied before the mark is written.
        """
        # np.array() copies, so nested lists are not shared with the new game
        # (a plain list.copy() would only copy the outer list).
        state = np.array(self.state)
        state[target // 3][target % 3] = self.next_opp()
        # Carry first_player over; otherwise a game started with FIRST=2 would
        # fall back to FIRST=1 after one move and give the same side two turns.
        return Game(state, self.first_player)

    def next_opp(self):
        """Return the mark of the side to move.

        If both sides have placed the same number of marks it is the first
        player's turn; otherwise it is the other player's.
        """
        first_count = other_count = 0
        for row in self.state:
            for cell in row:
                if cell == self.first_player:
                    first_count += 1
                elif cell != 0:
                    other_count += 1

        if first_count == other_count:
            return self.first_player
        # Maps first_player 1 -> 2 and 2 -> 1.
        return 2 + min(0, 1 - self.first_player)

In [None]:
class Alpha:
    """Negamax agent with alpha-beta pruning.

    Works on any game object exposing `is_lose()`, `is_draw()`, `empty`
    (list of legal moves) and `update(move)` (returns the next position).
    Scores are from the point of view of the side to move: -1 for a lost
    position, 0 for a draw.
    """

    def value(self, game, alpha, beta):
        """Return the negamax value of `game` within the window (alpha, beta).

        Args:
            game: position to evaluate.
            alpha: best score the side to move is already guaranteed.
            beta: score at or above which the opponent will avoid this line.

        Returns:
            -1 or 0 for terminal positions, otherwise the (possibly raised)
            alpha after searching the children; the search stops early once
            alpha >= beta.
        """
        if game.is_lose():
            return -1

        if game.is_draw():
            return 0

        for a in game.empty:
            # Each child is scored on its own: negate the child's value (it is
            # from the opponent's point of view). Summing scores across
            # siblings would yield values outside [-1, 1].
            score = -self.value(game.update(a), -beta, -alpha)

            if score > alpha:
                alpha = score

            if alpha >= beta:
                return alpha

        return alpha

    def action(self, game):
        """Return the move from `game.empty` with the highest negamax score.

        Ties keep the earliest move. Raises IndexError if `game.empty` is empty.
        """
        best_action = game.empty[0]
        alpha = -float('inf')

        for a in game.empty:
            # Child window is (-beta, -alpha) with beta = +inf at the root.
            score = -self.value(game.update(a), -float('inf'), -alpha)
            if score > alpha:
                best_action = a
                alpha = score
        return best_action

    

In [None]:
class MCS:
    """Flat Monte-Carlo agent: rates each legal move by random playouts."""

    def __init__(self, n=100):
        """Store `n`, the number of random playouts run per candidate move."""
        self.n = n

    def playout(self, game):
        """Play uniformly random moves from `game` until the game ends.

        Returns the result from the point of view of the side to move in
        `game`: -1 or 1 when the game ends with `is_lose()`, 0 on a draw.
        """
        # `sign` flips on every ply so the terminal -1 is reported from the
        # point of view of the side to move at the starting position.
        sign = 1
        while True:
            if game.is_lose():
                return -sign
            if game.is_draw():
                return 0
            game = game.update(np.random.choice(game.empty))
            sign = -sign

    def action(self, game):
        """Return the move in `game.empty` with the best total playout result."""
        totals = []
        for move in game.empty:
            total = 0
            for _ in range(self.n):
                # Playout results are from the opponent's side, hence the minus.
                total -= self.playout(game.update(move))
            totals.append(total)

        return game.empty[np.argmax(totals)]

In [None]:
# class CNN:
#     def __init__(self):
#         self.model = models.load_model('./tanh2.h5')
        
#     def action(self, game):
#         status = game.next_opp()
#         target = np.reshape(game.state, (1, 1, 3, 3)).astype('float')
#         target = np.where(target==status, 1., np.where(target==0, 0, -1.))
#         res = self.model.predict(target)[0]
#         a = np.argmax(res)
#         while a not in game.empty:
#             res[a] = -float('inf')
#             a = np.argmax(res)
#         return a
                
    
#     def opp(self, status):
#         return 2 + min(0, 1-status)

In [None]:
# m1 = CNN()

In [None]:
class CNN:
    """Agent that picks moves with a Keras model loaded from './CNN.h5'."""

    def __init__(self):
        """Load the saved model from the working directory."""
        self.model = models.load_model('./CNN.h5')

    def action(self, game):
        """Return the empty cell of `game` that the model scores highest."""
        scores = self.predict(game)
        return game.empty[np.argmax(scores)]

    def make_state(self, game):
        """Encode `game` as a (1, 3, 3, 2) array of 0/1 planes.

        Channel 0 marks the cells of the side to move, channel 1 the cells
        holding the other mark (3 minus the mover's mark).
        """
        mover = game.next_opp()
        other = 3 - mover
        board = game.state
        planes = [
            np.where(board == mover, 1, 0),
            np.where(board == other, 1, 0),
        ]
        # Channels-last layout with a leading batch axis of size 1.
        return np.stack(planes, axis=-1).reshape(1, 3, 3, 2)

    def predict(self, game):
        """Return the model outputs for the empty cells, ordered like `game.empty`."""
        batch = self.make_state(game)
        output = self.model.predict(batch)[0]
        return output[game.empty]


In [None]:
m2 = CNN()

In [None]:
def play(game, m1, m2):
    """Play one game from `game`, with `m1` moving first and `m2` second.

    The result is recorded in the module-level list `score`:
    index 0 when the game ends with `is_lose()` right after a move by `m1`,
    index 1 likewise for `m2`, index 2 for a draw. Returns None.
    """
    global score
    seats = ((m1, 0), (m2, 1))
    while True:
        for agent, slot in seats:
            game = game.update(agent.action(game))
            if game.is_lose():
                score[slot] += 1
                return
            if game.is_draw():
                score[2] += 1
                return
        

In [None]:
# Starting position shared by every match; Game.update() copies the board,
# so `init_state` itself is never modified.
game = Game(init_state)
# K.clear_session()
# The match cell needs both `m1` and `m2`. `m2` is the CNN agent created above;
# `m1` must be defined here or the match cell fails with NameError on a fresh kernel.
m1 = MCS()
# m2 = MCS()

In [None]:
%%time
score = [0, 0, 0]
for _ in range(200):
#     print(_)
    print(score)
    play(game, m1, m2)
print(score)

score = [0, 0, 0]
for _ in range(200):
#     print(_)
    play(game, m2, m1)
#     print(score)
print(score)

In [None]:
# cnn vs mcs

In [None]:
# Display the current tally: [games ended by the first agent's move,
# games ended by the second agent's move, draws] as recorded by `play`.
score

In [None]:
# alpha vs mcs: 11 vs 164, MCS wins
# alpha vs minmax: 100 vs 0, alpha wins


In [None]:
# Deliberate stop so "Run All" halts at this cell. Python 3 only allows raising
# BaseException subclasses; raising a bare string fails with a TypeError instead.
raise RuntimeError('done')