In [501]:



import numpy as np

def valid(array):    
    X = True
    for boolean in (array == 'X'):
        X &= boolean
    if X:
        return 'X'
    O = True
    for boolean in (array == 'O'):
        O &= boolean
    if O:
        return 'O'


class Table: 
    def __init__(self, array): 
        self.array = array

    def __repr__(self): 
        return str(self.array)

    def available(self):
        return [(x, y) for x in range(3) for y in range(3) if not self.array[x][y]]


    def winner(self): 
        array = np.array(self.array)
        if out := valid(array[:,0]):
            return out
        if out := valid(array[:,1]):
            return out
        if out := valid(array[:,2]):
            return out
        if out := valid(array[0,:]):
            return out
        if out := valid(array[1,:]):
            return out
        if out := valid(array[2,:]):
            return out
        
        first = self.array[0][0]
        out = True
        for i in range(3):
            out &= self.array[i][i] == first

        if out:
            return first

        third = self.array[0][2]
        out = True
        for i in range(3):
            out &= self.array[i][2-i] == third

        if out:
            return third
        
    def __hash__(self):
        return hash(tuple(hash(self.array[i][j]) for i in range(3) for j in range(3)))


    def count_turn(self):
        turn = 0
        for row in self.array:
            for el in row:
                if el:
                    turn += 1
        return turn


    def move(self, move, player):
        self.array[move[0]][move[1]] = player

    def __eq__(self, other):
        return hash(self) == hash(other)


In [508]:


class Menace:
    def __init__(self):
        self.map = {}

    def get_move(self, table) -> (int, int):
        choices = self.map.get(table, None)
        
        if not choices:
            self.set_choices(table)
            choices = self.map[table]

        i = np.random.randint(len(choices))
        return choices[i]

    def reward(self, table, move):
        try:
            self.map[table].append(move)
        except:
            pass
            

    def punish(self, table, move):
        try:
            if len(self.map[table]) > 1 and (move in self.map[table]):
                self.map[table].remove(move)
        except:
            pass

        
    def set_choices(self, table):
        available = table.available()
        available *= (12 - table.count_turn()) // 3
        self.map[table] = available
        



In [509]:


class Game: 
    def __init__(self, menace: Menace):

        self.menace = menace
        self.restart()

    def restart(self):
        self.table = Table([[None for i in range(3)] for j in range(3)])
        self.xmoves = {}
        self.ymoves = {}
        self.turn = 'X'

    def move(self):
        move = self.menace.get_move(self.table)
        if self.turn == 'X':
            self.xmoves[self.table] = move
        else:
            self.ymoves[self.table] = move
        
        self.table.move(move, self.turn)
        
        if self.turn == 'X':
            self.turn = 'O'
        else:
            self.turn = 'X'


    def finished(self):
        if self.table.winner() or not self.table.available():
            return True
        else:
            return False

    def reward_and_punish(self):
        if self.table.winner() == 'X':
            for table, move in self.xmoves.items():
                self.menace.reward(table, move)

            for table, move in self.ymoves.items():
                self.menace.punish(table, move)
        elif self.table.winner() == 'O':
            for table, move in self.xmoves.items():
                self.menace.punish(table, move)

            for table, move in self.ymoves.items():
                self.menace.reward(table, move)
        


In [510]:
class Trainer:

    def __init__(self, menace = Menace()):

        self.game = Game(menace)

    def train_for(self, number):

        for i in range(number):

            while not self.game.finished():
                self.game.move()
            self.game.reward_and_punish()
            self.game.restart()



In [511]:
game = Game(Menace())

In [512]:
trainer = Trainer()


In [515]:


menace = trainer.game.menace

In [614]:
table = Table([[None for j in range(3)] for i in range(3)])

In [619]:


move = menace.get_move(table)
table.move(move, 'X')
table

[['O', None, 'X'], [None, None, 'O'], ['X', 'X', None]]

In [611]:
table.move((2, 2), 'O')
table

[['O', 'X', None], ['X', 'O', None], ['X', None, 'O']]

In [612]:
table.winner()

'O'

In [603]:
len(menace.map.keys())

57527