In [12]:
import numpy as np
from copy import deepcopy

In [13]:
class Grid:
    """Connect-Four board that doubles as a node of a Monte-Carlo search tree.

    Attributes:
        GRID_SIZE: ``(rows, columns)`` of the board, ``(6, 7)``.
        grid: float NumPy array of shape ``GRID_SIZE``; 0 is an empty cell,
            1 and -1 are the two players' pieces.
        curr_val: value written by the next ``play_move`` call; it is negated
            after every move, so it alternates between 1 and -1.
        base_val: player whose win is scored +100 by ``score_grid``.
        parent: node this one was expanded from, or ``None``.
        children: child nodes, filled lazily by ``get_children``.
        results: rewards recorded on this node (appended by the search code
            outside the class).
    """

    def __init__(self, parent=None):
        self.GRID_SIZE = (6, 7)
        self.create_grid()
        self.children = []
        self.results = []
        self.parent = parent
        self.curr_val = 1
        self.base_val = 1

    def create_grid(self):
        """Replace the board with an all-empty (all-zero) array."""
        self.grid = np.zeros(self.GRID_SIZE)

    def is_leaf(self):
        """Return True when no children have been generated for this node."""
        return len(self.children) == 0

    def is_terminal(self):
        """Return True when the game is over (a win for either side, or a full board)."""
        return self.score_grid() != "not finished"

    def available_positions(self):
        """Return the playable ``(row, col)`` cell of every non-full column.

        Columns are visited left to right; within a column the scan starts at
        the highest row index and keeps the first empty cell it meets.
        """
        a, b = self.GRID_SIZE
        couples = []
        for col in range(b):
            for row in range(a - 1, -1, -1):
                if self.grid[row, col] == 0:
                    couples.append((row, col))
                    break
        return couples

    def grid_complete(self):
        """Return True when no column has an empty cell left."""
        return len(self.available_positions()) == 0

    def win_grid(self):
        """Return the value (1 or -1) of a four-in-a-row if one exists, else ``False``.

        The four scans run in a fixed order (horizontal, vertical, then the two
        diagonals) and the first alignment found is returned.
        """
        a, b = self.GRID_SIZE
        grid = self.grid

        # Horizontal check
        for row in range(a):
            for col in range(b - 3):
                if grid[row, col] == grid[row, col + 1] == grid[row, col + 2] == grid[row, col + 3] != 0:
                    return grid[row, col]

        # Vertical check
        for col in range(b):
            for row in range(a - 1, 2, -1):
                if grid[row, col] == grid[row - 1, col] == grid[row - 2, col] == grid[row - 3, col] != 0:
                    return grid[row, col]

        # Diagonal (bottom-left to top-right) check
        for row in range(a - 1, 2, -1):
            for col in range(b - 3):
                if grid[row, col] == grid[row - 1, col + 1] == grid[row - 2, col + 2] == grid[row - 3, col + 3] != 0:
                    return grid[row, col]

        # Diagonal (top-left to bottom-right) check
        for row in range(a - 3):
            for col in range(b - 3):
                if grid[row, col] == grid[row + 1, col + 1] == grid[row + 2, col + 2] == grid[row + 3, col + 3] != 0:
                    return grid[row, col]

        return False

    def play_move(self, pos: tuple):
        """Write the current player's value at ``pos`` and hand the turn over.

        ``pos`` is a ``(row, col)`` pair. When its length is not 2 nothing is
        played: the message is printed and returned instead, and the turn does
        not change. No check is made that ``pos`` is empty or legal.
        """
        if len(pos) != 2:
            print("position length should be 2")
            return "position length should be 2"
        self.grid[pos] = self.curr_val
        self.curr_val = -self.curr_val

    def get_children(self):
        """Return one child node per available position, creating them on first use.

        Children are built from the board state only. They start with empty
        ``results`` and ``children`` so an unvisited child is recognisable as
        such, and the rest of the search tree is never copied.
        """
        if not self.children:
            self.children = [self._spawn_child(pos) for pos in self.available_positions()]
        return self.children

    def _spawn_child(self, pos):
        """Build the node reached by playing ``pos`` from this position."""
        child = type(self)(parent=self)
        child.GRID_SIZE = self.GRID_SIZE
        child.grid = self.grid.copy()  # independent board; this node's board stays untouched
        child.curr_val = self.curr_val
        child.base_val = self.base_val
        child.play_move(pos)
        return child

    def score_grid(self):
        """Score the position from ``base_val``'s point of view.

        Returns 100 when ``base_val`` has four in a row, -100 when the other
        player has, 0 when the board is full without a winner, and the string
        ``"not finished"`` otherwise.
        """
        win = self.win_grid()

        if win == self.base_val:
            return 100
        if win == -self.base_val:
            return -100
        if self.grid_complete():
            return 0
        return "not finished"

    def reset_grid(self):
        """Detach this node from the search tree and forget its statistics.

        Despite the name, the board itself (``grid``, ``curr_val``) is left
        untouched; only ``children``, ``results`` and ``parent`` are cleared.
        """
        self.children = []
        self.results = []
        self.parent = None

    def show_grid(self):
        """Print the board, drawing 1 as ``X``, -1 as ``O`` and empty cells as blanks."""
        grid = self.grid.astype(str)
        grid[grid == "0.0"] = " "
        grid[grid == "1.0"] = "X"
        grid[grid == "-1.0"] = "O"

        # Display the grid with clear delimiters between rows and cells
        print("-----------------------------")
        for row in grid:
            print(f"| {' | '.join(row)} |")
            print("-----------------------------")

In [14]:
def ucb_score(child:Grid, parent:Grid, C=np.sqrt(2)):
    """Upper-confidence-bound score of ``child`` as seen from ``parent``.

    A child without any recorded result scores ``np.inf``. Otherwise the score
    is the mean of the child's results plus an exploration bonus of
    ``C * sqrt(ln(len(parent.results)) / len(child.results))``.
    """
    visits = len(child.results)
    if visits == 0:
        return np.inf
    exploitation = np.mean(child.results)
    exploration = C * np.sqrt(np.log(len(parent.results)) / visits)
    return exploitation + exploration


In [15]:
def selection(grid:Grid):
    """Walk down from ``grid`` to a leaf, following the best UCB score at each level.

    Ties between equally scored children are broken uniformly at random.
    """
    node = grid
    while not node.is_leaf():
        kids = node.get_children()
        scores = [ucb_score(kid, node) for kid in kids]
        # Indices of every child that reaches the maximum score.
        best = np.flatnonzero(scores == np.max(scores))
        node = kids[np.random.choice(best)]
    return node

In [16]:
def expansion(grid:Grid):
    """Pick the node to simulate from, expanding ``grid`` when appropriate.

    * A finished game is returned unchanged.
    * A non-root node that has no recorded result yet is returned unchanged,
      so it gets simulated before being expanded.
    * Otherwise the children are generated and one of them is returned,
      chosen uniformly at random.
    """
    if grid.is_terminal():
        return grid

    never_simulated = len(grid.results) == 0
    if never_simulated and grid.parent is not None:
        return grid

    kids = grid.get_children()
    grid.children = kids
    pick = np.random.randint(len(kids))
    return kids[pick]

In [17]:
def playout(grid:Grid):
    """Play uniformly random moves from ``grid`` until the game ends.

    Returns the final ``score_grid()`` value (100, -100 or 0). ``grid`` itself
    is not modified: the simulation runs on a scratch board.

    Only the board state is copied. Deep-copying the node would also copy
    everything reachable through ``parent``/``children``/``results`` (the
    whole search tree and every reward recorded so far) on each playout.
    """
    playout_node = type(grid)()
    playout_node.GRID_SIZE = grid.GRID_SIZE
    playout_node.grid = grid.grid.copy()
    playout_node.curr_val = grid.curr_val
    playout_node.base_val = grid.base_val

    while not playout_node.is_terminal():
        av_pos = playout_node.available_positions()
        pos = av_pos[np.random.randint(len(av_pos))]
        playout_node.play_move(pos)
    return playout_node.score_grid()

In [18]:
def backpropagation(grid:Grid, reward):
    """Record ``reward`` on ``grid`` and on every ancestor, flipping its sign at each level.

    ``grid`` receives ``reward``, its parent ``-reward``, its grandparent
    ``reward`` again, and so on up to the node whose ``parent`` is ``None``.

    Always returns ``"Done"`` (previously this was only the case when called
    directly on the root; any other starting node returned ``None``).
    """
    node = grid
    while node is not None:
        node.results.append(reward)
        node = node.parent
        reward = -reward
    return "Done"

In [25]:
def get_action(grid:Grid, value=1, n_playouts=1000):
    """Choose the next position by Monte-Carlo search and return it as a child node.

    One selection/expansion step is run per currently available position, each
    followed by ``n_playouts`` random playouts whose rewards are multiplied by
    ``value`` before being backpropagated.

    NOTE: a later cell of this notebook defines a threaded function with the
    same name; whichever cell was executed last is the one in effect.

    Args:
        grid: root node of the search; its ``children`` and ``results`` are
            filled in as a side effect.
        value: multiplier applied to every playout reward (the notebook passes
            1 for the first player and -1 for the second).
        n_playouts: number of random playouts per selected node.

    Returns:
        The child of ``grid`` with the highest ``ucb_score`` (ties broken at
        random).

    Raises:
        ValueError: if ``grid`` is already a finished game, in which case there
            is no move to choose.
    """
    if grid.is_terminal():
        raise ValueError("get_action called on a finished game: no move to choose")

    n_iterations = len(grid.available_positions())
    for _iteration in range(n_iterations):
        leaf = selection(grid)
        expanded = expansion(leaf)
        for _playout in range(n_playouts):
            reward = playout(expanded)
            backpropagation(expanded, value * reward)

    # Explicit array so the comparison below is an element-wise NumPy one.
    ucbs = np.array([ucb_score(child, grid) for child in grid.children])
    candidate_id = np.random.choice(np.flatnonzero(ucbs == ucbs.max()))
    return grid.children[candidate_id]


In [30]:
# Self-play: the search plays both sides, starting from an empty board.
grid = Grid()
new_grid = deepcopy(grid)

# Reward multiplier handed to get_action: 1 for the first player, -1 for the second.
player = 1

# Stop as soon as somebody has won or the board is full, instead of looping
# forever and relying on an exception or a manual interrupt to end the cell.
while not new_grid.is_terminal():
    new_grid = get_action(new_grid, player)
    new_grid.show_grid()
    # The chosen child becomes the root of the next search: drop its tree links and statistics.
    new_grid.reset_grid()
    player = -player

-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   |   | X |   |   |   |
-----------------------------
-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   |   | X |   | O |   |
-----------------------------
-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   |   |   |   |   |   |
-----------------------------
|   |   | 

KeyboardInterrupt: 

In [29]:
import concurrent.futures
from threading import Lock

def safe_append(lst, item, lock):
    """Append ``item`` to ``lst`` while holding ``lock``.

    The lock is released again even if the append raises.
    """
    lock.acquire()
    try:
        lst.append(item)
    finally:
        lock.release()

def threaded_playout(expanded, value, lock, rewards):
    """Worker task: run one playout from ``expanded`` and record its outcome.

    The playout reward is multiplied by ``value``; the result is appended to
    ``rewards`` under ``lock`` and then backpropagated from ``expanded``.
    """
    signed_reward = value * playout(expanded)
    safe_append(rewards, signed_reward, lock)
    backpropagation(expanded, signed_reward)

def get_action(grid, value=1, n_playouts=1000):
    """Threaded variant of the move search: playouts run on a thread pool.

    One selection/expansion step is run per currently available position. The
    ``n_playouts`` playouts of a step are submitted to the pool and awaited
    before the next step starts. Selection reads the ``results`` written by
    backpropagation, so starting the next selection while playouts are still
    queued could pick the same child again and leave others without a single
    simulation; such a child scores ``inf`` in the final ranking.

    Args:
        grid: root node of the search; its ``children`` and ``results`` are
            filled in as a side effect.
        value: multiplier applied to every playout reward.
        n_playouts: number of random playouts per selected node.

    Returns:
        The child of ``grid`` with the highest ``ucb_score`` (ties broken at
        random).

    Raises:
        ValueError: if ``grid`` is already a finished game, in which case there
            is no move to choose.
    """
    if grid.is_terminal():
        raise ValueError("get_action called on a finished game: no move to choose")

    rewards = []  # filled by the workers; not read afterwards in this function
    lock = Lock()
    available_positions = grid.available_positions()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        for _iteration in range(len(available_positions)):
            leaf = selection(grid)
            expanded = expansion(leaf)
            batch = [
                executor.submit(threaded_playout, expanded, value, lock, rewards)
                for _playout in range(n_playouts)
            ]
            # Wait for the whole batch; result() re-raises any worker exception.
            for future in concurrent.futures.as_completed(batch):
                future.result()

    # Explicit array so the comparison below is an element-wise NumPy one.
    ucbs = np.array([ucb_score(child, grid) for child in grid.children])
    candidate_id = np.random.choice(np.flatnonzero(ucbs == ucbs.max()))
    return grid.children[candidate_id]
