In [1]:
import numpy as np
import os

class CaroEnv:
    """Two-player Caro (n-in-a-row) board environment with a gym-like API.

    The board is a ``board_size x board_size`` ``int8`` array in which ``0``
    is an empty cell, ``1`` is player 1 (rendered as ``X``) and ``-1`` is
    player -1 (rendered as ``O``). Player 1 always moves first. Actions are
    flat cell indices: ``action = row * board_size + col``.

    All rewards are expressed from player 1's point of view (``+10`` when
    player 1 wins, ``-10`` when player -1 wins).
    """

    # Scan directions as (row step, column step): right, down, down-right,
    # down-left. Scanning every cell in these four directions covers every
    # possible line exactly once.
    _DIRECTIONS = ((0, 1), (1, 0), (1, 1), (1, -1))

    # Text used by render() for each occupied cell value.
    _SYMBOLS = {1: " X ", -1: " O "}

    def __init__(self, verbose=False, board_size=5, win_length=3):
        """Create an empty board.

        Args:
            verbose: When false, ``render`` prints nothing.
            board_size: Number of rows (and columns) of the square board.
            win_length: Number of consecutive stones needed to win.
        """
        self.board_size = board_size
        self.win_length = win_length
        self.board = np.zeros((board_size, board_size), dtype=np.int8)
        self.current_player = 1
        self.observation_space = (board_size, board_size)
        # Minimal stand-in for an action-space object: it only exposes ``n``,
        # the number of discrete actions (one per cell).
        self.action_space = type('ActionSpace', (), {'n': board_size * board_size})
        # One move per cell at most; stored for callers, not used in this class.
        self.max_steps = board_size * board_size
        self.verbose = verbose

    def reset(self):
        """Clear the board, give the turn to player 1 and return a board copy."""
        self.board = np.zeros((self.board_size, self.board_size), dtype=np.int8)
        self.current_player = 1
        return self.board.copy()

    def step(self, action):
        """Place the current player's stone on the cell numbered ``action``.

        Args:
            action: Flat cell index in ``[0, board_size * board_size)``.

        Returns:
            A tuple ``(board_copy, reward, done, info)``:

            * illegal move (out-of-range index or occupied cell): reward
              ``-10``, ``done=True``, ``info={"info": "Invalid move"}``; the
              board and the player to move are left unchanged;
            * winning move: the reward from ``check_winner`` (``10`` or
              ``-10``), ``done=True``, ``info={"info": "Game over"}``;
            * board full without a winner: reward ``0``, ``done=True``,
              ``info={"info": "Draw"}``;
            * otherwise: reward ``0.1`` after a move by player 1 or ``-0.1``
              after a move by player -1, ``done=False``, ``info={}``.
        """
        # Reject out-of-range indices explicitly: a negative index would
        # otherwise wrap around via negative indexing and silently play a
        # different cell, and a too-large one would raise IndexError.
        if not 0 <= action < self.board_size * self.board_size:
            return self.board.copy(), -10, True, {"info": "Invalid move"}

        row, col = divmod(action, self.board_size)
        if self.board[row, col] != 0:
            return self.board.copy(), -10, True, {"info": "Invalid move"}

        self.board[row, col] = self.current_player
        reward, done = self.check_winner()
        if done:
            return self.board.copy(), reward, done, {"info": "Game over"}

        # Hand the turn over. The shaping reward is decided after the switch,
        # so "current player is -1" means player 1 has just moved.
        self.current_player = -self.current_player
        reward = 0.1 if self.current_player == -1 else -0.1
        if np.all(self.board != 0):
            return self.board.copy(), 0, True, {"info": "Draw"}
        return self.board.copy(), reward, False, {}

    def check_winner(self):
        """Scan the whole board for a line of ``win_length`` equal stones.

        Returns:
            ``(10, True)`` if the first line found belongs to player 1,
            ``(-10, True)`` if it belongs to the other player, and
            ``(0, False)`` when no line exists.
        """
        size = self.board_size
        # Offset from a line's first cell to its last one; clamped at zero so
        # a win_length below 1 still makes any single stone count as a line.
        span = max(self.win_length - 1, 0)
        for i in range(size):
            for j in range(size):
                owner = self.board[i, j]
                if owner == 0:
                    continue
                for di, dj in self._DIRECTIONS:
                    end_i, end_j = i + di * span, j + dj * span
                    # A line whose far end leaves the board cannot be complete.
                    if not (0 <= end_i < size and 0 <= end_j < size):
                        continue
                    if all(
                        self.board[i + di * k, j + dj * k] == owner
                        for k in range(1, self.win_length)
                    ):
                        return (10 if owner == 1 else -10), True
        return 0, False

    def get_valid_actions(self):
        """Return the flat indices of all empty cells as a NumPy array."""
        return np.where(self.board.flatten() == 0)[0]

    def render(self, clear_screen=False):
        """Print the board to stdout; does nothing unless ``verbose`` is set.

        Args:
            clear_screen: When true, clear the terminal before printing.
        """
        if not self.verbose:
            return
        if clear_screen:
            os.system('cls' if os.name == 'nt' else 'clear')
        print("\nBàn cờ Caro:")
        # Column header, aligned with the three-character cells below.
        print("  " + "".join(f" {j} " for j in range(self.board_size)))
        for i in range(self.board_size):
            cells = "".join(
                self._SYMBOLS.get(int(value), " . ") for value in self.board[i]
            )
            print(f"{i} " + cells)