In [2]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np

In [3]:
class AIMH_ENV(gym.Env):
    """Gymnasium environment that drives an external ``vrp`` solver object.

    At every step the agent's action is forwarded to the solver, the solver is
    evolved for ``interval_it`` iterations, and the solver's reported state is
    turned into the next observation.

    The ``vrp`` object must provide the methods this class calls:
    ``get_current_state()`` (a mapping containing at least the keys listed in
    ``_OBS_KEYS``), ``reset()``, ``action(action)``,
    ``evolve(n_iteration=...)``, ``get_reward()`` and ``is_terminated()``.
    """

    # Order of the features inside the observation vector; the bounds of
    # ``observation_space`` below are listed in this same order.
    _OBS_KEYS = (
        "best_solution",
        "F",
        "CR",
        "MG",
        "percent_convergence",
        "std_pop",
        "count_total_iteration",
    )

    def __init__(self, vrp, interval_it = 100):
        """Create the environment.

        Args:
            vrp: Solver object controlled by the agent (see class docstring
                for the methods it must expose).
            interval_it: Number of solver iterations run per environment step.
        """
        super().__init__()

        # Three continuous controls handed verbatim to ``vrp.action``.
        # NOTE(review): presumably these are (F, CR, MG) -- confirm against
        # the solver implementation.
        self.action_space = spaces.Box(
            low=np.array([0, 0, 0], dtype=np.float32),
            high=np.array([5, 5, 1], dtype=np.float32),
            shape=(3,),
            dtype=np.float32,
        )

        # dtype is float32 so that the declared space matches both its own
        # bounds and the float32 arrays produced by ``_get_obs``.
        self.observation_space = spaces.Box(
            low=np.array([-2, 0, 0, 0, 0, 0, 0], dtype=np.float32),
            high=np.array([2, 10, 10, 1, 1, 1, 1e5], dtype=np.float32),
            shape=(len(self._OBS_KEYS),),
            dtype=np.float32,
        )

        self.vrp = vrp
        self.interval_it = interval_it

    def _get_obs(self):
        """Build the observation vector from the solver's current state.

        Returns:
            np.ndarray: float32 array of shape ``(7,)`` whose entries follow
            the order of ``_OBS_KEYS``.
        """
        state = self.vrp.get_current_state()
        return np.array(
            [np.float64(state[key]) for key in self._OBS_KEYS],
            dtype=np.float32,
        )

    def _get_info(self):
        # TODO: We might want to see more stuff here.
        """Compute auxiliary information for debugging.

        Returns:
            dict: Shallow copy of the solver's current state.
        """
        return {**self.vrp.get_current_state()}

    def reset(self, seed=None, options=None):
        """Reset the solver and return the initial observation.

        Args:
            seed: Seed for the environment RNG and for NumPy's global RNG.
                When ``None`` the global RNG is seeded with 42.
            options: Accepted for Gymnasium API compatibility; unused.

        Returns:
            tuple: ``(observation, info)``.
        """
        # Gymnasium expects the base-class reset first so that
        # ``self.np_random`` is seeded before any other work happens.
        super().reset(seed=seed)

        # ``seed or 42`` would silently turn an explicit seed of 0 into 42;
        # only fall back to the default when no seed was given at all.
        np.random.seed(42 if seed is None else seed)

        self.vrp.reset()
        return self._get_obs(), self._get_info()

    def step(self, action):
        """Apply ``action``, evolve the solver and report the outcome.

        Args:
            action: Value forwarded unchanged to ``vrp.action``.

        Returns:
            tuple: ``(observation, reward, terminated, truncated, info)``.
        """
        self.vrp.action(action)
        self.vrp.evolve(n_iteration=self.interval_it)
        reward = self.vrp.get_reward()

        # The solver only reports termination. Truncation is a separate
        # signal in the Gymnasium API (an external time limit), and raising
        # it together with ``terminated`` can make learners bootstrap from a
        # terminal state, so it is never raised here.
        terminated = bool(self.vrp.is_terminated())
        truncated = False

        return self._get_obs(), reward, terminated, truncated, self._get_info()

    

In [6]:
# Smoke check: build the environment without a solver (the constructor only
# stores `vrp`) and draw one random action from the 3-dimensional action space.
AIMH_ENV(vrp=None).action_space.sample()

array([3.255056 , 0.3950934, 0.2780268], dtype=float32)