In [528]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from vrp import VRP

In [529]:
max_current_iteration = 1000

In [None]:
class MetaEnv(gym.Env):
    def __init__(self, vrp):
        self.vrp = vrp
        self.nS = 10
        self.nA = 3
        self.observation_space = gym.spaces.Discrete(self.nS)
        self.action_space = spaces.Discrete(self.nA)

    

    def _get_obs(self):
        """Convert internal state to observation format.

        Returns:
            dict: Observation
        """
        return 1

    def _get_info(self):
        """Compute auxiliary information for debugging.

        Returns:
            dict: Info with distance between agent and target
        """
        return {
            "objective": self.vrp.get_best_solution(),
            "mutation_rate": self.vrp.F,
            "current_iteration": self.vrp.current_iteration,
            "delta_F": self.vrp.delta_F,
        }

    def reset(self, seed=None):
        self.vrp.reset()
        super().reset(seed=seed)
        observation = self._get_obs()
        info = self._get_info()
        return observation, info

    def step(self, action):
        if action == 1:
            self.vrp.change_F("DECREASE")
        elif action == 2:
            self.vrp.change_F("INCREASE")

        self.vrp.evolve(n_iteration=10)

        reward = -self.vrp.get_best_solution()
        if self.vrp.is_exceed_max_iteration():
            terminated = True
            truncated = True
        else:
            terminated = False
            truncated = False
        observation = self._get_obs()
        info = self._get_info()
        return observation, reward, terminated, truncated, info

In [None]:
nbin_obj = 10
nbin_F = 10
nbin_bin_obj = 10

obj_bin = np.linspace(20, 40, n_bin)
F_bin = np.linspace(0, 1, n_bin)
it_bin = np.linspace(0,1000, n_bin).astype(int)
pi = np.zeros

obj_bin
F_bin
it_bin

array([   0,  111,  222,  333,  444,  555,  666,  777,  888, 1000])

In [531]:
distance = np.array(
    [
        [0, 0.664, 1.035, 1.789, 4.854, 7.586, 11.425, 11.871],
        [4.857, 0, 0.852, 1.606, 4.671, 7.403, 11.242, 11.688],
        [4.004, 4.23, 0, 0.753, 3.818, 6.55, 10.389, 10.835],
        [5.857, 6.083, 6.454, 0, 3.064, 5.796, 9.635, 10.081],
        [7.267, 7.493, 7.864, 4.066, 0, 2.934, 6.698, 7.144],
        [10.704, 10.93, 11.301, 7.503, 3.68, 0, 5.619, 6.065],
        [13.475, 13.701, 14.072, 10.274, 6.451, 5.409, 0, 0.943],
        [15.079, 15.305, 15.676, 11.878, 8.055, 7.013, 1.603, 0],
    ]
)


dimensions = len(distance) - 1
maxiters = 100
population_size = 100
bounds = np.array([[0, 1]] * dimensions)
Mutation_rate = np.array([0.5, 0.9])
Crossover_rate = np.array([0.1, 0.5])


vrp = VRP(
    population_size=population_size,
    dimensions=dimensions,
    bounds=bounds,
    Mutation_rate=Mutation_rate,
    Crossover_rate=Crossover_rate,
    distance=distance,
    max_iteration=1000,
)

In [532]:
from gymnasium.utils.env_checker import check_env

env = MetaEnv(vrp=vrp)

# This will catch many common issues
try:
    check_env(env)
    print("Environment passes all checks!")
except Exception as e:
    print(f"Environment has issues: {e}")

Environment has issues: The `reset` method does not provide an `options` or `**kwargs` keyword argument.


In [549]:
env = MetaEnv(vrp=vrp)

print(env.reset())
print(env.action_space.sample())

(1, {'objective': np.float64(37.208), 'mutation_rate': np.float64(0.5), 'current_iteration': 0, 'delta_F': np.float64(0.04)})
1


In [548]:
env.step(action=2)

(1,
 np.float64(-29.41),
 False,
 False,
 {'objective': np.float64(29.41),
  'mutation_rate': np.float64(1.0200000000000005),
  'current_iteration': 130,
  'delta_F': np.float64(0.04)})

In [535]:
n = 1000
obs_arr = np.zeros(n)
m_arr = np.zeros(n)
# Generate range
for i in range(n):
    obs, _ = env.reset()
    obs_arr[i] = obs["objective"]
    m_arr[i] = obs["mutation_rate"]

print(obs_arr.min(), obs_arr.max())


TypeError: 'int' object is not subscriptable

In [None]:
# env = MetaEnv(vrp=vrp)

# env.reset()
# done = False
# count = 0
# while not done:
#     env.step()
#     count = count + 1
#     if count > 10:
#         done = True

