In [1]:
import gymnasium as gym
import mo_gymnasium as mo_gym
import numpy as np
from src import MO_DQN
from src.utils import ChebyshevScalarisation
from matplotlib import pyplot as plt
import matplotlib
import pandas as pd
import torch

### Linear Scalarisation

In [3]:
# Build the multi-objective circle environment with per-agent kinematic observations.
env = mo_gym.make('mo-circle-env-v0', render_mode='rgb_array')
env.unwrapped.configure({
    "screen_width": 500,
    "screen_height": 500,
    "observation": {
        "type": "MultiAgentObservation",
        "observation_config": {
            "type": "Kinematics",
        }
    }
})

# Reset once so the shape of a single agent's observation can be passed to the agent.
obs, info = env.reset()

# No scalarisation method is passed here, so MO_DQN uses its default
# (presumably linear scalarisation, per the section heading - confirm in src/MO_DQN).
linear_agent = MO_DQN.MO_DQN(env, num_objectives=2, seed=11, observation_space_shape=obs[0].shape, replay_buffer_size=1000, batch_ratio=0.2,
                      objective_names=["speed_reward", "energy_reward"])
# epsilon_start and epsilon_end are identical (0.1).
linear_agent.train(200_000, epsilon_start=0.1, epsilon_end=0.1, inv_optimisation_frequency=1)

# NOTE: np.array, not np.ndarray. np.ndarray([0, 0]) treats the list as a *shape*
# and returns an empty (0, 0) array rather than the 2-D reference point (0, 0).
df = linear_agent.evaluate(hv_reference_point=np.array([0, 0]), seed=11)
print(df)
df.to_csv("data/linear_scalarisation_eval.csv")

Training iterations: 100%|██████████| 200000/200000 [29:06<00:00, 114.53it/s]
  logger.warn(
Weight tuple: 100%|██████████| 66/66 [03:58<00:00,  3.61s/it]

     repetition_number  weight_index weight_tuple  normalised_speed_reward  \
0                    0             0   [0.0, 1.0]                 0.003333   
1                    1             0   [0.0, 1.0]                 0.003333   
2                    2             0   [0.0, 1.0]                 0.003333   
3                    3             0   [0.0, 1.0]                 0.003333   
4                    4             0   [0.0, 1.0]                 0.003333   
..                 ...           ...          ...                      ...   
325                  0            65   [1.0, 0.0]                 0.666667   
326                  1            65   [1.0, 0.0]                 0.490000   
327                  2            65   [1.0, 0.0]                 0.666667   
328                  3            65   [1.0, 0.0]                 0.130000   
329                  4            65   [1.0, 0.0]                 0.176667   

     normalised_energy_reward  raw_speed_reward  raw_energy_rew




### Chebyshev Scalarisation

In [4]:
# Build a fresh multi-objective circle environment with per-agent kinematic observations.
env = mo_gym.make('mo-circle-env-v0', render_mode='rgb_array')
env.unwrapped.configure({
    "screen_width": 500,
    "screen_height": 500,
    "observation": {
        "type": "MultiAgentObservation",
        "observation_config": {
            "type": "Kinematics",
        }
    }
})

# Reset once so the shape of a single agent's observation can be passed to the agent.
obs, info = env.reset()
# Arguments forwarded to ChebyshevScalarisation: initial utopian point and threshold value.
scal_arguments = [torch.tensor([-float("inf"),-float("inf")]), 0.1] #initial utopian and threshold value
cheb_agent = MO_DQN.MO_DQN(env, num_objectives=2, seed=11, observation_space_shape=obs[0].shape, replay_buffer_size=1000, batch_ratio=0.2,
                      objective_names=["speed_reward", "energy_reward"], scalarisation_method=ChebyshevScalarisation, scalarisation_argument_list=scal_arguments)
# epsilon_start and epsilon_end are identical (0.1).
cheb_agent.train(200_000, epsilon_start=0.1, epsilon_end=0.1, inv_optimisation_frequency=1)

# NOTE: np.array, not np.ndarray. np.ndarray([0, 0]) treats the list as a *shape*
# and returns an empty (0, 0) array rather than the 2-D reference point (0, 0).
df = cheb_agent.evaluate(hv_reference_point=np.array([0, 0]), seed=11)
print(df)
df.to_csv("data/chebyshev_scalarisation_eval.csv")

Training iterations:   0%|          | 0/200000 [00:00<?, ?it/s]

Training iterations: 100%|██████████| 200000/200000 [29:21<00:00, 113.51it/s]
  logger.warn(
Weight tuple: 100%|██████████| 66/66 [04:07<00:00,  3.76s/it]

     repetition_number  weight_index weight_tuple  normalised_speed_reward  \
0                    0             0   [0.0, 1.0]                 0.030000   
1                    1             0   [0.0, 1.0]                 0.666667   
2                    2             0   [0.0, 1.0]                 0.666667   
3                    3             0   [0.0, 1.0]                 0.536667   
4                    4             0   [0.0, 1.0]                -0.003333   
..                 ...           ...          ...                      ...   
325                  0            65   [1.0, 0.0]                 0.003333   
326                  1            65   [1.0, 0.0]                 0.003333   
327                  2            65   [1.0, 0.0]                 0.003333   
328                  3            65   [1.0, 0.0]                 0.003333   
329                  4            65   [1.0, 0.0]                 0.003333   

     normalised_energy_reward  raw_speed_reward  raw_energy_rew




## Visualisation