In [1]:
import gymnasium as gym
import mo_gymnasium as mo_gym
import numpy as np
from src import MO_DQN
from src.utils import ChebyshevScalarisation
from matplotlib import pyplot as plt
import matplotlib
import pandas as pd
import torch

### Linear Scalarisation

In [2]:
# Build the multi-objective circle environment with per-agent kinematic observations.
env = mo_gym.make('mo-circle-env-v0', render_mode='rgb_array')
env.unwrapped.configure({
    "screen_width": 500,
    "screen_height": 500,
    "observation": {
        "type": "MultiAgentObservation",
        "observation_config": {
            "type": "Kinematics",
        }
    }
})

# Reset once so the observation shape of a single agent can be read off obs[0].
obs, info = env.reset()

# Agent using the default scalarisation of MO_DQN (this section: linear scalarisation).
linear_agent = MO_DQN.MO_DQN(env, num_objectives=2, seed=11, observation_space_shape=obs[0].shape, replay_buffer_size=1000, batch_ratio=0.2,
                      objective_names=["speed_reward", "energy_reward"])
# NOTE(review): training is disabled here, so the evaluation below runs on the agent
# exactly as constructed. Re-enable the next line to train before evaluating.
#linear_agent.train(200_000, epsilon_start=0.1, epsilon_end=0.1, inv_optimisation_frequency=1)

# np.array builds the actual 2-D reference point [0, 0]; np.ndarray([0, 0]) would instead
# allocate an *empty* array of shape (0, 0), i.e. a reference point with no elements.
df = linear_agent.evaluate(hv_reference_point=np.array([0.0, 0.0]), seed=11)
print(df)
df.to_csv("data/linear_scalarisation_eval.csv")

  logger.warn(
Weight tuple: 100%|██████████| 66/66 [02:25<00:00,  2.21s/it]

     repetition_number  weight_index weight_tuple  normalised_speed_reward  \
0                    0             0   [0.0, 1.0]                 0.616667   
1                    1             0   [0.0, 1.0]                 0.616667   
2                    2             0   [0.0, 1.0]                 0.501833   
3                    3             0   [0.0, 1.0]                 0.616667   
4                    4             0   [0.0, 1.0]                 0.415500   
..                 ...           ...          ...                      ...   
325                  0            65   [1.0, 0.0]                 0.373000   
326                  1            65   [1.0, 0.0]                 0.388000   
327                  2            65   [1.0, 0.0]                 0.329000   
328                  3            65   [1.0, 0.0]                 0.991167   
329                  4            65   [1.0, 0.0]                 0.995500   

     normalised_energy_reward  raw_speed_reward  raw_energy_rew




### Chebyshev Scalarisation

In [2]:
# Build the multi-objective circle environment with per-agent kinematic observations.
env = mo_gym.make('mo-circle-env-v0', render_mode='rgb_array')
env.unwrapped.configure({
    "screen_width": 500,
    "screen_height": 500,
    "observation": {
        "type": "MultiAgentObservation",
        "observation_config": {
            "type": "Kinematics",
        }
    }
})

# Reset once so the observation shape of a single agent can be read off obs[0].
obs, info = env.reset()

# Arguments forwarded to the Chebyshev scalarisation: the initial utopian point
# (-inf for each of the two objectives) followed by the threshold value.
scal_arguments = [torch.tensor([-float("inf"),-float("inf")]), 0.1]
cheb_agent = MO_DQN.MO_DQN(env, num_objectives=2, seed=11, observation_space_shape=obs[0].shape, replay_buffer_size=1000, batch_ratio=0.2,
                      objective_names=["speed_reward", "energy_reward"], scalarisation_method=ChebyshevScalarisation, scalarisation_argument_list=scal_arguments)
# Constant exploration rate: epsilon starts and ends at 0.1.
cheb_agent.train(200_000, epsilon_start=0.1, epsilon_end=0.1, inv_optimisation_frequency=1)

# np.array builds the actual 2-D reference point [0, 0]; np.ndarray([0, 0]) would instead
# allocate an *empty* array of shape (0, 0), i.e. a reference point with no elements.
df = cheb_agent.evaluate(hv_reference_point=np.array([0.0, 0.0]), seed=11)
print(df)
df.to_csv("data/chebyshev_scalarisation_eval.csv")

Training iterations:   0%|          | 739/200000 [00:07<32:12, 103.10it/s]


KeyboardInterrupt: 

## Visualisation