In [3]:
import os

import gymnasium as gym
import matplotlib
import mo_gymnasium as mo_gym
import numpy as np
import pandas as pd
import torch
from matplotlib import pyplot as plt

from src import MO_DQN
from src.utils import ChebyshevScalarisation

### Linear Scalarisation

In [6]:
# Train an MO_DQN agent with the default scalarisation on the multi-objective
# circle environment and store the per-episode rewards as CSV.
env = mo_gym.make('mo-circle-env-v0', render_mode='rgb_array')
env.unwrapped.configure({
    "screen_width": 500,
    "screen_height": 500,
    "observation": {
        "type": "MultiAgentObservation",
        "observation_config": {
            "type": "Kinematics",
        }
    }
})
# NOTE(review): manual control is enabled for a training run -- confirm this is intended.
env.unwrapped.configure({
    "manual_control": True
})

# Create the output directory *before* the long training run: a missing folder
# would otherwise only surface in to_csv at the very end and discard the results.
linear_results_path = "data/linear_scalarisation.csv"
os.makedirs(os.path.dirname(linear_results_path), exist_ok=True)

obs, info = env.reset()

# The shape of the first entry of the observation is handed to the agent.
linear_agent = MO_DQN.MO_DQN(
    env,
    num_objectives=2,
    seed=11,
    observation_space_shape=obs[0].shape,
    replay_buffer_size=1000,
    batch_ratio=0.2,
    objective_names=["speed_reward", "energy_reward"],
)
# epsilon_start == epsilon_end, so presumably the exploration rate stays fixed at 0.1.
df = linear_agent.train(200_000, epsilon_start=0.1, epsilon_end=0.1, inv_optimisation_frequency=1)
print(df)
df.to_csv(linear_results_path)

Iterations: 100%|██████████| 200000/200000 [36:49<00:00, 90.50it/s]

      episode  speed_reward  energy_reward
0           0      8.783333      15.362052
1           1      5.050000       9.787058
2           2      8.683333      15.262052
3           3      8.633333      15.212052
4           4      0.683333       1.932760
...       ...           ...            ...
2598     2598     13.133333      19.712052
2599     2599     13.033333      19.612052
2600     2600     13.283333      19.862052
2601     2601     13.133333      19.712052
2602     2602      0.866667       1.984484

[2603 rows x 3 columns]





### Chebyshev Scalarisation

In [2]:
# Train an MO_DQN agent with Chebyshev scalarisation on the multi-objective
# circle environment and store the per-episode rewards as CSV.
env = mo_gym.make('mo-circle-env-v0', render_mode='rgb_array')
env.unwrapped.configure({
    "screen_width": 500,
    "screen_height": 500,
    "observation": {
        "type": "MultiAgentObservation",
        "observation_config": {
            "type": "Kinematics",
        }
    }
})
# NOTE(review): manual control is enabled for a training run -- confirm this is intended.
env.unwrapped.configure({
    "manual_control": True
})

# Create the output directory *before* the long training run: a missing folder
# would otherwise only surface in to_csv at the very end and discard the results.
chebyshev_results_path = "data/chebyshev_scalarisation.csv"
os.makedirs(os.path.dirname(chebyshev_results_path), exist_ok=True)

obs, info = env.reset()
# Arguments forwarded to the scalarisation: [initial utopian point, threshold value].
scal_arguments = [torch.tensor([-float("inf"), -float("inf")]), 0.1]
# Bound to its own name (not `linear_agent`) so the variable reflects the
# scalarisation method actually used by this agent.
chebyshev_agent = MO_DQN.MO_DQN(
    env,
    num_objectives=2,
    seed=11,
    observation_space_shape=obs[0].shape,
    replay_buffer_size=1000,
    batch_ratio=0.2,
    objective_names=["speed_reward", "energy_reward"],
    scalarisation_method=ChebyshevScalarisation,
    scalarisation_argument_list=scal_arguments,
)
# epsilon_start == epsilon_end, so presumably the exploration rate stays fixed at 0.1.
df = chebyshev_agent.train(150_000, epsilon_start=0.1, epsilon_end=0.1, inv_optimisation_frequency=1)
print(df)
df.to_csv(chebyshev_results_path)

Iterations: 100%|██████████| 150000/150000 [27:01<00:00, 92.48it/s]

      episode  speed_reward  energy_reward
0           0      5.083333       8.109478
1           1     12.133333      18.712052
2           2      8.583333      15.162052
3           3      2.733333       4.574999
4           4     13.283333      19.862052
...       ...           ...            ...
1960     1960      0.800000       2.181035
1961     1961      8.783333      15.362052
1962     1962      6.083333      11.478436
1963     1963      8.483333      15.062052
1964     1964      8.433333      15.012052

[1965 rows x 3 columns]





## Visualisation