In [6]:
import torch
import numpy as np
from torch import nn
import gym
import tianshou as ts
import Environment
from tianshou.env import (
    ContinuousToDiscrete,
    DummyVectorEnv,
    PettingZooEnv,
    ShmemVectorEnv,
    SubprocVectorEnv,
)
from pettingzoo.utils.wrappers import BaseWrapper
# define a multi-agent environment

def get_env():
    env = Environment.VehicleJobSchedulingEnvACE()
    env = BaseWrapper(env)
    env = PettingZooEnv(env)
    return env
env = get_env()
num_agents = env.num_agents
obs_shape,*_ = env.observation_space.shape
act_shape = env.action_space.n 

# import the predefined networks from tianshou
from tianshou.utils.net.common import ActorCritic, Net

# define a policy network and a critic network for each agent
policy_nets = [Net(obs_shape, act_shape, hidden_sizes=[64], ) for _ in range(num_agents)] # use output_size=act_shape.sum() for multi-discrete action space
critic_nets = [Net(obs_shape * num_agents + act_shape * num_agents, hidden_sizes=[64], action_shape=1) for _ in range(num_agents)] # use act_shape.sum() instead of act_shape for multi-discrete action space

# define a policy and a critic for each agent using tianshou and the predefined networks
policies = []
critics = []
for i in range(num_agents):
    # use different optimizers and parameters for policy and critic networks
    optim_p = torch.optim.Adam(policy_nets[i].parameters(), lr=1e-4)
    optim_c = torch.optim.Adam(critic_nets[i].parameters(), lr=1e-3)
    # use DDPGPolicy
    policy = ts.policy.DDPGPolicy(actor=policy_nets[i], actor_optim=optim_p,critic=critic_nets[i], critic_optim=optim_c,gamma=0.95,tau=0.01)
    policies.append(policy)
maddpg = ts.policy.MultiAgentPolicyManager(policies,env)
# define a replay buffer to store transitions
buffer = ts.data.ReplayBuffer(size=100000)

# define a collector to interact with the environment and collect data
collector = ts.data.Collector(maddpg, env, buffer)

# define a trainer to train the policies and critics
trainer = ts.trainer.offpolicy_trainer(
    policies,
    critics,
    collectors=[collector],
    max_epoch=100,
    step_per_epoch=1000,
    collect_per_step=10,
    batch_size=128,
    update_per_step=10,
)

# start training
trainer.train()


TypeError: object of type 'PettingZooEnv' has no len()

In [3]:
env.observation_space.shape

(38,)