In [1]:
import os
import numpy as np
import imageio
# Output directory (inside the current working directory) for rendered videos/GIFs.
# Built with os.path.join so the separator matches the host platform, consistent
# with how the GIF path is assembled when the animation is saved.
folder = os.path.join(os.getcwd(), "video")
os.makedirs(folder, exist_ok=True)  # exist_ok: re-running the cell is not an error

import multiagent.scenarios as scenarios
from multiagent.environment import MultiAgentEnv
from maddpg import MADDPGAgent

def make_env(scenario_name, benchmark=False):
    """Build a MultiAgentEnv for the named scenario.

    Args:
        scenario_name: Name of the scenario module, without the ".py"
            suffix (e.g. "simple_speaker_listener").
        benchmark: When True, the scenario's ``benchmark_data`` callback is
            passed to the environment as its info callback. When False
            (the default) no info callback is supplied.

    Returns:
        The constructed MultiAgentEnv instance.
    """
    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world()
    if benchmark:
        # Previously this flag was accepted but ignored; wire it through the
        # same way the upstream particle-envs make_env helper does.
        return MultiAgentEnv(
            world,
            scenario.reset_world,
            scenario.reward,
            scenario.observation,
            info_callback=scenario.benchmark_data,
        )
    return MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)


In [2]:
# Instantiate the speaker/listener environment and print a quick summary:
# the agent count followed by the observation and action space lists.
env = make_env(scenario_name="simple_speaker_listener")
summary = '{} agents in thie environment'.format(env.n)
print(summary)
print(env.observation_space)
print(env.action_space)

2 agents in thie environment
[Box(3,), Box(11,)]
[Discrete(3), Discrete(5)]


In [3]:
# Sizes handed to the agent constructor: leading observation dimension and
# number of discrete actions, read from each entry of the env's space lists.
obs_dim_list = [obs_space.shape[0] for obs_space in env.observation_space]
action_dim_list = [act_space.n for act_space in env.action_space]

In [4]:
# Smoke test: build an agent and push it through one short rollout (at most
# 80 steps) to confirm that acting, stepping the env and agent updates all run.
maddpg = MADDPGAgent(obs_dim_list, action_dim_list)

scores = np.zeros(env.n)  # running reward total, one slot per agent
obs_full = env.reset()

for t in range(80):
    actions = maddpg.get_actions(obs_full)
    next_obs_full, rewards, dones, info = env.step(actions)

    scores += rewards
    maddpg.step(obs_full, actions, rewards, next_obs_full, dones)

    # Advance to the next observation; stop early once every agent is done.
    obs_full = next_obs_full
    if all(dones):
        break

In [7]:
# Scratch check: rewards returned by the most recent env.step call.
rewards

[-7.927438627555681, -7.927438627555681]

In [6]:
# Scratch check: shape of the first entry of the latest observation list.
obs_full[0].shape

(3,)

In [6]:
# Training run: a fresh agent, NUM_EPISODES episodes of at most TMAX steps each.
NUM_EPISODES = 500  # total episodes to run
PRINT_EVERY = 50    # keep a permanent log line every this many episodes
TMAX = 300          # step cap per episode
maddpg = MADDPGAgent(obs_dim_list, action_dim_list)

score_list = []  # one per-agent return array per episode
for e in range(1, NUM_EPISODES + 1):
    scores = np.zeros(env.n)
    obs_full = env.reset()

    for t in range(TMAX):
        actions = maddpg.get_actions(obs_full)
        next_obs_full, rewards, dones, _ = env.step(actions)
        maddpg.step(obs_full, actions, rewards, next_obs_full, dones)
        scores += rewards
        obs_full = next_obs_full
        if all(dones):
            break

    score_list.append(scores)

    # The leading '\r' rewrites the current console line in place; on every
    # PRINT_EVERY-th episode the same text is printed again with a newline so
    # that line stays in the log.
    progress = '\rEpisode {}\tagent 0 {:10.2f}\tagent 1 {:10.2f}'.format(e, *scores)
    print(progress, end="")
    if e % PRINT_EVERY == 0:
        print(progress)

Episode 50	agent 0 -2894218.13	agent 1 -2894218.13
Episode 100	agent 0 -2888629.14	agent 1 -2888629.14
Episode 150	agent 0    -469.20	agent 1    -469.2055
Episode 200	agent 0 -5382718.18	agent 1 -5382718.18
Episode 250	agent 0 -2766796.09	agent 1 -2766796.09
Episode 300	agent 0 -2862902.45	agent 1 -2862902.45
Episode 350	agent 0 -5591476.68	agent 1 -5591476.68
Episode 400	agent 0 -5473976.89	agent 1 -5473976.89
Episode 450	agent 0 -5503535.15	agent 1 -5503535.15
Episode 500	agent 0 -5557515.16	agent 1 -5557515.16


In [7]:
# Run one more rollout (at most 300 steps), capturing a rendered frame per step
# so the episode can be exported as an animation.
frames = []

# Start from a fresh accumulator. The `scores` array left over from the
# training loop is the very object that was last appended to `score_list`, so
# adding to it in place would silently alter the final logged training episode
# and mix training returns with this rollout's returns.
scores = np.zeros(env.n)

obs_full = env.reset()
for t in range(300):
    actions = maddpg.get_actions(obs_full)
    next_obs_full, rewards, dones, info = env.step(actions)
    scores += rewards
    # Keep the first element of what render returns
    # (presumably one image per viewer -- TODO confirm).
    frames.append(env.render('rgb_array')[0])
    # NOTE(review): transitions are still fed to maddpg.step during this
    # recorded rollout -- confirm that updating the agent here is intended.
    maddpg.step(obs_full, actions, rewards, next_obs_full, dones)
    obs_full = next_obs_full
    if all(dones):
        break

In [8]:
# Write the captured frames to an animated GIF inside the video folder.
# NOTE(review): the unit of `duration` depends on the installed imageio
# version -- confirm that .04 gives the intended playback speed.
gif_path = os.path.join(folder, 'simple_speaker_listener.gif')
imageio.mimsave(gif_path, frames, duration=.04)