In [9]:
# the TestEnv environment is used to simply simulate the network
from flow.envs import TestEnv

# the Experiment class is used for running simulations
from flow.core.experiment import Experiment

# the base network class
from flow.networks import Network
from flow.envs import Env

# all other imports are standard
from flow.core.params import VehicleParams
from flow.core.params import NetParams
from flow.core.params import InitialConfig
from flow.core.params import EnvParams
from flow.core.params import TrafficLightParams
from flow.controllers import IDMController
from flow.core.params import SumoCarFollowingParams

# Default parameters shared by the cells below.
HORIZON = 2000  # rollout length in simulation steps
initial_config = InitialConfig()
env_params = EnvParams(horizon=HORIZON)

In [10]:
import os

# Directory holding the SUMO network / route / additional files used below.
# The location is machine-specific, so it can be overridden through the
# SUMO_FILES_DIR environment variable; the original path stays the default.
le_dir = os.environ.get(
    "SUMO_FILES_DIR",
    "/home/valentin/Schreibtisch/personal_sumo_files",
)

In [11]:
from flow.core.params import SumoParams

# SUMO simulator settings, one keyword per line so they are easy to tweak.
sim_params = SumoParams(
    sim_step=1,
    render=True,
    restart_instance=True,
)

In [12]:
# Vehicle container handed to the network; no vehicles are added to it
# anywhere in this notebook.
vehicles = VehicleParams()

In [13]:
from flow.core.params import InFlows

# Edges on which vehicles enter the network, listed in the order the inflows
# are registered (the order determines the generated names flow_0 ... flow_5).
INFLOW_EDGES = (
    "right_east",
    "right_south",
    "right_north",
    "left_north",
    "left_south",
    "left_west",
)
# Probability passed to every inflow (presumably the per-step chance of
# emitting a vehicle -- confirm against the InFlows documentation).
INFLOW_PROBABILITY = 0.08

inflow = InFlows()
for edge in INFLOW_EDGES:
    # every entry edge gets an identical "human" inflow
    inflow.add(veh_type="human", edge=edge, probability=INFLOW_PROBABILITY)

In [14]:
# Display the registered inflows to verify them (one dict per inflow.add call).
inflow.get()

[{'name': 'flow_0',
  'vtype': 'human',
  'edge': 'right_east',
  'departLane': 'first',
  'departSpeed': 0,
  'begin': 1,
  'end': 86400,
  'probability': 0.08},
 {'name': 'flow_1',
  'vtype': 'human',
  'edge': 'right_south',
  'departLane': 'first',
  'departSpeed': 0,
  'begin': 1,
  'end': 86400,
  'probability': 0.08},
 {'name': 'flow_2',
  'vtype': 'human',
  'edge': 'right_north',
  'departLane': 'first',
  'departSpeed': 0,
  'begin': 1,
  'end': 86400,
  'probability': 0.08},
 {'name': 'flow_3',
  'vtype': 'human',
  'edge': 'left_north',
  'departLane': 'first',
  'departSpeed': 0,
  'begin': 1,
  'end': 86400,
  'probability': 0.08},
 {'name': 'flow_4',
  'vtype': 'human',
  'edge': 'left_south',
  'departLane': 'first',
  'departSpeed': 0,
  'begin': 1,
  'end': 86400,
  'probability': 0.08},
 {'name': 'flow_5',
  'vtype': 'human',
  'edge': 'left_west',
  'departLane': 'first',
  'departSpeed': 0,
  'begin': 1,
  'end': 86400,
  'probability': 0.08}]

In [15]:
import os

# Network parameters: the scenario is imported from existing SUMO files
# located in le_dir, and the inflows defined earlier are attached to it.
net_params = NetParams(
    template=dict(
        # network geometry features
        net=os.path.join(le_dir, "lemgo_small.net.xml"),
        # features associated with the properties of drivers
        vtype=os.path.join(le_dir, "vtypes.add.xml"),
        # features associated with the routes vehicles take
        rou=os.path.join(le_dir, "lemgo_small2_out.rou.xml"),
        # additional file; presumably the lane area detectors -- TODO confirm
        det=os.path.join(le_dir, "lemgo_small.add.xml"),
    ),
    inflows=inflow,
)

## Create custom network with lane area detectors

#### 3.2.3 Running the Modified Simulation

Finally, the fully imported simulation can be run as follows. 

**Warning**: the network takes some time to initialize while the departure positions and times of the vehicles are being specified.

In [16]:
# create the network from the imported SUMO template
network = Network(
    name="template",
    net_params=net_params,
    vehicles=vehicles
)

# create the environment
env = TestEnv(
    env_params=env_params,
    sim_params=sim_params,
    network=network
)

# Run a single rollout of HORIZON (= 2000) simulation steps, so the step
# count stays in sync with the horizon given to env_params.
# Experiment is flow.core.experiment.Experiment here; a later cell rebinds
# the name to Ray Tune's Experiment, so this cell must run before that one.
exp = Experiment(env=env)
_ = exp.run(1, HORIZON)

FatalTraCIError: connection closed by SUMO

In [9]:
# This is the custom environment.
# It needs to be imported in order to work properly in Flow.
from flow.envs.simple_env import SimpleEnv
env_name = SimpleEnv

In [10]:
# Flow experiment description. The dictionary keys must be exactly the ones
# used below.
flow_params = {
    # name of the experiment
    "exp_tag": "first_exp",
    # Flow environment class the experiment is running on
    "env_name": env_name,
    # network class the experiment uses
    "network": Network,
    # simulator that is used by the experiment
    "simulator": "traci",
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sim_params,
    # environment related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and
    # the network's documentation or ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (see flow.core.params.VehicleParams)
    "veh": VehicleParams(),
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

In [11]:
import json
import random

import ray
# get_agent_class is looked up in two places; the second location is used
# when the first import fails (presumably it moved between Ray versions).
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments, run
# NOTE(review): this rebinds the name `Experiment`, shadowing
# flow.core.experiment.Experiment imported in the first cell. Cells that
# need the Flow class must run before this one.
from ray.tune.experiment import Experiment
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray.tune.schedulers import PopulationBasedTraining

Instructions for updating:
non-resource variables are not supported in the long term


In [12]:
# number of parallel workers
N_CPUS = 1
# number of rollouts per training iteration
N_ROLLOUTS = 1

# ignore_reinit_error keeps this cell safe to re-run in a live kernel:
# without it, a second ray.init() call raises because Ray is already running.
# (object_store_memory=1000000000 can additionally be passed to cap the
# object store size.)
ray.init(num_cpus=N_CPUS, ignore_reinit_error=True)

2020-04-01 11:52:48,261	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-04-01_11-52-48_258796_6450/logs.
2020-04-01 11:52:48,421	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:47989 to respond...
2020-04-01 11:52:48,561	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:45469 to respond...
2020-04-01 11:52:48,574	INFO services.py:809 -- Starting Redis shard with 1.65 GB max memory.
2020-04-01 11:52:48,622	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-04-01_11-52-48_258796_6450/logs.
2020-04-01 11:52:48,626	INFO services.py:1475 -- Starting the Plasma object store with 2.47 GB memory using /dev/shm.


{'node_ip_address': '192.168.2.105',
 'redis_address': '192.168.2.105:47989',
 'object_store_address': '/tmp/ray/session_2020-04-01_11-52-48_258796_6450/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-04-01_11-52-48_258796_6450/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2020-04-01_11-52-48_258796_6450'}

In [13]:
def explore(config):
    """Clamp a perturbed hyperparameter config to values that can be trained.

    The dictionary is modified in place and the same object is returned.

    Parameters
    ----------
    config : dict
        must contain the keys "train_batch_size", "sgd_minibatch_size" and
        "num_sgd_iter"

    Returns
    -------
    dict
        the config that was passed in, with "train_batch_size" raised to at
        least twice "sgd_minibatch_size" and "num_sgd_iter" raised to at
        least 1
    """
    # collect enough timesteps to do sgd (at least two minibatches)
    config["train_batch_size"] = max(
        config["train_batch_size"], config["sgd_minibatch_size"] * 2)
    # run at least one sgd iteration
    config["num_sgd_iter"] = max(config["num_sgd_iter"], 1)
    return config

In [14]:
# Population Based Training scheduler.
# NOTE(review): the mutated keys ("lambda", "vf_clip_param",
# "sgd_minibatch_size", ...) look like PPO hyperparameters, while alg_run is
# set to "DQN" in this notebook, and `pbt` is not passed to the experiment
# or to run_experiments anywhere in the visible cells -- confirm whether
# this scheduler is meant to be used.
pbt = PopulationBasedTraining(
    time_attr="time_total_s",
    perturbation_interval=4,
    resample_probability=0.25,
    metric="episode_reward_mean",
    mode="max",
    custom_explore_fn=explore,
    # how each hyperparameter is sampled when it is mutated
    hyperparam_mutations={
        "lambda": lambda: random.uniform(0.9, 1.0),
        "vf_clip_param": lambda: random.uniform(20000, 50000),
        "lr": [5e-2, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
        "sgd_minibatch_size": lambda: random.randint(128, 16384),
        "train_batch_size": lambda: random.randint(
            N_CPUS * HORIZON, 2 * N_CPUS * HORIZON),
    },
)

In [15]:
from copy import deepcopy

# The algorithm or model to train. This may refer to the name of a built-in
# algorithm (e.g. RLlib's DQN or PPO), or to a user-defined trainable
# function or class registered in the tune registry.
alg_run = "DQN"

agent_cls = get_agent_class(alg_run)
# Deep-copy the defaults: the nested "model" and "env_config" dicts are
# modified below, and a shallow .copy() would write those changes through
# to the agent class's shared _default_config.
config = deepcopy(agent_cls._default_config)
config["num_workers"] = 0  # number of parallel workers
# config["num_envs_per_worker"] = 1  # number of parallel workers
config["num_gpus"] = 0
config["lr"] = 1e-3
# config["v_max"] = 0
# config["v_min"] = -50000
config["train_batch_size"] = 128  # batch size used for training
config["sample_batch_size"] = 16  # batch size used for sampling
config["gamma"] = 0.999  # discount rate
config["model"].update({"fcnet_hiddens": [128]})  # size of hidden layers in network
config["log_level"] = "DEBUG"
config["horizon"] = HORIZON  # rollout horizon
config["timesteps_per_iteration"] = HORIZON

# save the flow params for replay
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)  # generating a string version of flow_params
config['env_config']['flow_params'] = flow_json  # adding the flow_params to config dict
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to be able to
# register the Flow env for this experiment
create_env, gym_name = make_create_env(params=flow_params, version=0)

config["env"] = gym_name
# Register as rllib env with Gym
register_env(gym_name, create_env)

In [17]:
# Ray Tune experiment specification, named after the Flow experiment tag.
exp = Experiment(
    flow_params["exp_tag"],
    run=alg_run,
    config=dict(config),  # pass a copy of the trainer config
    checkpoint_freq=5,  # number of iterations between checkpoints
    checkpoint_at_end=True,  # generate a checkpoint at the end
    max_failures=5,
    # stopping conditions: number of iterations to stop after
    stop={"training_iteration": 100},
    num_samples=1,
)

In [None]:
# Launch the training run described by `exp`.
# NOTE(review): the recorded output below reports a PPO trial under FIFO
# scheduling, although alg_run is set to "DQN" in this notebook -- the
# output looks stale; re-run the notebook to refresh it.
trials = run_experiments(exp)

2020-04-01 11:52:49,345	INFO trial_runner.py:176 -- Starting a new experiment.
2020-04-01 11:52:49,384	ERROR log_sync.py:34 -- Log sync requires cluster to be setup with `ray up`.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/1 CPUs, 0/0 GPUs
Memory usage on this node: 3.1/8.2 GB

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 1/1 CPUs, 0/0 GPUs
Memory usage on this node: 3.1/8.2 GB
Result logdir: /home/valentin/ray_results/first_exp
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_SimpleEnv-v0_0:	RUNNING

[2m[36m(pid=6494)[0m Instructions for updating:
[2m[36m(pid=6494)[0m non-resource variables are not supported in the long term
[2m[36m(pid=6494)[0m Loading configuration... done.
[2m[36m(pid=6494)[0m 2020-04-01 11:52:52.669803: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=6494)[0m 2020-04-01 11:52:52.710664: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2899885000 Hz
[2m[36m(pid=6494)[0m 2020-04-01 11:52:52.711099: I tensorflow/compiler/xla/service/ser

[2m[36m(pid=6494)[0m Loading configuration... done.
Result for PPO_SimpleEnv-v0_0:
  custom_metrics: {}
  date: 2020-04-01_11-53-17
  done: false
  episode_len_mean: 1000.0
  episode_reward_max: -22579.0
  episode_reward_mean: -22861.5
  episode_reward_min: -23144.0
  episodes_this_iter: 1
  episodes_total: 2
  experiment_id: 18bfee7cff51456fa8e7b1a0fab8d1fe
  hostname: valentin-Aspire-V3-372
  info:
    grad_time_ms: 243.54
    learner:
      default_policy:
        cur_kl_coeff: 0.10000000149011612
        cur_lr: 4.999999873689376e-05
        entropy: 1.3858383893966675
        entropy_coeff: 0.0
        kl: 0.00010224748257314786
        policy_loss: -0.0006257534259930253
        total_loss: 452419.0
        vf_explained_var: 3.451108932495117e-05
        vf_loss: 452418.9375
    load_time_ms: 53.774
    num_steps_sampled: 2000
    num_steps_trained: 2000
    sample_time_ms: 10297.276
    update_time_ms: 0.006
  iterations_since_restore: 2
  node_ip: 192.168.2.105
  num_healthy



== Status ==
Using FIFO scheduling algorithm.
Resources requested: 1/1 CPUs, 0/0 GPUs
Memory usage on this node: 3.4/8.2 GB
Result logdir: /home/valentin/ray_results/first_exp
Number of trials: 1 ({'RUNNING': 1})
RUNNING trials:
 - PPO_SimpleEnv-v0_0:	RUNNING, [1 CPUs, 0 GPUs], [pid=6494], 51 s, 5 iter, 5000 ts, -2.28e+04 rew

[2m[36m(pid=6494)[0m Loading configuration... done.
Result for PPO_SimpleEnv-v0_0:
  custom_metrics: {}
  date: 2020-04-01_11-53-58
  done: false
  episode_len_mean: 1000.0
  episode_reward_max: -22200.0
  episode_reward_mean: -22750.166666666668
  episode_reward_min: -23286.0
  episodes_this_iter: 1
  episodes_total: 6
  experiment_id: 18bfee7cff51456fa8e7b1a0fab8d1fe
  hostname: valentin-Aspire-V3-372
  info:
    grad_time_ms: 117.471
    learner:
      default_policy:
        cur_kl_coeff: 0.0062500000931322575
        cur_lr: 4.999999873689376e-05
        entropy: 1.3856301307678223
        entropy_coeff: 0.0
        kl: 4.81330935144797e-05
        policy