In [1]:
import os
import pickle

import gym

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import get_linear_fn
from stable_baselines3.common.vec_env import SubprocVecEnv

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset, filter_agents_by_frames
from l5kit.dataset import EgoDataset
from l5kit.environment.callbacks import get_callback_list
from l5kit.environment.envs.l5_env import SimulationConfigGym
from l5kit.environment.feature_extractor import CustomFeatureExtractor
from l5kit.rasterization import build_rasterizer
from l5kit.visualization.visualizer.zarr_utils import simulation_out_to_visualizer_scene_gym_cle
from l5kit.visualization.visualizer.visualizer import visualize
from l5kit.data import MapAPI

from bokeh.io import output_notebook, show

# By setting the L5KIT_DATA_FOLDER variable, we can point the script
# to the folder where the data lies. It must be set before the
# LocalDataManager below is created.
# os.path.expanduser("~") is used rather than os.environ["HOME"] because HOME
# is not defined on every platform (e.g. Windows), where the direct lookup
# would raise KeyError.
os.environ["L5KIT_DATA_FOLDER"] = os.path.join(os.path.expanduser("~"), "level5_data")

# Load the gym environment configuration and build the map API from it.
# The same config path is reused by every gym.make / make_vec_env call below.
env_config_path = "./gym_config.yaml"
cfg = load_config_data(env_config_path)
# NOTE(review): passing None presumably makes the manager fall back to
# L5KIT_DATA_FOLDER — confirm against the l5kit version in use.
dm = LocalDataManager(None)
mapAPI = MapAPI.from_cfg(dm, cfg)

In [2]:
# Smoke test: make sure the registered 'L5-CLE-v0' environment can be
# instantiated from the config file before any training setup is done.
env = gym.make(
    'L5-CLE-v0',
    env_config_path=env_config_path,
)

  dataset = ChunkedDataset("")
  new_dataset = ChunkedDataset("")


In [3]:
# Environment rollouts are performed by 4 parallel processes.
n_envs = 4
# Episode length handed to SimulationConfigGym.
eps_length = 32

# Use kinematic model
kinematic = True

# Keyword arguments forwarded to each 'L5-CLE-v0' environment instance.
env_kwargs = dict(
    env_config_path=env_config_path,
    sim_cfg=SimulationConfigGym(eps_length),
    use_kinematic=kinematic,
)

# Init training environment: a SubprocVecEnv with n_envs copies,
# whose worker processes are started with the 'fork' method.
env = make_vec_env(
    "L5-CLE-v0",
    n_envs=n_envs,
    env_kwargs=env_kwargs,
    vec_env_cls=SubprocVecEnv,
    vec_env_kwargs={'start_method': 'fork'},
)

0.402374267578125 -0.2406167984008789
0.402374267578125 -0.2406167984008789
0.402374267578125 -0.2406167984008789
0.402374267578125 -0.2406167984008789


In [4]:
# Policy configuration: swap in the custom feature-extractor backbone
# (a simple CNN) producing 128-dimensional features, and disable the
# policy's own image normalisation.
policy_kwargs = {
    "features_extractor_class": CustomFeatureExtractor,
    "features_extractor_kwargs": {"features_dim": 128},
    "normalize_images": False,
}

In [5]:
# Training hyper-parameters.

# Every environment is unrolled for 256 steps before each model update, so
# with 4 parallel environments one rollout buffer holds 256 * 4 = 1024 steps.
num_rollout_steps = 256

# Number of optimisation epochs run over each collected buffer.
n_epochs = 10

# Discount factor.
gamma = 0.95

# PPO clipping range: decreased linearly from 0.2 to 0.001 as training progresses.
clip_schedule = get_linear_fn(start=0.2, end=0.001, end_fraction=1)

learning_rate = 3e-4

# Total number of steps for a full training run.
n_steps = 1_000_000

# How often the model is saved during training, and the output name prefix.
save_freq = 5_000
output_prefix = 'saved_model'

In [6]:
# Build the PPO agent on the vectorised training environment, using the
# CNN policy with the custom feature extractor and the hyper-parameters
# defined above.
model = PPO(
    "CnnPolicy",
    env,
    learning_rate=learning_rate,
    n_steps=num_rollout_steps,
    n_epochs=n_epochs,
    gamma=gamma,
    clip_range=clip_schedule,
    policy_kwargs=policy_kwargs,
    verbose=1,
)


Using cuda device


In [7]:
# Separate, non-vectorised environment used only for evaluating the model.
# It is configured like the training environments (same config file,
# episode length and kinematic setting) and attached to the model.
eval_env = gym.make(
    "L5-CLE-v0",
    env_config_path=env_config_path,
    sim_cfg=SimulationConfigGym(eps_length),
    use_kinematic=kinematic,
)
model.eval_env = eval_env


0.402374267578125 -0.2406167984008789


In [8]:
# define callbacks
# Builds the callback object passed to model.learn() below from the output
# name prefix, the number of parallel environments and the save frequency.
# NOTE(review): presumably this sets up periodic saving every `save_freq`
# steps under `output_prefix` — confirm against l5kit.environment.callbacks.
callback = get_callback_list(output_prefix, n_envs, save_freq)

In [9]:
# Train the agent.
# This is a short demonstration run of 5000 timesteps; for the full run,
# pass `n_steps` as the number of timesteps instead.
model.learn(total_timesteps=5000, callback=callback)


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 31       |
|    ep_rew_mean     | -238     |
| time/              |          |
|    fps             | 122      |
|    iterations      | 1        |
|    time_elapsed    | 8        |
|    total_timesteps | 1024     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 31           |
|    ep_rew_mean          | -226         |
| time/                   |              |
|    fps                  | 97           |
|    iterations           | 2            |
|    time_elapsed         | 20           |
|    total_timesteps      | 2048         |
| train/                  |              |
|    approx_kl            | 0.0059264097 |
|    clip_fraction        | 0.0865       |
|    clip_range           | 0.159        |
|    entropy_loss         | -4.26        |
|    explained_variance   | -0.00111     |
|    learning_r

<stable_baselines3.ppo.ppo.PPO at 0x7f5ee4aca490>

In [10]:
## Visualize Closed loop trained models
output_notebook()
file = './logs/saved_model_5000_steps.pkl'

# The file name has the form "<prefix>_<steps>_steps.pkl", so the
# second-to-last underscore-separated field is the training step count.
t_step = file.split('_')[-2]
print("Visualizatio Time Step:", t_step)

# SECURITY: pickle.load can execute arbitrary code while unpickling.
# Only load files written by this notebook's own training run, never
# files from an untrusted source.
with open(file, 'rb') as f:
    sim_outs = pickle.load(f)

# Rendering is done after the `with` block so the file handle is released
# as soon as the data is in memory, not held open for every plot.
for sim_out in sim_outs:  # for each scene
    vis_in = simulation_out_to_visualizer_scene_gym_cle(sim_out, mapAPI)
    show(visualize(sim_out.scene_id, vis_in))

Visualizatio Time Step: 5000
