# NavSim Car Notebook

This notebook tests the airsim car simulation by training for 2 episodes of 100 timesteps each.

In the following cell, change the paths as needed.

In the third cell from here, make changes to the RL Config or Environment Config as needed.

In [1]:
from pathlib import Path

# Root directory that holds the code repositories.
# Change this to wherever the repos are checked out on your machine, e.g.
#   base_path = Path("d:/") / "work" / "projects"
base_path = Path.home().joinpath("projects")

# Location of the Unity environment executable.
# Change the leading directory if your builds live elsewhere, e.g.
#   uenv_path = Path("d:/") / "work" / "unity-envs" / "Berlin_URP" / "Berlin_ML"
uenv_path = Path.home().joinpath("unity-envs", "Berlin_URP", "Berlin_ML")
uenv_file = str(uenv_path.resolve())

# To use the Unity Editor instead of an executable, uncomment the next line:
#uenv_file = None  

In [2]:
import sys

%reload_ext autoreload
%autoreload 2

for pkg in ['ezai_util','ai_coop_py']:
    pkg_path = base_path / pkg
    pkg_path = str(pkg_path.resolve())
    print(pkg_path)
    if not pkg_path in sys.path:
        sys.path.append(pkg_path)
import ezai_util
import navsim
from ezai_util import DictObj

/data/afandang/projects/ezai_util
/data/afandang/projects/ai_coop_py
/data/afandang/projects/ezai_util


In [3]:
# Run / RL settings.
# NOTE(review): in the cells visible in this notebook the Trainer is built from
# `conf` (loaded from navsim_conf.json), not from rl_conf / env_conf -- confirm
# that edits made here actually reach the trainer.
rl_conf = DictObj({
    "env_name": "airsim",
    "seed": 123,
    "discount": 0.99,
    "tau": 5e-3,
    "expl_noise": 0.1,
    "batch_size": 256,
    "batches_before_train": 2,
    "memory_capacity": 10000,
    "episode_max_steps": 100,
    "num_episodes": 2,
})

# Environment settings; the seed and the per-episode step limit are taken from
# rl_conf so the two configurations cannot drift apart.
env_conf = DictObj({
    "filename": uenv_file,
    "log_folder": "unity-logs",  # TODO: derive this from the environment / experiment id instead of hard-coding it
    "seed": rl_conf.seed,
    "timeout": 600,
    "worker_id": 0,
    "observation_mode": 0,
    "max_steps": rl_conf.episode_max_steps,
    "reward_for_goal": 50,
    "reward_for_ep": 0.005,
    "reward_for_other": -0.1,
    "reward_for_falling_off_map": -50,
    "reward_for_step": -0.0001,
    "segmentation_mode": 1,
})

In [4]:
# Load the full configuration from navsim_conf.json in the current working
# directory. This `conf` object is what the Trainer cell below receives.
# NOTE(review): rl_conf / env_conf defined earlier are not referenced here --
# confirm whether the JSON file or those objects are the intended source.
conf = DictObj().load_from_json_file("navsim_conf.json")

ezai - I - Making empty DictObj because parameters passed is not a dict
2020-11-10 22:35:17 INFO [dict.py:54] Making empty DictObj because parameters passed is not a dict


In [5]:
# Show the loaded configuration so the values handed to the trainer can be checked.
print(conf)

{'env_conf': {'filename': '/home/afandang/unity_envs/Berlin_ML/Berlin_ML', 'log_folder': 'unity.log', 'seed': 123, 'timeout': 600, 'worker_id': 0, 'observation_mode': 0, 'max_steps': 100, 'reward_for_goal': 50, 'reward_for_ep': 0.005, 'reward_for_other': -0.1, 'reward_for_falling_off_map': -50, 'reward_for_step': -0.0001, 'segmentation_mode': 1}, 'run_conf': {'env_name': 'navsim', 'episode_max_steps': 100, 'num_episodes': 2, 'memory_capacity': 10000, 'seed': 123, 'discount': 0.99, 'tau': 0.005, 'expl_noise': 0.1, 'batch_size': 256, 'batches_before_train': 2, 'checkpoint_interval': 1}}


In [11]:
# Build the trainer from the loaded configuration. According to the recorded
# log output, this step also connects to the Unity environment.
# NOTE(review): run_resume=False presumably starts a fresh run instead of
# resuming a previous 'navsim_test' run -- confirm against navsim.Trainer.
trainer = navsim.Trainer(
    run_id='navsim_test',
    run_resume=False,
    conf=conf,
)

2020-11-10 22:43:54 INFO [environment.py:108] Connected to Unity environment with package version 1.1.0-preview and communication version 1.0.0
2020-11-10 22:44:02 INFO [environment.py:265] Connected new brain:
VectorNavigator?team=0


Unity env creation resource usage: 
time:0.06203600400000031,peak_memory:65466,current_memory:52116

Env Info
-----------
Action Space: Box(-1.0, 1.0, (2,), float32)
Action sample: [ 0.39293838 -0.42772132]
Action Space Shape: (2,)
Action Space Low: [-1. -1.]
Action Space High: [1. 1.]
Observation Mode: 0
Gym Observation Space: Tuple(Box(-inf, inf, (10,), float32))
Gym Observation Space Shape: None
Self Observation Space: Tuple(Box(-inf, inf, (10,), float32))
Self Observation Space Shape: None
Reward Range: (-inf, inf)
Metadata: {'render.modes': ['rgb_array']}
Initial State: [array([ 3.0437227e+03,  3.5454102e+01,  8.5054102e+02, -1.3631051e-09,
        9.9539620e-01, -7.4505797e-09,  9.5845744e-02,  1.1848356e+03,
        3.5924671e+01,  2.8484259e+02], dtype=float32)]
First Step State: ([array([ 3.0437227e+03,  3.5448643e+01,  8.5054102e+02, -1.3631051e-09,
        9.9539620e-01, -7.4505797e-09,  9.5845744e-02,  1.1848356e+03,
        3.5924671e+01,  2.8484259e+02], dtype=float32)], 

In [12]:
# Run training. The recorded progress bar shows 2 iterations, which matches
# num_episodes=2 in the printed configuration (presumably one per episode).
trainer.train()

100%|██████████| 2/2 [00:00<00:00,  2.14it/s]


In [None]:
# Save the agent's actor to 'model.onnx' in the current working directory.
# NOTE(review): this cell has no execution count in the saved notebook, so it
# was not run in the recorded session.
trainer.agent.save_actor('model.onnx')

In [13]:
# Close the simulation environment held by the trainer; the recorded log
# reports the Unity environment shutting down with return code 0.
trainer.env_close()

2020-11-10 22:52:49 INFO [environment.py:417] Environment shut down with return code 0.
