# Memory Test

This notebook tests the memory class with the airsim car environment and all 3 environments - vector, visual, vector-visual

In [1]:
import sys
import os
from pathlib import Path

%reload_ext autoreload
%autoreload 2

base_path=Path("d:/")
base_path = base_path / "work" / "projects"
for pkg in ['ezai_util','ai_coop_py']:
    pkg_path = base_path / pkg
    pkg_path = str(pkg_path.resolve())
    print(pkg_path)
    if not pkg_path in sys.path:
        sys.path.append(pkg_path)
import ezai_util
import airsim

from ezai_util import DictObj

D:\work\projects\ezai_util
D:\work\projects\ai_coop_py


In [2]:
uenv_path = Path("d:/") / 'work' / 'unity-envs'
#uenv_file = uenv_path / "berlin_walk_ml_beta" / 'Berlin_ML'
uenv_file = uenv_path / "Berlin_URP" / 'Berlin_ML'

uenv_file = str(uenv_file.resolve())
#uenv_file = None  # if you want to use editor then uncomment it

In [3]:
rl_conf = DictObj({
  "env_name":'airsim',
  "seed": 123,
  "discount": 0.99,
  "tau": 5e-3,
  "expl_noise": 0.1,
  "batch_size": 256,
  "batches_before_train": 2,
  "memory_capacity":10000,
  "episode_max_steps":100,
  "num_episodes":2
})

env_conf = DictObj({
    'filename' : uenv_file,
    'log_folder' : 'unity-logs',  # TODO: Replace exp-id-1 with something from env
    'seed' : rl_conf.seed,
    'base_port' : 5005,
    'timeout' : 600,
    'worker_id' : 0,
    'observation_mode' : 0,
    'max_steps' : rl_conf.episode_max_steps,
    'reward_for_goal' : 50,
    'reward_for_ep': .005,
    'reward_for_other' : -.1,
    'reward_for_falling_off_map' :  -50,
    'reward_for_step': -.0001,
    'segmentation_mode': 1
})

In [4]:
memory_capacity = 1000
t_max = 1000

for observation_mode in [0,1,2]:
    print('testing with observation_mode ',observation_mode)
    env_conf.observation_mod = observation_mode
    env = airsim.AirSimEnv(env_conf) 
    try:
        env.info()

        # create a memory for random play
        memory = airsim.Memory(capacity = memory_capacity, 
                        state_shapes = env.observation_space_shapes, 
                        action_shape = env.action_space_shape,
                        seed = 123)
        memory.info()

        # do the random play and fill the memory

        t=0
        episode_done = False

        #1. observe initial state
        s = env.reset()

        while not episode_done:
            #2. select an action, and observe the next state
            a = env.action_space.sample()
            s_, r, episode_done, info = env.step(a)

            if(t == t_max-1):
                episode_done=True

            #3. save in memory

            #s = np.asarray(s).squeeze()
            #a = np.asarray(a).squeeze()
            #r = np.asarray(r).squeeze()
            #s_ = None if episode_done else s_ #np.asarray(s_).squeeze()
            memory.append(s=s,a=a,r=r,s_=s_, d=episode_done)

            # update the counters
            t += 1

            if (t_max and t >= t_max):
                break

        # sample the memory
        s,a,r,s_,d = memory.sample(100)
        airsim.Memory.sample_info(s,a,r,s_,d)
    finally:
        env.close()

testing with observation_mode  0


2020-10-19 14:48:00 INFO [environment.py:107] Connected to Unity environment with package version 1.1.0-preview and communication version 1.0.0
2020-10-19 14:48:02 INFO [environment.py:265] Connected new brain:
VectorNavigator?team=0


Env Info
-----------
Action Space: Box(2,)
Action sample: [ 0.39293838 -0.42772132]
Action Space Shape: (2,)
Action Space Low: [-1. -1.]
Action Space High: [1. 1.]
Observation Mode: 0
Gym Observation Space: Tuple(Box(10,))
Gym Observation Space Shape: None
Self Observation Space: Tuple(Box(10,))
Self Observation Space Shape: None
Reward Range: (-inf, inf)
Metadata: {'render.modes': ['rgb_array']}
Initial State: [array([ 3.0437227e+03,  3.5454102e+01,  8.5054102e+02, -1.3631051e-09,
        9.9539620e-01, -7.4505797e-09,  9.5845744e-02,  1.1848356e+03,
        3.5924671e+01,  2.8484259e+02], dtype=float32)]
First Step State: ([array([ 3.0437227e+03,  3.5448639e+01,  8.5054102e+02, -1.3631051e-09,
        9.9539620e-01, -7.4505797e-09,  9.5845744e-02,  1.1848356e+03,
        3.5924671e+01,  2.8484259e+02], dtype=float32)], -1e-04, False, {'step': <mlagents_envs.base_env.DecisionSteps object at 0x000001902CFE1DF0>})
Memory Info
-----------
capacity: 1000
size: 0
seed: 123
Shapes:
s: [(100

2020-10-19 14:48:17 INFO [environment.py:417] Environment shut down with return code 0 (CTRL_C_EVENT).


testing with observation_mode  1


2020-10-19 14:48:33 INFO [environment.py:107] Connected to Unity environment with package version 1.1.0-preview and communication version 1.0.0
2020-10-19 14:48:35 INFO [environment.py:265] Connected new brain:
VectorNavigator?team=0


Env Info
-----------
Action Space: Box(2,)
Action sample: [ 0.39293838 -0.42772132]
Action Space Shape: (2,)
Action Space Low: [-1. -1.]
Action Space High: [1. 1.]
Observation Mode: 0
Gym Observation Space: Tuple(Box(10,))
Gym Observation Space Shape: None
Self Observation Space: Tuple(Box(10,))
Self Observation Space Shape: None
Reward Range: (-inf, inf)
Metadata: {'render.modes': ['rgb_array']}
Initial State: [array([ 3.0437227e+03,  3.5454102e+01,  8.5054102e+02, -1.3631051e-09,
        9.9539620e-01, -7.4505797e-09,  9.5845744e-02,  1.1848356e+03,
        3.5924671e+01,  2.8484259e+02], dtype=float32)]
First Step State: ([array([ 3.0437227e+03,  3.5448639e+01,  8.5054102e+02, -1.3631051e-09,
        9.9539620e-01, -7.4505797e-09,  9.5845744e-02,  1.1848356e+03,
        3.5924671e+01,  2.8484259e+02], dtype=float32)], -1e-04, False, {'step': <mlagents_envs.base_env.DecisionSteps object at 0x000001902D010F10>})
Memory Info
-----------
capacity: 1000
size: 0
seed: 123
Shapes:
s: [(100

2020-10-19 14:48:50 INFO [environment.py:417] Environment shut down with return code 0 (CTRL_C_EVENT).


testing with observation_mode  2


2020-10-19 14:49:08 INFO [environment.py:107] Connected to Unity environment with package version 1.1.0-preview and communication version 1.0.0
2020-10-19 14:49:11 INFO [environment.py:265] Connected new brain:
VectorNavigator?team=0


Env Info
-----------
Action Space: Box(2,)
Action sample: [ 0.39293838 -0.42772132]
Action Space Shape: (2,)
Action Space Low: [-1. -1.]
Action Space High: [1. 1.]
Observation Mode: 0
Gym Observation Space: Tuple(Box(10,))
Gym Observation Space Shape: None
Self Observation Space: Tuple(Box(10,))
Self Observation Space Shape: None
Reward Range: (-inf, inf)
Metadata: {'render.modes': ['rgb_array']}
Initial State: [array([ 3.0437227e+03,  3.5454102e+01,  8.5054102e+02, -1.3631051e-09,
        9.9539620e-01, -7.4505797e-09,  9.5845744e-02,  1.1848356e+03,
        3.5924671e+01,  2.8484259e+02], dtype=float32)]
First Step State: ([array([ 3.0437227e+03,  3.5448639e+01,  8.5054102e+02, -1.3631051e-09,
        9.9539620e-01, -7.4505797e-09,  9.5845744e-02,  1.1848356e+03,
        3.5924671e+01,  2.8484259e+02], dtype=float32)], -1e-04, False, {'step': <mlagents_envs.base_env.DecisionSteps object at 0x000001902D010C40>})
Memory Info
-----------
capacity: 1000
size: 0
seed: 123
Shapes:
s: [(100

2020-10-19 14:49:27 INFO [environment.py:417] Environment shut down with return code 0 (CTRL_C_EVENT).


In [9]:
env.close()

ValueError: Environment not open