# Some *tests* of the inner functionality of the whole dqn package

## Testing the Unity ML-Agents env wrapper

In [None]:
import os
import sys
import numpy as np

from navigation.envs import mlagents

In [None]:
# definitions for full paths to executables
# Both paths are built from os.getcwd(), so the notebook has to be started from
# the folder that contains the 'executables' directory.
# (the 'NoVis' build is presumably the headless variant -- TODO confirm)
EXEC_BANANA_PATH = os.path.join( os.getcwd(), 'executables/Banana_Linux/Banana.x86_64' )
EXEC_BANANA_NOVIS_PATH = os.path.join( os.getcwd(), 'executables/Banana_Linux_NoVis/Banana.x86_64' )

In [None]:
# initialize the environment from the build referenced by EXEC_BANANA_PATH
_env = mlagents.createDiscreteActionsEnv( EXEC_BANANA_PATH )
# alternative: use the line below to load the EXEC_BANANA_NOVIS_PATH build instead
## _env = mlagents.createDiscreteActionsEnv( EXEC_BANANA_NOVIS_PATH )

In [None]:
# show the number of actions and the observation shape exposed by the wrapper
print( 'numActions: ', _env.numActions )
print( 'obsShape: ', _env.obsShape )

In [None]:
# reset the env (training = False) and inspect the first observation
_state = _env.reset( training = False )
print( '_state.shape: ', _state.shape )
# NOTE(review): the '_state:' label below is printed without the state itself
print( '_state:' )
# drop the last two entries and view the remainder as 7 rows
# (presumably one row per perception ray -- TODO confirm against the env)
_raysState = _state[:-2].reshape( 7, -1 )
print( '_raysState: ' )
print( _raysState )

def process( state ) :
    """Return `state` without its last two entries, reshaped into 7 rows."""
    _trimmed = state[:-2]
    return _trimmed.reshape( (7, -1) )

In [None]:
# columns 0, 2 and 4 of every row of the reshaped observation
_raysState[:,[0,2,4]]

In [None]:
# Roll out one episode (training = False) with uniformly random actions,
# printing every transition, then shut the environment down.
_state = _env.reset( training = False )

try :
    while True :
        # sample a random discrete action and actually feed it to the env
        # (the sampled action used to be discarded in favor of a constant 2)
        _action = np.random.randint( _env.numActions )
        _snext, _reward, _done, _ = _env.step( _action )

        print( '#######################' )
        print( '_snext: ' )
        print( process( _snext ) )
        print( '_reward: ', _reward )
        print( '_done: ', _done )

        if _done :
            break
finally :
    # always release the environment, even if a step fails or is interrupted
    _env.close()

## Testing the sumtree data structure

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from navigation.dqn.utils import sumtree

In [None]:
# build a sumtree and fill it with eight (data, node value) pairs
smtree = sumtree.SumTree(8)

for _item, _nodeValue in [ ( 'a', 3 ), ( 'b', 10 ), ( 'c', 12 ), ( 'd', 4 ),
                           ( 'e', 1 ), ( 'f', 2 ), ( 'g', 8 ), ( 'h', 2 ) ] :
    smtree.add( _item, _nodeValue )

In [None]:
# query with a value just below 42, the sum of the eight node values added above
smtree.getNode(41.99999)

In [None]:
# peek at the internal arrays of the sumtree
print( 'arr-tree: ', smtree._tree )
print( 'data: ', smtree._data )

In [None]:
# rebuild the sumtree, this time with only seven (data, node value) pairs
smtree = sumtree.SumTree(8)

for _item, _nodeValue in [ ( 'a', 3 ), ( 'b', 10 ), ( 'c', 12 ), ( 'd', 4 ),
                           ( 'e', 1 ), ( 'f', 2 ), ( 'g', 8 ) ] :
    smtree.add( _item, _nodeValue )

# total reported by the tree
smtree.total()

In [None]:
# internal tree array of the sumtree
smtree._tree

In [None]:
# query with a value just below 40, the sum of the seven node values added above
smtree.getNode(39.87)

### testing sampling method

In [None]:
# Draw one sample: pick a value uniformly in [0, total) and look up its node.
# The range comes from smtree.total() instead of a hard-coded 42, because the
# tree built above holds seven entries whose node values add up to 40.
_val = smtree.total() * np.random.random()
_indx, _nodeval, _data = smtree.getNode( _val )
print( 'sampled (%s), with nodeval=%.2f and indx=%i, for value=%.2f' % ( _data, _nodeval, _indx, _val )  )

### checking the probability distribution given by the node values

In [None]:
# Sample the tree many times and count how often each slot is returned; the
# histogram should be proportional to the node values.
NSAMPLES = 10000
BUCKETS = [0] * len( smtree._data )
# sample over the actual total of the tree (a hard-coded 42 overshoots the
# seven-entry tree built above, whose node values add up to 40)
_total = smtree.total()
for _ in range( NSAMPLES ) :
    _indx, _, _data = smtree.getNode( _total * np.random.random() )
    # getNode returns an index into the tree array; shift it to a 0-based slot
    # (presumably the leaves start at len( _data ) - 1 -- TODO confirm)
    BUCKETS[_indx - len( smtree._data ) + 1] += 1

plt.bar( ['a','b','c','d','e','f','g','h'], BUCKETS )
plt.show()

## Testing the priority buffer for Prioritized Experience Replay

In [None]:
import numpy as np
from navigation.dqn.utils import prioritybuffer

# create a priority buffer and store a single transition in it
# NOTE(review): the meaning of the constructor arguments (128, 0) is not visible
# here -- presumably buffer size and random seed; confirm against PriorityBuffer
pbuffer = prioritybuffer.PriorityBuffer( 128, 0 )
# arguments look like (state, action, next state, reward, end flag) -- TODO confirm
pbuffer.add( [0.0, 1.0], 0, [0.1, 1.1], 0, False )

# max priority tracked by the buffer after the insertion
pbuffer._maxpriority

In [None]:
## NOTE: sampling here gives a warning due to a division by zero (the buffer is
## taking the min over all possible values in the data buffer).
## To fix it, keep the min somewhere else, kept updated, and use it for this calculation:
##   - one way: a variable for the running min (O(1) to grab, O(BatchSize) to update -> np.min over the batch)
##   - another way: a mintree that returns the min value in O(log(n) + k)
_states, _actions, _nextStates, _rewards, _endflags, _indicesBatch, _isWeights = pbuffer.sample( 1 )

# dump every piece of the sampled batch next to its label
for _label, _value in [ ( 'states: ', _states ),
                        ( 'actions: ', _actions ),
                        ( 'nextstates: ', _nextStates ),
                        ( 'rewards: ', _rewards ),
                        ( 'endflags: ', _endflags ),
                        ( 'treeindices: ', _indicesBatch ),
                        ( 'importance sampling weights: ', _isWeights ) ] :
    print( _label, _value )

In [None]:
# store a second transition in the buffer
pbuffer.add( [0.2, 1.2], 1, [0.1, 1.1], 0, False )

In [None]:
# sample a batch of one after the second insertion
_states, _actions, _nextStates, _rewards, _endflags, _indicesBatch, _isWeights = pbuffer.sample( 1 )

# dump every piece of the sampled batch next to its label
for _label, _value in [ ( 'states: ', _states ),
                        ( 'actions: ', _actions ),
                        ( 'nextstates: ', _nextStates ),
                        ( 'rewards: ', _rewards ),
                        ( 'endflags: ', _endflags ),
                        ( 'treeindices: ', _indicesBatch ),
                        ( 'importance sampling weights: ', _isWeights ) ] :
    print( _label, _value )

In [None]:
# set the priority stored at index 128 to 100.
# (presumably a tree index pointing at the second transition, if the leaves of
# a 128-slot tree start at index 127 -- TODO confirm against PriorityBuffer)
pbuffer.updatePriorities( np.array( [128] ), np.array( [100.] ) )

In [None]:
# sample a batch of one after the priority update
_states, _actions, _nextStates, _rewards, _endflags, _indicesBatch, _isWeights = pbuffer.sample( 1 )

# dump every piece of the sampled batch next to its label
for _label, _value in [ ( 'states: ', _states ),
                        ( 'actions: ', _actions ),
                        ( 'nextstates: ', _nextStates ),
                        ( 'rewards: ', _rewards ),
                        ( 'endflags: ', _endflags ),
                        ( 'treeindices: ', _indicesBatch ),
                        ( 'importance sampling weights: ', _isWeights ) ] :
    print( _label, _value )

In [None]:
# max priority tracked by the buffer after the priority update
pbuffer._maxpriority

## Testing segment trees from various implementations

* Adaptation from openai-baselines + other sources
* OpenAI-baselines

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from navigation.dqn.utils import sumtree
from navigation.dqn.utils import segmentree

In [None]:
# one tree per implementation, both filled with the same seven entries
smtree1 = sumtree.SumTree(8)
smtree2 = segmentree.SumTree(8)

_nodevals = { 'a' : 3, 'b' : 10, 'c' : 12, 'd' : 4, 'e' : 1, 'f' : 2, 'g' : 8 }

for key, _nodeValue in _nodevals.items() :
    smtree1.add( key, _nodeValue )
    smtree2.add( key, _nodeValue )


In [None]:
# total reported by the first implementation
smtree1.total()

In [None]:
# total reported by the second implementation
smtree2.sum()

In [None]:
# NOTE(review): 40.999998888 is larger than the sum of the seven node values
# inserted into smtree2 (40) unless the update cell further down has already
# been run -- confirm which total this query is meant to sit just below
smtree2.getNode(40.999998888)

### checking update functionality

In [None]:
def _showTree( tree ) :
    """Print the data array, the tree array and the total sum of `tree`."""
    print( 'data: ', tree._data  )
    print( 'tree: ', tree._tree )
    print( 'sum: ', tree.sum() )

# data, tree and sum before the update
_showTree( smtree2 )

# do an update (change 'a' from nodeval 3 to nodeval 5)
smtree2.update( 7, 5 )

# data, tree and sum after the update
_showTree( smtree2 )

### checking the probability distribution generated from both

In [None]:
# Sample both trees with the same random fraction of their own total and count
# how often each slot is returned, then plot one histogram per tree.
NSAMPLES = 100000
BUCKETS1 = [0] * len( smtree1._data )
BUCKETS2 = [0] * len( smtree2._data )

# (tree, bound method returning its total, histogram to fill)
_trees = ( ( smtree1, smtree1.total, BUCKETS1 ),
           ( smtree2, smtree2.sum, BUCKETS2 ) )

for _ in range( NSAMPLES ) :
    _rnd = np.random.random()

    for _tree, _totalFn, _buckets in _trees :
        _indx, _, _data = _tree.getNode( _totalFn() * _rnd )
        _buckets[_indx - len( _tree._data ) + 1] += 1

_labels = ['a','b','c','d','e','f','g','h']

fig1, ax1 = plt.subplots()
ax1.bar( _labels, BUCKETS1 )

fig2, ax2 = plt.subplots()
ax2.bar( _labels, BUCKETS2 )
plt.show()

### testing the mintree

In [None]:
# fill a mintree step by step and report the min along the way
mintree = segmentree.MinTree(8)

# first four entries, then a single report
for _mtKey, _mtValue in [ ( 'a', 1 ), ( 'b', 3 ), ( 'c', 5 ), ( 'd', 0 ) ] :
    mintree.add( _mtKey, _mtValue )

print( 'min: ', mintree.min() )

# remaining entries, reporting the min after each one
for _mtKey, _mtValue in [ ( 'e', -1 ), ( 'f', 10 ) ] :
    mintree.add( _mtKey, _mtValue )

    print( 'min: ', mintree.min() )

### testing the openai-baselines segment tree

In [None]:
from navigation.dqn.utils import openai_segmentree as op_segmentree

In [None]:
# a simple test case
smtree3 = op_segmentree.SumSegmentTree(8)

smtree3[0] = 3
smtree3[1] = 10
smtree3[2] = 12
smtree3[3] = 4
smtree3[4] = 1
smtree3[5] = 2
smtree3[6] = 8
smtree3[7] = 2

print( 'sum: ', smtree3.sum() )

In [None]:
# Cross-check the three sum-tree implementations on identical contents.
# The trees built in the cells above do not hold the same node values
# (smtree1: seven entries adding up to 40; smtree2: 'a' changed from 3 to 5 by
# the update check; smtree3: eight entries adding up to 42), so their totals
# cannot agree. Fresh trees are filled here from one shared table instead.
_cmpNodevals = { 'a' : 3, 'b' : 10, 'c' : 12, 'd' : 4, 'e' : 1, 'f' : 2, 'g' : 8, 'h' : 2 }

_cmpTree1 = sumtree.SumTree(8)
_cmpTree2 = segmentree.SumTree(8)
_cmpTree3 = op_segmentree.SumSegmentTree(8)

for _slot, ( _key, _nodeValue ) in enumerate( _cmpNodevals.items() ) :
    _cmpTree1.add( _key, _nodeValue )
    _cmpTree2.add( _key, _nodeValue )
    _cmpTree3[_slot] = _nodeValue

_sum1 = _cmpTree1.total()
_sum2 = _cmpTree2.sum()
_sum3 = _cmpTree3.sum()

# compare with a tolerance: the totals may be accumulated as floats
assert np.isclose( _sum1, _sum2 ) and np.isclose( _sum2, _sum3 ), 'ERROR> sumtrees should return same total cumsum'

# query all three trees with the same random value in [0, total)
_nodeval = _sum1 * np.random.random()

_indx1, _, _ = _cmpTree1.getNode( _nodeval )
_indx2, _, _ = _cmpTree2.getNode( _nodeval )
# find_prefixsum_idx returns a slot index; shift it by capacity - 1 so that it
# can be compared with the indices returned by getNode
_indx3 = _cmpTree3.find_prefixsum_idx( _nodeval ) + _cmpTree3._capacity - 1

print( 'indx1: ', _indx1 )
print( 'indx2: ', _indx2 )
print( 'indx3: ', _indx3 )

## Testing image conversion (rgb->hsv)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from matplotlib.colors import rgb_to_hsv
from matplotlib.colors import hsv_to_rgb

In [None]:
## let's use a simple image with pure r,g or b color
N = 5

# pure channels of NxN
_allOn  = np.ones( (N,N) )
_allOff = np.zeros( (N,N) )

def _rgbImage( r, g, b ) :
    """Stack three (N,N) channels and transpose them to (h,w,d) for matplotlib."""
    return np.transpose( np.stack( [r, g, b] ), (1,2,0) )

# create the images themselves
_redRgbImage   = _rgbImage( _allOn, _allOff, _allOff )
_greenRgbImage = _rgbImage( _allOff, _allOn, _allOff )
_blueRgbImage  = _rgbImage( _allOff, _allOff, _allOn )
_blackRgbImage = _rgbImage( _allOff, _allOff, _allOff )
_whiteRgbImage = _rgbImage( _allOn, _allOn, _allOn )

# name -> image, in display order
_rgbImages = { 'red'   : _redRgbImage,
               'green' : _greenRgbImage,
               'blue'  : _blueRgbImage,
               'black' : _blackRgbImage,
               'white' : _whiteRgbImage }

print( 'checking images dtype and shape -----------------' )
for _name, _image in _rgbImages.items() :
    print( '%-11s : ' % ( _name + '.shape' ), _image.shape )

# the dtype lines are labelled '.dtype' (they used to be mislabelled '.shape')
for _name, _image in _rgbImages.items() :
    print( '%-11s : ' % ( _name + '.dtype' ), _image.dtype )
print( '-------------------------------------------------' )

plt.grid( False )

# one panel per image, laid out on a single row of five
for _col, _image in enumerate( _rgbImages.values(), start = 1 ) :
    plt.subplot( 1, 5, _col )
    plt.imshow( _image )

In [None]:
# convert to hsv
_redHsvImage   = rgb_to_hsv( _redRgbImage )
_greenHsvImage = rgb_to_hsv( _greenRgbImage )
_blueHsvImage  = rgb_to_hsv( _blueRgbImage )
_blackHsvImage = rgb_to_hsv( _blackRgbImage )
_whiteHsvImage = rgb_to_hsv( _whiteRgbImage )

# first channel of every converted image, in display order
_hueChannels = [ _hsvImage[...,0] for _hsvImage in ( _redHsvImage,
                                                     _greenHsvImage,
                                                     _blueHsvImage,
                                                     _blackHsvImage,
                                                     _whiteHsvImage ) ]

for _hue in _hueChannels :
    print( _hue )

print( _redHsvImage[...,0].shape )

# one grayscale panel per channel, laid out on a single row of five
for _col, _hue in enumerate( _hueChannels, start = 1 ) :
    plt.subplot( 1, 5, _col )
    plt.imshow( _hue, cmap = 'gray', vmin = 0., vmax = 1. )

## Testing the visual-banana environment

In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
from matplotlib.colors import rgb_to_hsv
from navigation.envs import mlagents

In [2]:
# visual-banana executable path
# built from os.getcwd(): the notebook has to be started from the folder that
# contains the 'executables' directory
EXEC_VISUAL_BANANA_PATH = os.path.join( os.getcwd(), 'executables/VisualBanana/VisualBanana.x86_64' )

In [3]:
# initialize the environment and analyze it a bit
# create the wrapper with envType = 'visual' and workerID = 0
_env = mlagents.createDiscreteActionsEnv( EXEC_VISUAL_BANANA_PATH, envType = 'visual', workerID = 0 )

INFO:mlagents.envs:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of Training Brains : 1
        Reset Parameters :
		
Unity brain name: VisualBananaLearning
        Number of Visual Observations (per agent): 1
        Vector Observation space size (per agent): 0
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): [3, 3, 3, 2]
        Vector Action descriptions: , , , 


In [4]:
# analyze some info from the environment
# observation shape and number of actions exposed by the wrapper
print( 'observation space shape: ', _env.obsShape )
print( 'number of actions: ', _env.numActions )

observation space shape:  (3, 84, 84)
number of actions:  4


In [5]:
# reset the environment and check the observations
# reset with training = False and keep the first observation
_state = _env.reset( training = False )

# basic facts about the observation: type, shape, dtype and value range
print( 'type(state) : ', type(_state) )
print( 'state.shape : ', _state.shape )
print( 'state.dtype : ', _state.dtype )
print( 'state.max   : ', np.max( _state ) )
print( 'state.min   : ', np.min( _state ) )

type(state) :  <class 'numpy.ndarray'>
state.shape :  (3, 84, 84)
state.dtype :  float64
state.max   :  1.0
state.min   :  0.06666666666666667


In [None]:
# run the environment for 1000 episodes with uniformly random actions
for _ in tqdm( range( 1000 ) ) :
    _state = _env.reset( training = True )

    while True :
        # feed the sampled action to the env
        # (it used to be discarded in favor of a constant 0)
        _action = np.random.randint( _env.numActions )
        _snext, _reward, _done, _ = _env.step( _action )

        if _done :
            break

  0%|          | 0/1000 [00:00<?, ?it/s]

> [0;32m/home/gregor/Documents/wilbert/repos/DeeprlND-projects/project1-navigation/navigation/envs/mlagents.py[0m(192)[0;36mstep[0;34m()[0m
[0;32m    190 [0;31m[0;34m[0m[0m
[0m[0;32m    191 [0;31m        [0;31m# grab the required information fron the step-info object[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 192 [0;31m        _observations   = np.transpose( _stepInfo.visual_observations[0][0],
[0m[0;32m    193 [0;31m                                        (2, 0, 1) )
[0m[0;32m    194 [0;31m[0;34m[0m[0m
[0m
ipdb> dir(_stepInfo)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'action_masks', 'agents', 'custom_observations', 'from_agent_proto', 'local_done', 

### checking the hsv conversion on the observations

In [None]:
# grab a fresh observation (training = False)
_state = _env.reset( training = False )
# rgb_to_hsv is applied after moving the first axis of the state to the end
_hsvState = rgb_to_hsv( np.transpose( _state, (1,2,0) ) )

def rgb2gray(rgb):
    """Weighted sum (0.299, 0.587, 0.114) of the three planes along the first axis of `rgb`."""
    _red, _green, _blue = rgb[0,...], rgb[1,...], rgb[2,...]
    return 0.299 * _red + 0.587 * _green + 0.114 * _blue

_grayState = rgb2gray(_state)
print( 'graystate.shape: ', _grayState.shape )
print( _grayState[2] )

# shared display options for the single-channel panels
_grayKwargs = dict( cmap = 'gray', vmin = 0., vmax = 1. )

# (image, title, imshow options) for each panel, left to right
_panels = [ ( np.transpose( _state, (1,2,0) ), 'rgb-original', {} ),
            ( _hsvState[...,0], 'hue-channel', _grayKwargs ),
            ( _hsvState[...,1], 'saturation-channel', _grayKwargs ),
            ( _hsvState[...,2], 'value-channel', _grayKwargs ),
            ( _grayState, 'grayscale', _grayKwargs ) ]

plt.figure( figsize=(15,15) )
for _col, ( _image, _title, _kwargs ) in enumerate( _panels, start = 1 ) :
    plt.subplot( 1, 5, _col )
    plt.imshow( _image, **_kwargs )
    plt.title( _title )