In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys, os
sys.path.append(os.getcwd() + '/..')
import src

# Experiment setup
* Here we basically just load the game and check that it works

In [3]:
from __future__ import print_function 
import matplotlib.pyplot as plt
import numpy as np
import gym
from tqdm import tqdm

%matplotlib inline
%env THEANO_FLAGS="floatX=float32"

env: THEANO_FLAGS="floatX=float32"


In [4]:
# np.random.seed(seed=20)
# np.random.seed(seed=42)


In [5]:
# Id of the gym environment; used both for gym.make and for the EnvPool.
GAME = "FrontoPolarStocks-v0"

# Session length passed to pool.update / agent.get_sessions, and the number of
# parallel sessions requested from the EnvPool.
SEQ_LENGTH = 25
N_AGENTS = 1

In [6]:
# Create the environment and start an episode so it can be stepped below.
env = gym.make(GAME)
env.reset()

# Labels for the three values (0, 1, 2) an action component can take.
# NOTE(review): the label order is the original author's guess — confirm against the environment.
action_names = np.array(["sell", "pass", "buy"])

# Shapes consumed later by build_agent and BNN.
state_shape = env.observation_space.shape
action_shape = (env.action_space.num_discrete_space,)
action_emb_shape = action_shape + (3,)

# Smoke test: take one random action and print the resulting observation.
random_action = np.random.randint(0, 3, size=action_shape)
state, _, _, _ = env.step(random_action)

print(state)

[2017-09-05 00:00:27,258] Making new env: FrontoPolarStocks-v0


[ 136.010002   17.64    ]


# Basic agent setup
Here we define a simple agent that maps game observations to action probabilities and state values using a shallow neural network.


In [7]:
import theano
from theano import tensor as T
import lasagne

# Debug-oriented Theano settings: request the most detailed error reports and
# select the 'fast_compile' optimizer.
# NOTE(review): presumably chosen to keep graph compilation short while iterating —
# confirm before using this notebook for long training runs.
theano.config.exception_verbosity = 'high'
theano.config.optimizer = 'fast_compile'

In [8]:
from agent.agent import build_agent

In [9]:
# Build the agent from the action-embedding shape and the observation shape.
# build_agent also returns the action layer and the V layer; their trainable
# parameters are what gets optimised.
agent, action_layer, V_layer = build_agent(action_emb_shape, state_shape)

In [10]:
# Sanity check: display the output shape of the agent's first action layer.
agent.action_layers[0].output_shape

(None, 2)

In [11]:
# Since it's a single lasagne network, one can get its weights, output, etc.
# Here: the trainable parameters reachable from the action layer and the V layer.
weights = lasagne.layers.get_all_params((action_layer,V_layer),trainable=True)

# Create and manage a pool of game sessions to play with

* To make training more stable, we shall have an entire batch of game sessions, each running independently of the others
* Why several parallel agents help training: http://arxiv.org/pdf/1602.01783v1.pdf
* Alternative approach: store more sessions: https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf

In [12]:
from agentnet.experiments.openai_gym.pool import EnvPool

# Pool of N_AGENTS sessions of GAME played by `agent`.
# NOTE(review): max_size presumably caps how many sessions the pool's experience
# replay retains — confirm against EnvPool.
pool = EnvPool(agent, GAME, N_AGENTS, max_size=10000)

[2017-09-05 00:00:28,805] Making new env: FrontoPolarStocks-v0


In [13]:
# Let the agent play 10 ticks; keep only the actions taken and the rewards
# received — the other four returned values are not needed here.
_,action_log,reward_log,_,_,_  = pool.interact(10)


# Show the actions by name (indexing action_names with the action ids) and the rewards.
print(action_names[action_log])
print(reward_log)

[[['buy' 'sell']
  ['sell' 'buy']
  ['buy' 'buy']
  ['buy' 'buy']
  ['buy' 'buy']
  ['sell' 'buy']
  ['buy' 'buy']
  ['sell' 'buy']
  ['buy' 'sell']
  ['pass' 'buy']]]
[[ -1.72505901  -0.56255999  -1.78435997   2.40297003   9.784077
    1.67338601  17.08936      5.827854    -3.28881197   0.        ]]


In [14]:
# Load the first sessions into the pool (per the original note, update() calls
# interact and remembers the resulting sessions).
pool.update(SEQ_LENGTH)

# a2c loss

Here we define the objective function for actor-critic (one-step) RL.

* We regularize the policy with expected inverse action probabilities (discouraging very small probabilities) to make the objective numerically stable


In [15]:
# Sample a batch of 100 stored sessions (with replacement) from the experience replay.
replay = pool.experience_replay.sample_session_batch(100, replace=True)

# Run the agent over the sampled sessions; only the last returned element —
# the policy sequence and the V sequence — is used for the loss.
_,_,_,_,(policy_seq,V_seq) = agent.get_sessions(
    replay,
    session_length=SEQ_LENGTH,
    experience_replay=True,
)


In [16]:
from agent.train_step import get_train_step

# Debugging aid (disabled): print replay.rewards whenever the graph evaluates it.
# replay.rewards = T.printing.Print(">>>>>>>>>> replay.rewards: ")(replay.rewards)

# Build the training step from the policy/V sequences, the trainable weights
# and the sampled replay batch.
# NOTE(review): in the recorded run, calling train_step() fails with
# "IndexError: shape mismatch" — the A2C objective indexes the (100, 25, 6)
# log-policy with actions of shape (100, 25, 2). The failing node is created
# inside get_train_step (agent/train_step.py), so the fix presumably belongs
# there or in the agent definition — TODO confirm.
train_step = get_train_step(policy_seq, V_seq, weights, replay)

# Demo run

In [17]:
# Baseline: evaluate the still-untrained agent, recording a video under ./records.
untrained_reward = pool.evaluate(save_path="./records", record_video=True)

[2017-09-05 00:00:32,977] Making new env: FrontoPolarStocks-v0
[2017-09-05 00:00:33,022] Clearing 4 monitor files from previous run (because force=True was provided)
[2017-09-05 00:00:33,024] Starting new video recorder writing to /home/manatee/Desktop/inn.prac/Algorithms/src/records/openaigym.video.0.2122.video000000.mp4
[2017-09-05 00:00:36,647] Finished writing results. You can upload them to the scoreboard via gym.upload('/home/manatee/Desktop/inn.prac/Algorithms/src/records')


Episode finished after 200 timesteps with reward=14311.205137728051


In [18]:
# from IPython.display import HTML

# video_path="records/openaigym.video.0.29209.video000000.mp4"

# HTML("""
# <video width="640" height="480" controls>
#   <source src="{}" type="video/mp4">
# </video>
# """.format(video_path))

# Vime

In [19]:
from agent.bnn import BNN
# print("action_emb_shape: ", action_emb_shape)
# BNN built from the state/action shapes and the sampled replay batch. Its
# add_vime_reward, sample_from_pool and train_step members are what the
# pre-fill, pre-training and training cells call.
bnn = BNN(state_shape, action_shape, action_emb_shape, replay)

A:  (None, 2)
A_emb:  (None, 6)
S:  (None, 2)
Concat:  (None, 8)
l_dense:  :  (None, 8)  ->  (None, 50)
l_out:  :  (None, 50)  ->  (None, 2)


# Training loop

In [20]:
# Number of the training iteration about to run (the training loop increments it).
epoch_counter = 1

# Full-game evaluation results keyed by iteration number; the first entry is
# the result of the untrained agent.
rewards = {}
rewards[epoch_counter] = untrained_reward

In [21]:
# Pre-fill the pool: 100 appended updates of SEQ_LENGTH ticks each, with
# bnn.add_vime_reward supplied as the preprocess hook.
# (Use a smaller range, e.g. 10, for a quick smoke run; dropping the
# `preprocess` argument skips the VIME reward.)
for _ in tqdm(range(100)):
    pool.update(SEQ_LENGTH, append=True, preprocess=bnn.add_vime_reward)

100%|██████████| 100/100 [00:06<00:00, 16.72it/s]


In [22]:
# Pre-train the BNN for 100 steps on batches drawn from the pool, so that the
# first training iterations do not run against a completely untrained BNN.
# (Use a smaller range, e.g. 10, for a quick smoke run.)
for _ in tqdm(range(100)):
    bnn.train_step(*bnn.sample_from_pool())

100%|██████████| 100/100 [00:00<00:00, 204.96it/s]


In [23]:
#the loop may take eons to finish.
#consider interrupting early.
# (Shrink the outer range, e.g. to 20 or 1, for a quick smoke run.)

# Exponential moving average of the training loss (tracked, not printed).
loss = 0

for i in tqdm(range(1000)):
    # 1) Gather fresh experience; bnn.add_vime_reward is supplied as the preprocess hook.
    #    The inner loops use `_` so they no longer clobber the outer loop variable.
    for _ in range(10):
        pool.update(SEQ_LENGTH, append=True, preprocess=bnn.add_vime_reward)

    # 2) Train the agent and fold each new loss into the moving average.
    for _ in range(10):
        new_loss = train_step()
        loss = loss * 0.99 + new_loss * 0.01

    # 3) Train the BNN on batches sampled from the pool.
    for _ in range(10):
        bnn.train_step(*bnn.sample_from_pool())

    # Every 100 iterations: report statistics of the current experience replay.
    if epoch_counter % 100 == 0:
        # Read each shared variable once instead of calling get_value() repeatedly.
        replay_rewards = pool.experience_replay.rewards.get_value()
        replay_is_alive = pool.experience_replay.is_alive.get_value()

        #average reward per game tick in current experience replay pool
        # (last tick excluded; weights are 1 + is_alive, so ticks flagged alive
        # weigh more than the rest rather than the rest being masked out)
        pool_mean_reward = np.average(replay_rewards[:,:-1],
                                      weights=1 + replay_is_alive[:,:-1])
        pool_size = replay_rewards.shape[0]
        print("iter=%i\treward/step=%.5f\tpool_size=%i\tvime ma=%.5f"%(epoch_counter,
                                                         pool_mean_reward,
                                                         pool_size,
                                                         bnn.vime_reward_ma))

    ##record current learning progress and show learning curves
    if epoch_counter % 500 == 0:
        n_games = 10
        rewards[epoch_counter] = pool.evaluate(
            record_video=False,
            n_games=n_games,
            verbose=False)
        print("Current score(mean over %i) = %.3f"%(n_games,np.mean(rewards[epoch_counter])))

    epoch_counter += 1


# Time to drink some coffee!

  0%|          | 0/1000 [00:00<?, ?it/s]

>>>> policy_seq;  __str__ = [[[ 0.14181072  0.15308765  0.18365371  0.26436192  0.04178216  0.21530384]
  [ 0.14181072  0.15308765  0.18365371  0.26436192  0.04178216  0.21530384]
  [ 0.14181072  0.15308765  0.18365371  0.26436192  0.04178216  0.21530384]
  ..., 
  [ 0.14181072  0.15308765  0.18365371  0.26436192  0.04178216  0.21530384]
  [ 0.14181072  0.15308765  0.18365371  0.26436192  0.04178216  0.21530384]
  [ 0.14181072  0.15308765  0.18365371  0.26436192  0.04178216  0.21530384]]

 [[ 0.13323706  0.09744211  0.28547591  0.18515025  0.03576923  0.26292545]
  [ 0.13365018  0.09827401  0.28435853  0.18608937  0.03607522  0.26155266]
  [ 0.13347824  0.097927    0.28482494  0.18569791  0.03594777  0.26212415]
  ..., 
  [ 0.13385974  0.09869754  0.28378448  0.18656889  0.03622926  0.26086009]
  [ 0.13417967  0.09935572  0.28289679  0.18730903  0.03646975  0.25978902]
  [ 0.13428341  0.09957019  0.28260872  0.1875491   0.03654831  0.25944027]]

 [[ 0.14181072  0.15308765  0.18365371  




IndexError: shape mismatch: indexing arrays could not be broadcast together with shapes (100,1) (1,25) (100,25,2) 
Apply node that caused the error: AdvancedSubtensor(Elemwise{log,no_inplace}.0, InplaceDimShuffle{0,x}.0, InplaceDimShuffle{x,0}.0, AdvancedSubtensor1.0)
Toposort index: 518
Inputs types: [TensorType(float32, 3D), TensorType(int64, col), TensorType(int64, row), TensorType(int32, 3D)]
Inputs shapes: [(100, 25, 6), (100, 1), (1, 25), (100, 25, 2)]
Inputs strides: [(600, 24, 4), (8, 8), (200, 8), (200, 8, 4)]
Inputs values: ['not shown', 'not shown', 'not shown', 'not shown']
Inputs type_num: [11, 7, 7, 5]
Outputs clients: [[Elemwise{neg,no_inplace}(AdvancedSubtensor.0)]]

Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
  File "/home/manatee/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/manatee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/manatee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/manatee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-16-490dc9d7dd04>", line 5, in <module>
    train_step = get_train_step(policy_seq, V_seq, weights, replay)
  File "/home/manatee/Desktop/inn.prac/Algorithms/src/agent/train_step.py", line 20, in get_train_step
    gamma_or_gammas=0.99)
  File "/home/manatee/anaconda3/lib/python3.6/site-packages/agentnet/learning/a2c.py", line 140, in get_elementwise_objective
    action_logprobas = get_values_for_actions(logpolicy, actions) if logpolicy.ndim == 3 else logpolicy
  File "/home/manatee/anaconda3/lib/python3.6/site-packages/agentnet/learning/generic.py", line 265, in get_values_for_actions
    action_values_predicted = values_for_all_actions[batch_i, time_i, actions]

Debugprint of the apply node: 
AdvancedSubtensor [id A] <TensorType(float32, 3D)> ''   
 |Elemwise{log,no_inplace} [id B] <TensorType(float32, 3D)> ''   
 | |Print{message='>>>> policy_seq; ', attrs=('__str__',), global_fn=<function _print_fn at 0x7f6ce6096a60>} [id C] <TensorType(float32, 3D)> ''   
 |   |InplaceDimShuffle{1,0,2} [id D] <TensorType(float32, 3D)> ''   
 |     |Join [id E] <TensorType(float32, 3D)> ''   
 |       |TensorConstant{0} [id F] <TensorType(int8, scalar)>
 |       |InplaceDimShuffle{x,0,1} [id G] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id H] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id I] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id J] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id K] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id L] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id M] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | | |AdvancedSubtensor1 [id O] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |   |sessions.observations_history.0 [id P] <TensorType(float32, 3D)>
 |       |   | |   | | |   |RandomFunction{choice_helper}.1 [id Q] <TensorType(int32, vector)> ''   
 |       |   | |   | | |     |<RandomStateType> [id R] <RandomStateType>
 |       |   | |   | | |     |TensorConstant{(1,) of 100} [id S] <TensorType(int64, vector)>
 |       |   | |   | | |     |Subtensor{int64} [id T] <TensorType(int64, scalar)> ''   
 |       |   | |   | | |     | |Shape [id U] <TensorType(int64, vector)> ''   
 |       |   | |   | | |     | | |session.rewards_history [id V] <TensorType(float32, matrix)>
 |       |   | |   | | |     | |Constant{0} [id W] <int64>
 |       |   | |   | | |     |TensorConstant{1} [id X] <TensorType(int8, scalar)>
 |       |   | |   | | |     |TensorConstant{[]} [id Y] <TensorType(float32, vector)>
 |       |   | |   | | |Constant{0} [id W] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |     |dense1.b [id BB] <TensorType(float32, vector)>
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id BE] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id BF] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id BG] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id BH] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id BI] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id BJ] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id BK] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{1} [id BL] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id BM] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id BN] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id BO] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id BP] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id BQ] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id BR] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id BS] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{2} [id BT] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id BU] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id BV] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id BW] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id BX] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id BY] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id BZ] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id CA] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{3} [id CB] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id CC] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id CD] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id CE] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id CF] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id CG] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id CH] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id CI] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{4} [id CJ] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id CK] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id CL] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id CM] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id CN] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id CO] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id CP] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id CQ] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{5} [id CR] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id CS] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id CT] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id CU] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id CV] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id CW] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id CX] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id CY] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{6} [id CZ] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id DA] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id DB] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id DC] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id DD] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id DE] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id DF] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id DG] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{7} [id DH] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id DI] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id DJ] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id DK] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id DL] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id DM] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id DN] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id DO] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{8} [id DP] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id DQ] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id DR] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id DS] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id DT] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id DU] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id DV] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id DW] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{9} [id DX] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id DY] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id DZ] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id EA] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id EB] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id EC] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id ED] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id EE] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{10} [id EF] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id EG] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id EH] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id EI] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id EJ] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id EK] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id EL] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id EM] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{11} [id EN] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id EO] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id EP] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id EQ] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id ER] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id ES] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id ET] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id EU] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{12} [id EV] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id EW] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id EX] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id EY] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id EZ] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id FA] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id FB] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id FC] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{13} [id FD] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id FE] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id FF] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id FG] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id FH] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id FI] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id FJ] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id FK] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{14} [id FL] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id FM] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id FN] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id FO] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id FP] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id FQ] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id FR] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id FS] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{15} [id FT] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id FU] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id FV] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id FW] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id FX] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id FY] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id FZ] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id GA] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{16} [id GB] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id GC] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id GD] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id GE] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id GF] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id GG] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id GH] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id GI] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{17} [id GJ] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id GK] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id GL] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id GM] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id GN] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id GO] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id GP] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id GQ] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{18} [id GR] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id GS] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id GT] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id GU] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id GV] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id GW] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id GX] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id GY] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{19} [id GZ] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id HA] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id HB] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id HC] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id HD] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id HE] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id HF] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id HG] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{20} [id HH] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id HI] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id HJ] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id HK] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id HL] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id HM] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id HN] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id HO] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{21} [id HP] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id HQ] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id HR] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id HS] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id HT] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id HU] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id HV] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id HW] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{22} [id HX] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id HY] <TensorType(float32, (True, False, False))> ''   
 |       | |SoftmaxWithBias [id HZ] <TensorType(float32, matrix)> ''   
 |       |   |Dot22 [id IA] <TensorType(float32, matrix)> ''   
 |       |   | |sigmoid [id IB] <TensorType(float32, matrix)> ''   
 |       |   | | |Elemwise{add,no_inplace} [id IC] <TensorType(float32, matrix)> ''   
 |       |   | |   |Dot22 [id ID] <TensorType(float32, matrix)> ''   
 |       |   | |   | |Subtensor{int64} [id IE] <TensorType(float32, matrix)> ''   
 |       |   | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |       |   | |   | | |Constant{23} [id IF] <int64>
 |       |   | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |       |   | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |       |   | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |       |   |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |       |InplaceDimShuffle{x,0,1} [id IG] <TensorType(float32, (True, False, False))> ''   
 |         |SoftmaxWithBias [id IH] <TensorType(float32, matrix)> ''   
 |           |Dot22 [id II] <TensorType(float32, matrix)> ''   
 |           | |sigmoid [id IJ] <TensorType(float32, matrix)> ''   
 |           | | |Elemwise{add,no_inplace} [id IK] <TensorType(float32, matrix)> ''   
 |           | |   |Dot22 [id IL] <TensorType(float32, matrix)> ''   
 |           | |   | |Subtensor{int64} [id IM] <TensorType(float32, matrix)> ''   
 |           | |   | | |InplaceDimShuffle{1,0,2} [id N] <TensorType(float32, 3D)> ''   
 |           | |   | | |Constant{24} [id IN] <int64>
 |           | |   | |dense1.W [id Z] <TensorType(float32, matrix)>
 |           | |   |InplaceDimShuffle{x,0} [id BA] <TensorType(float32, row)> ''   
 |           | |q-evaluator layer.W [id BC] <TensorType(float32, matrix)>
 |           |q-evaluator layer.b [id BD] <TensorType(float32, vector)>
 |InplaceDimShuffle{0,x} [id IO] <TensorType(int64, col)> ''   
 | |ARange{dtype='int64'} [id IP] <TensorType(int64, vector)> ''   
 |   |TensorConstant{0} [id F] <TensorType(int8, scalar)>
 |   |Subtensor{int64} [id IQ] <TensorType(int64, scalar)> ''   
 |   | |Shape [id IR] <TensorType(int64, vector)> ''   
 |   | | |Elemwise{log,no_inplace} [id B] <TensorType(float32, 3D)> ''   
 |   | |Constant{0} [id W] <int64>
 |   |TensorConstant{1} [id X] <TensorType(int8, scalar)>
 |InplaceDimShuffle{x,0} [id IS] <TensorType(int64, row)> ''   
 | |ARange{dtype='int64'} [id IT] <TensorType(int64, vector)> ''   
 |   |TensorConstant{0} [id F] <TensorType(int8, scalar)>
 |   |Subtensor{int64} [id IU] <TensorType(int64, scalar)> ''   
 |   | |Shape [id IR] <TensorType(int64, vector)> ''   
 |   | |Constant{1} [id BL] <int64>
 |   |TensorConstant{1} [id X] <TensorType(int8, scalar)>
 |AdvancedSubtensor1 [id IV] <TensorType(int32, 3D)> ''   
   |session.actions_history.0 [id IW] <TensorType(int32, 3D)>
   |RandomFunction{choice_helper}.1 [id Q] <TensorType(int32, vector)> ''   

Storage map footprint:
 - Print{message='>>>> policy_seq; ', attrs=('__str__',), global_fn=<function _print_fn at 0x7f6ce6096a60>}.0, Shape: (100, 25, 6), ElemSize: 4 Byte(s), TotalSize: 60000 Byte(s)
 - Elemwise{log,no_inplace}.0, Shape: (100, 25, 6), ElemSize: 4 Byte(s), TotalSize: 60000 Byte(s)
 - sessions.observations_history.0, Shared Input, Shape: (111, 25, 2), ElemSize: 4 Byte(s), TotalSize: 22200 Byte(s)
 - session.actions_history.0, Shared Input, Shape: (111, 25, 2), ElemSize: 4 Byte(s), TotalSize: 22200 Byte(s)
 - AdvancedSubtensor1.0, Shape: (100, 25, 2), ElemSize: 4 Byte(s), TotalSize: 20000 Byte(s)
 - session.rewards_history, Shared Input, Shape: (111, 25), ElemSize: 4 Byte(s), TotalSize: 11100 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - sigmoid.0, Shape: (100, 10), ElemSize: 4 Byte(s), TotalSize: 4000 Byte(s)
 - session.is_alive, Shared Input, Shape: (111, 25), ElemSize: 1 Byte(s), TotalSize: 2775 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - SoftmaxWithBias.0, Shape: (100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - InplaceDimShuffle{x,0,1}.0, Shape: (1, 100, 6), ElemSize: 4 Byte(s), TotalSize: 2400 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - Subtensor{int64}.0, Shape: (100, 2), ElemSize: 4 Byte(s), TotalSize: 800 Byte(s)
 - InplaceDimShuffle{0,x}.0, Shape: (100, 1), ElemSize: 8 Byte(s), TotalSize: 800 Byte(s)
 - RandomFunction{choice_helper}.1, Shape: (100,), ElemSize: 4 Byte(s), TotalSize: 400 Byte(s)
 - q-evaluator layer.W, Shared Input, Shape: (10, 6), ElemSize: 4 Byte(s), TotalSize: 240 Byte(s)
 - <TensorType(float32, matrix)>, Shared Input, Shape: (10, 6), ElemSize: 4 Byte(s), TotalSize: 240 Byte(s)
 - InplaceDimShuffle{x,0}.0, Shape: (1, 25), ElemSize: 8 Byte(s), TotalSize: 200 Byte(s)
 - dense1.W, Shared Input, Shape: (2, 10), ElemSize: 4 Byte(s), TotalSize: 80 Byte(s)
 - <TensorType(float32, matrix)>, Shared Input, Shape: (2, 10), ElemSize: 4 Byte(s), TotalSize: 80 Byte(s)
 - dense1.b, Shared Input, Shape: (10,), ElemSize: 4 Byte(s), TotalSize: 40 Byte(s)
 - state values.W, Shared Input, Shape: (10, 1), ElemSize: 4 Byte(s), TotalSize: 40 Byte(s)
 - <TensorType(float32, vector)>, Shared Input, Shape: (10,), ElemSize: 4 Byte(s), TotalSize: 40 Byte(s)
 - <TensorType(float32, col)>, Shared Input, Shape: (10, 1), ElemSize: 4 Byte(s), TotalSize: 40 Byte(s)
 - q-evaluator layer.b, Shared Input, Shape: (6,), ElemSize: 4 Byte(s), TotalSize: 24 Byte(s)
 - <TensorType(float32, vector)>, Shared Input, Shape: (6,), ElemSize: 4 Byte(s), TotalSize: 24 Byte(s)
 - Constant{0}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{(1,) of 100}, Shape: (1,), ElemSize: 8 Byte(s), TotalSize: 8 Byte(s)
 - Constant{24}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{23}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{22}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{21}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{20}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{19}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{18}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{17}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{16}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{15}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{14}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{13}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{12}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{11}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{10}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{9}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{8}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{7}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{6}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{5}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{4}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{3}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{2}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{1}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{-1}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{1.0}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - state values.b, Shared Input, Shape: (1,), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - <TensorType(float32, (True,))>, Shared Input, Shape: (1,), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{(1,) of 0.9}, Shape: (1,), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{(1, 1) of 0.9}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{(1, 1) of 1.0}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{(1, 1, 1) of 1.0}, Shape: (1, 1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{(1, 1, 1) of 0.0}, Shape: (1, 1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{(1, 1) of 0.0}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{0.009999999776482582}, Shape: (), ElemSize: 4 Byte(s), TotalSize: 4.0 Byte(s)
 - TensorConstant{(1, 1) of 0.99}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{(1, 1) of 0.001}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{(1, 1) of 0.1}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{(1,) of 0.001}, Shape: (1,), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{(1,) of 0.1}, Shape: (1,), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{(1, 1) of 1e-06}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{(1,) of 1e-06}, Shape: (1,), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{1}, Shape: (), ElemSize: 1 Byte(s), TotalSize: 1.0 Byte(s)
 - TensorConstant{(1, 1) of 1}, Shape: (1, 1), ElemSize: 1 Byte(s), TotalSize: 1 Byte(s)
 - TensorConstant{False}, Shape: (), ElemSize: 1 Byte(s), TotalSize: 1.0 Byte(s)
 - TensorConstant{0}, Shape: (), ElemSize: 1 Byte(s), TotalSize: 1.0 Byte(s)
 - Constant{0}, Shape: (), ElemSize: 1 Byte(s), TotalSize: 1.0 Byte(s)
 - TensorConstant{(1, 1) of 2}, Shape: (1, 1), ElemSize: 1 Byte(s), TotalSize: 1 Byte(s)
 - TensorConstant{(1,) of 2}, Shape: (1,), ElemSize: 1 Byte(s), TotalSize: 1 Byte(s)
 - TensorConstant{[]}, Shape: (0,), ElemSize: 4 Byte(s), TotalSize: 0 Byte(s)
 TotalSize: 380818.0 Byte(s) 0.000 GB
 TotalSize inputs: 59418.0 Byte(s) 0.000 GB



In [None]:
# Order the logged rewards by iteration number, then unzip the (iteration,
# rewards) pairs into two parallel tuples for plotting.
rewards_by_iter = sorted(rewards.items(), key=lambda pr: pr[0])
iters, session_rewards = zip(*rewards_by_iter)

In [None]:
# Average the rewards recorded at each logged iteration and plot the curve.
mean_rewards = [np.mean(rewards_at_iter) for rewards_at_iter in session_rewards]
plt.plot(iters, mean_rewards)
plt.title("Training progress")
plt.xlabel("Epoch counter")
plt.ylabel("Mean Income")
plt.show()

In [None]:
# Re-run the agent over the sessions stored in the experience-replay pool and
# keep only the last element of the returned 5-tuple, unpacked as
# (pool_policy, pool_V) — presumably the policy and state-value outputs per tick.
_, _, _, _, (pool_policy, pool_V) = agent.get_sessions(
    pool.experience_replay,
    session_length=SEQ_LENGTH,
    experience_replay=True,
)

# Flatten the stored observations to one row per tick. The row width is read
# from the data instead of being hard-coded: the previous fixed width of 5 does
# not match this environment's 2-feature observations, so rows straddled
# several ticks and the selected columns were scrambled.
observations = pool.experience_replay.observations[0].get_value()
n_features = observations.shape[-1]
# Keep the last two features as rows -> shape (2, n_ticks), ready for
# `plt.scatter(*states)` and `obs_x, obs_y = states` further down.
states = observations.reshape((-1, n_features)).T[-2:]

# Flattened to 1-D; assumed to hold one entry per tick so they align with the
# columns of `states` — TODO confirm against agent.get_sessions' output shapes.
values = pool_V.ravel().eval()
optimal_actid = pool_policy.argmax(-1).ravel().eval()

In [None]:
# Sanity check: `states` is 2-D with at most two rows (the `.T[-2:]` slice above),
# one column per plotted point.
states.shape

In [None]:
# Sanity check: `values` is 1-D (it was ravelled above); its length should equal
# the number of columns in `states` if the two are to be plotted together.
values.shape

In [None]:
# Scatter the two rows of `states` against each other (x = first row,
# y = second row). Colouring the points by predicted value (`c=values`)
# remains disabled, exactly as in the original cell.
plt.scatter(*states, alpha=0.1)
plt.title("predicted state values")
plt.xlabel("previous")
plt.ylabel("current")
plt.show()

In [None]:
obs_x, obs_y = states

# One colour per action id 0..2; each action gets its own scatter layer so the
# legend can label it by name.
action_colors = ('red', 'blue', 'green')

for action_id, color in enumerate(action_colors):
    # Boolean mask of the points whose most likely action is `action_id`.
    chosen = (optimal_actid == action_id)
    plt.scatter(
        obs_x[chosen],
        obs_y[chosen],
        c=color,
        alpha=0.1,
        label=action_names[action_id],
    )

plt.title("most likely action id")
plt.xlabel("previous")
plt.ylabel("current")
plt.legend(loc='best')
plt.show()

In [None]:
# Evaluate the pool's agent via pool.evaluate, passing save_path="./records"
# and record_video=True (presumably recordings are written under ./records).
# NOTE(review): the original comment said "for MountainCar-v0 evaluation session
# is cropped to 200 ticks"; this notebook's GAME is "FrontoPolarStocks-v0", so
# confirm whether any such cap applies here.
# NOTE(review): the name `untrained_reward` looks inherited — this cell sits
# after the training-progress plots, so verify what state the agent is in.
untrained_reward = pool.evaluate(save_path="./records",record_video=True)

### 