In [1]:
import os
import numpy as np
import mxnet as mx
from utils import check_dir
from memory import Memory
from utils import create_input, translate_state
from evaluation_mxnet import evaluate
from mxnet import gluon, nd, autograd
from environments.SimpleEnv import SimpleEnv

In [2]:
# ---- Run configuration ------------------------------------------------------
# Name of this training case; used as the prefix of every output file below.
order = "MXNET_map_only"
# Upper bound on the number of transitions sampled per gradient update.
batch_size = 1500
# Agent view setting handed to the environment.
agent_view = 5
# Map size handed to the environment.
map_size = 20
# Number of discrete actions: random actions are drawn from [0, action_max).
action_max = 3
# Directory that receives model checkpoints.
model_save = "./model_save/"
# Learning rate of the optimizer.
lr = 0.001
# Number of episodes to run.
num_episode = 1000000
# Environment-step threshold used to gate the start of annealing/learning.
replay_start = 10000
# Interval (in annealing steps) between target-network synchronisations.
update_step = 1000
# Discount factor (gamma) in the Q-learning target.
gamma = 0.99
# Capacity of the replay memory pool.
memory_length = 100000
# File that receives the periodic training log lines.
summary = "./{}_Reward.csv".format(order)
# Evaluation statistics file (not written by the code visible in this notebook).
eval_statistics = "./{}_CSV.csv".format(order)
# Number of steps over which epsilon is linearly annealed down to its minimum.
annealing_end = 200000
# Minimum exploration rate of the epsilon-greedy policy.
epsilon_min = 0.2
# Checkpoint files. `model_save` already carries the "./" prefix and trailing
# slash, so join the pieces instead of formatting them into "./{}/{}", which
# produced ".//model_save//<name>".
temporary_model = os.path.join(model_save, "{}.params".format(order))
temporary_pool = os.path.join(model_save, "{}.pool".format(order))

In [3]:
# Start from an empty training log: the training loop opens `summary` in
# append mode, so a file left over from a previous run would be extended.
if os.path.exists(summary):
    os.remove(summary)
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# machine without one (the original unconditional mx.gpu() fails there).
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
# Output directories (check_dir presumably creates them when missing -
# TODO confirm against utils.check_dir).
for directory in ["model_save", "data_save"]:
    check_dir(directory)

In [4]:
# Build the two networks used by the training loop:
#   offline_model - the network that is optimised and used to pick actions
#   online_model  - the network that supplies the bootstrap targets
from model.simple_stack import SimpleStack
online_model, offline_model = SimpleStack(), SimpleStack()
# Both networks get the same initializer on the same device, online first.
for _net in (online_model, offline_model):
    _net.collect_params().initialize(mx.init.MSRAPrelu(), ctx=ctx)
# Clear any gradient buffers of the network that will be trained.
offline_model.collect_params().zero_grad()



In [5]:
# Environment: built once here and reset before use.
env = SimpleEnv(map_size=map_size, agent_view=agent_view, display=False)
env.reset_env()

# Replay memory holding at most `memory_length` entries.
memory_pool = Memory(memory_length)

# Optimisation objects: L2 loss and an Adam trainer over offline_model.
loss_func = gluon.loss.L2Loss()
trainer = gluon.Trainer(
    offline_model.collect_params(),
    'adam',
    {'learning_rate': lr},
)

# Bookkeeping consumed by the training loop.
annealing = 0                         # steps counted since the replay_start gate opened
eval_result = []                      # accumulated outputs of evaluate(...)
total_reward = np.zeros(num_episode)  # one slot per episode

In [None]:
def _sync_target_network():
    """Copy the weights of the trained network into the target network.

    The copy goes through the checkpoint file ``temporary_model``, so that
    file always holds the most recently synchronised weights.
    """
    offline_model.save_parameters(temporary_model)
    online_model.load_parameters(temporary_model, ctx)


def _train_on_batch():
    """Run one Q-learning update of ``offline_model`` on a replay mini-batch.

    Samples up to ``batch_size`` transitions from ``memory_pool``, builds the
    one-step bootstrap target from ``online_model`` and applies one optimizer
    step to ``offline_model``.

    Returns:
        The batch-mean loss of this update (an NDArray).
    """
    # Never ask the pool for more transitions than it currently holds.
    bz = min(batch_size, len(memory_pool.memory))
    for_train = memory_pool.next_batch(bz)
    _state = [nd.array(i, ctx=ctx) for i in for_train["state"]]
    _state_next = [nd.array(i, ctx=ctx) for i in for_train["state_next"]]
    _finish = nd.array(for_train["finish"], ctx=ctx)
    _action = nd.array(for_train["action"], ctx=ctx)
    _reward = nd.array(for_train["reward"], ctx=ctx)
    with autograd.record(train_mode=True):
        # The target is a constant w.r.t. the trained network: detach it so
        # backward() does not also traverse the target network for nothing.
        q_sp = nd.max(online_model(_state_next), axis=1).detach()
        # No bootstrapping from transitions flagged as finished.
        q_sp = q_sp * (1 - _finish)
        q_s = nd.pick(offline_model(_state), _action, 1)
        loss = nd.mean(loss_func(q_s, _reward + gamma * q_sp))
    loss.backward()
    # The loss is already averaged over the batch, so the trainer must not
    # divide the gradient by the batch size a second time.
    trainer.step(1)
    return loss


_print = True
best = 0
_all = 0      # environment steps taken by this loop
_update = 0   # gradient updates performed by this loop
# Index of the last `update_step`-sized window of `annealing` for which the
# target network was synchronised. Starting at the window that contains
# `replay_start` makes the first sync fall on the first multiple of
# `update_step` above `replay_start`.
_synced_window = replay_start // update_step
for epoch in range(num_episode):
    env.reset_env()
    finish = 0
    cum_clipped_dr = 0
    if epoch == 51:
        print("Model Structure: ")
        print(offline_model)
    # NOTE(review): this message fires once the total step count passes
    # replay_start, but gradient updates are gated on `annealing > replay_start`
    # and `annealing` only starts counting at that moment - so learning begins
    # roughly replay_start steps after this message. Confirm which is intended.
    if sum(env.step_count) > replay_start and _print:
        print('annealing and learning are started')
        _print = False
    while not finish:
        _all += 1
        total_steps = sum(env.step_count)
        if total_steps > replay_start:
            annealing += 1
        # NOTE(review): epsilon decays with the total step count, i.e. it is
        # already decreasing before `annealing` starts counting - confirm that
        # `annealing` was not meant to drive the schedule.
        eps = np.maximum(1 - total_steps / annealing_end, epsilon_min)
        if np.random.random() < eps:
            by = "Random"
            action = np.random.randint(0, action_max)
        else:
            by = "Model"
            data = create_input([translate_state(env.map.state())])
            data = [nd.array(i, ctx=ctx) for i in data]
            action = offline_model(data)
            action = int(nd.argmax(action, axis=1).asnumpy()[0])
        old, new, reward_get, finish = env.step(action)
        memory_pool.add(old, new, action, reward_get, finish)
        if finish and epoch > 50:
            cum_clipped_dr += env.detect_rate[-1]
            dr_50 = float(np.mean(env.detect_rate[-50:]))
            dr_all = float(np.mean(env.detect_rate))
            if epoch % 50 == 0:
                text = "DR: %f(50), %f(all), eps: %f" % (dr_50, dr_all, eps)
                print(text)
                with open(summary, "a") as f:
                    f.write(text + "\n")
            # NOTE(review): evaluate() is handed the training env; if it steps
            # that env, its steps feed env.step_count / env.detect_rate and
            # therefore the epsilon schedule and the DR statistics - verify.
            if epoch % 100 == 0 and annealing > replay_start:
                eval_result.extend(evaluate(ctx, offline_model, env, 5))
            # Refresh the target network once per `update_step` annealing
            # steps. This is only checked at episode end, so test whether a new
            # window has been entered instead of requiring `annealing` to be an
            # exact multiple of `update_step` (which silently skips the sync
            # whenever an episode boundary does not land on a multiple).
            # NOTE(review): until the first sync the target network still has
            # the weights it was independently initialised with.
            window = annealing // update_step
            if annealing > replay_start and window > _synced_window:
                _synced_window = window
                _sync_target_network()
                if best < dr_all:
                    best = dr_all
                    offline_model.save_parameters(temporary_model+ ".best")
    # Train on every second episode once the annealing gate has opened.
    if annealing > replay_start and epoch % 2 == 0:
        _update += 1
        loss = _train_on_batch()
    # Episode `epoch` belongs in slot `epoch`; the former `epoch - 1` put
    # episode 0 into the last slot and shifted every other episode by one.
    total_reward[epoch] = cum_clipped_dr

  return n_r / road.sum(), n_w / walkway.sum()
[02:38:08] ../src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable)
  return n_r / road.sum(), n_w / walkway.sum()


Model Structure: 
SimpleStack(
  (map): Sequential(
    (0): Conv2D(2 -> 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): Conv2D(16 -> 32, kernel_size=(2, 2), stride=(1, 1), bias=False)
    (2): Conv2D(32 -> 32, kernel_size=(2, 2), stride=(1, 1), bias=False)
  )
  (decision_making): Sequential(
    (0): Dense(10368 -> 3, Activation(sigmoid))
  )
)
DR: 0.080063(50), 0.067167(all), eps: 0.950000
annealing and learning are started
DR: 0.065495(50), 0.066613(all), eps: 0.925000
DR: 0.079995(50), 0.069942(all), eps: 0.900000


  return n_r / road.sum(), n_w / walkway.sum()


DR: 0.059025(50), 0.067767(all), eps: 0.875000
DR: 0.065733(50), 0.067429(all), eps: 0.850000
DR: 0.085026(50), 0.068954(all), eps: 0.822500
DR: 0.071282(50), 0.069240(all), eps: 0.797500
DR: 0.064933(50), 0.068103(all), eps: 0.770000
DR: 0.066246(50), 0.067921(all), eps: 0.745000
DR: 0.088005(50), 0.069209(all), eps: 0.717500
DR: 0.075877(50), 0.069750(all), eps: 0.692500
DR: 0.064263(50), 0.068822(all), eps: 0.665000
DR: 0.102470(50), 0.071155(all), eps: 0.640000
DR: 0.073732(50), 0.070863(all), eps: 0.612500
DR: 0.083950(50), 0.071655(all), eps: 0.587500
DR: 0.099844(50), 0.072885(all), eps: 0.560000
DR: 0.064126(50), 0.072414(all), eps: 0.535000
DR: 0.074819(50), 0.072169(all), eps: 0.507500
DR: 0.082649(50), 0.072675(all), eps: 0.482500
DR: 0.062917(50), 0.071924(all), eps: 0.455000
DR: 0.087961(50), 0.072627(all), eps: 0.430000
DR: 0.066951(50), 0.072111(all), eps: 0.402500
DR: 0.051494(50), 0.071283(all), eps: 0.377500
DR: 0.055664(50), 0.070431(all), eps: 0.350000
DR: 0.052236(

DR: 0.031074(50), 0.046101(all), eps: 0.200000
DR: 0.027255(50), 0.045981(all), eps: 0.200000
DR: 0.056440(50), 0.046036(all), eps: 0.200000
DR: 0.043044(50), 0.045999(all), eps: 0.200000
DR: 0.031958(50), 0.045926(all), eps: 0.200000
DR: 0.034976(50), 0.045846(all), eps: 0.200000
DR: 0.056454(50), 0.045901(all), eps: 0.200000
DR: 0.032749(50), 0.045814(all), eps: 0.200000
DR: 0.054322(50), 0.045857(all), eps: 0.200000
DR: 0.031551(50), 0.045761(all), eps: 0.200000
DR: 0.029351(50), 0.045679(all), eps: 0.200000
DR: 0.040470(50), 0.045662(all), eps: 0.200000
DR: 0.032825(50), 0.045598(all), eps: 0.200000
DR: 0.054365(50), 0.045635(all), eps: 0.200000
DR: 0.029920(50), 0.045558(all), eps: 0.200000
DR: 0.038100(50), 0.045502(all), eps: 0.200000
DR: 0.049400(50), 0.045521(all), eps: 0.200000
DR: 0.041017(50), 0.045477(all), eps: 0.200000
DR: 0.034546(50), 0.045424(all), eps: 0.200000
DR: 0.040897(50), 0.045384(all), eps: 0.200000
DR: 0.027869(50), 0.045301(all), eps: 0.200000
DR: 0.046267(

DR: 0.027692(50), 0.040669(all), eps: 0.200000
DR: 0.037615(50), 0.040661(all), eps: 0.200000
DR: 0.029904(50), 0.040627(all), eps: 0.200000
DR: 0.033814(50), 0.040609(all), eps: 0.200000
DR: 0.042272(50), 0.040606(all), eps: 0.200000
DR: 0.030933(50), 0.040580(all), eps: 0.200000
DR: 0.042062(50), 0.040577(all), eps: 0.200000
DR: 0.045709(50), 0.040590(all), eps: 0.200000
DR: 0.037815(50), 0.040572(all), eps: 0.200000
DR: 0.031683(50), 0.040549(all), eps: 0.200000
DR: 0.028690(50), 0.040507(all), eps: 0.200000
DR: 0.036675(50), 0.040497(all), eps: 0.200000
DR: 0.028599(50), 0.040458(all), eps: 0.200000
DR: 0.039857(50), 0.040456(all), eps: 0.200000
DR: 0.044220(50), 0.040456(all), eps: 0.200000
DR: 0.050928(50), 0.040483(all), eps: 0.200000
DR: 0.060078(50), 0.040526(all), eps: 0.200000
DR: 0.017368(50), 0.040467(all), eps: 0.200000
DR: 0.027654(50), 0.040428(all), eps: 0.200000
DR: 0.023355(50), 0.040385(all), eps: 0.200000
DR: 0.039731(50), 0.040378(all), eps: 0.200000
DR: 0.023948(

DR: 0.034408(50), 0.038165(all), eps: 0.200000
DR: 0.040784(50), 0.038162(all), eps: 0.200000
DR: 0.039112(50), 0.038164(all), eps: 0.200000
DR: 0.052838(50), 0.038185(all), eps: 0.200000
DR: 0.036199(50), 0.038181(all), eps: 0.200000
DR: 0.026892(50), 0.038155(all), eps: 0.200000
DR: 0.019373(50), 0.038122(all), eps: 0.200000
DR: 0.024069(50), 0.038094(all), eps: 0.200000
DR: 0.038903(50), 0.038096(all), eps: 0.200000
DR: 0.023767(50), 0.038066(all), eps: 0.200000
DR: 0.023982(50), 0.038041(all), eps: 0.200000
DR: 0.018695(50), 0.038001(all), eps: 0.200000
DR: 0.027015(50), 0.037982(all), eps: 0.200000
DR: 0.020067(50), 0.037947(all), eps: 0.200000
DR: 0.036194(50), 0.037943(all), eps: 0.200000
DR: 0.022396(50), 0.037910(all), eps: 0.200000
DR: 0.047866(50), 0.037927(all), eps: 0.200000
DR: 0.049009(50), 0.037944(all), eps: 0.200000
DR: 0.022724(50), 0.037918(all), eps: 0.200000
DR: 0.038910(50), 0.037914(all), eps: 0.200000
DR: 0.053853(50), 0.037942(all), eps: 0.200000
DR: 0.023791(

DR: 0.029401(50), 0.036576(all), eps: 0.200000
DR: 0.040526(50), 0.036581(all), eps: 0.200000
DR: 0.028359(50), 0.036568(all), eps: 0.200000
DR: 0.026654(50), 0.036555(all), eps: 0.200000
DR: 0.029161(50), 0.036540(all), eps: 0.200000
DR: 0.053885(50), 0.036563(all), eps: 0.200000
DR: 0.047150(50), 0.036573(all), eps: 0.200000
DR: 0.040699(50), 0.036578(all), eps: 0.200000
DR: 0.026570(50), 0.036562(all), eps: 0.200000
DR: 0.050646(50), 0.036580(all), eps: 0.200000
DR: 0.035104(50), 0.036575(all), eps: 0.200000
DR: 0.037609(50), 0.036576(all), eps: 0.200000
DR: 0.026878(50), 0.036561(all), eps: 0.200000
DR: 0.043080(50), 0.036569(all), eps: 0.200000
DR: 0.044315(50), 0.036575(all), eps: 0.200000
DR: 0.027423(50), 0.036563(all), eps: 0.200000
DR: 0.027959(50), 0.036547(all), eps: 0.200000
DR: 0.030672(50), 0.036539(all), eps: 0.200000
DR: 0.034021(50), 0.036532(all), eps: 0.200000
DR: 0.037390(50), 0.036533(all), eps: 0.200000
DR: 0.025654(50), 0.036515(all), eps: 0.200000
DR: 0.031180(

DR: 0.039520(50), 0.036714(all), eps: 0.200000
DR: 0.048808(50), 0.036723(all), eps: 0.200000
DR: 0.034855(50), 0.036721(all), eps: 0.200000
DR: 0.025436(50), 0.036706(all), eps: 0.200000
DR: 0.053604(50), 0.036724(all), eps: 0.200000
DR: 0.066088(50), 0.036763(all), eps: 0.200000
DR: 0.048017(50), 0.036775(all), eps: 0.200000
DR: 0.057295(50), 0.036796(all), eps: 0.200000
DR: 0.045363(50), 0.036805(all), eps: 0.200000
DR: 0.046546(50), 0.036813(all), eps: 0.200000
DR: 0.064233(50), 0.036842(all), eps: 0.200000
DR: 0.058673(50), 0.036862(all), eps: 0.200000
DR: 0.053041(50), 0.036879(all), eps: 0.200000
DR: 0.072293(50), 0.036913(all), eps: 0.200000
DR: 0.044605(50), 0.036921(all), eps: 0.200000
DR: 0.056901(50), 0.036939(all), eps: 0.200000
DR: 0.040717(50), 0.036943(all), eps: 0.200000
DR: 0.050129(50), 0.036954(all), eps: 0.200000
DR: 0.069470(50), 0.036988(all), eps: 0.200000
DR: 0.054461(50), 0.037008(all), eps: 0.200000
DR: 0.052520(50), 0.037025(all), eps: 0.200000
DR: 0.036441(

DR: 0.035245(50), 0.040389(all), eps: 0.200000
DR: 0.035241(50), 0.040385(all), eps: 0.200000
DR: 0.030038(50), 0.040373(all), eps: 0.200000
DR: 0.056716(50), 0.040388(all), eps: 0.200000
DR: 0.034407(50), 0.040380(all), eps: 0.200000
DR: 0.040510(50), 0.040380(all), eps: 0.200000
DR: 0.042243(50), 0.040383(all), eps: 0.200000
DR: 0.036723(50), 0.040380(all), eps: 0.200000
DR: 0.032440(50), 0.040369(all), eps: 0.200000
DR: 0.034021(50), 0.040364(all), eps: 0.200000
DR: 0.050565(50), 0.040370(all), eps: 0.200000
DR: 0.037740(50), 0.040368(all), eps: 0.200000
DR: 0.037290(50), 0.040361(all), eps: 0.200000
DR: 0.051557(50), 0.040371(all), eps: 0.200000
DR: 0.040043(50), 0.040368(all), eps: 0.200000
DR: 0.061768(50), 0.040387(all), eps: 0.200000
DR: 0.049716(50), 0.040392(all), eps: 0.200000
DR: 0.046895(50), 0.040398(all), eps: 0.200000
DR: 0.034084(50), 0.040389(all), eps: 0.200000
DR: 0.053886(50), 0.040401(all), eps: 0.200000
DR: 0.023922(50), 0.040383(all), eps: 0.200000
DR: 0.044917(

DR: 0.042599(50), 0.039881(all), eps: 0.200000
DR: 0.047691(50), 0.039884(all), eps: 0.200000
DR: 0.034369(50), 0.039880(all), eps: 0.200000
DR: 0.029781(50), 0.039869(all), eps: 0.200000
DR: 0.035469(50), 0.039866(all), eps: 0.200000
DR: 0.023330(50), 0.039852(all), eps: 0.200000
DR: 0.040215(50), 0.039852(all), eps: 0.200000
DR: 0.047854(50), 0.039855(all), eps: 0.200000
DR: 0.029636(50), 0.039847(all), eps: 0.200000
DR: 0.046111(50), 0.039850(all), eps: 0.200000
DR: 0.044876(50), 0.039853(all), eps: 0.200000
DR: 0.034609(50), 0.039846(all), eps: 0.200000
DR: 0.063462(50), 0.039864(all), eps: 0.200000
DR: 0.056188(50), 0.039874(all), eps: 0.200000
DR: 0.057564(50), 0.039888(all), eps: 0.200000
DR: 0.048887(50), 0.039893(all), eps: 0.200000
DR: 0.064231(50), 0.039912(all), eps: 0.200000
DR: 0.040805(50), 0.039910(all), eps: 0.200000
DR: 0.041619(50), 0.039911(all), eps: 0.200000
DR: 0.041998(50), 0.039918(all), eps: 0.200000
DR: 0.054029(50), 0.039929(all), eps: 0.200000
DR: 0.050461(

DR: 0.029238(50), 0.039880(all), eps: 0.200000
DR: 0.028231(50), 0.039872(all), eps: 0.200000
DR: 0.030547(50), 0.039864(all), eps: 0.200000
DR: 0.044645(50), 0.039868(all), eps: 0.200000
DR: 0.035940(50), 0.039864(all), eps: 0.200000
DR: 0.041001(50), 0.039864(all), eps: 0.200000
DR: 0.026443(50), 0.039853(all), eps: 0.200000
DR: 0.033978(50), 0.039849(all), eps: 0.200000
DR: 0.034488(50), 0.039843(all), eps: 0.200000
DR: 0.032425(50), 0.039838(all), eps: 0.200000
DR: 0.026548(50), 0.039828(all), eps: 0.200000
DR: 0.032754(50), 0.039823(all), eps: 0.200000
DR: 0.028327(50), 0.039813(all), eps: 0.200000
DR: 0.032870(50), 0.039809(all), eps: 0.200000
DR: 0.030459(50), 0.039805(all), eps: 0.200000
DR: 0.024489(50), 0.039795(all), eps: 0.200000
DR: 0.029868(50), 0.039786(all), eps: 0.200000
DR: 0.023004(50), 0.039774(all), eps: 0.200000
DR: 0.022817(50), 0.039760(all), eps: 0.200000
DR: 0.022613(50), 0.039749(all), eps: 0.200000
DR: 0.023507(50), 0.039736(all), eps: 0.200000
DR: 0.025976(

DR: 0.028855(50), 0.039259(all), eps: 0.200000
DR: 0.036443(50), 0.039255(all), eps: 0.200000
DR: 0.027593(50), 0.039248(all), eps: 0.200000
DR: 0.028487(50), 0.039239(all), eps: 0.200000
DR: 0.031088(50), 0.039234(all), eps: 0.200000
DR: 0.038127(50), 0.039231(all), eps: 0.200000
DR: 0.043570(50), 0.039234(all), eps: 0.200000
DR: 0.037390(50), 0.039231(all), eps: 0.200000
DR: 0.036562(50), 0.039229(all), eps: 0.200000
DR: 0.051667(50), 0.039235(all), eps: 0.200000
DR: 0.040250(50), 0.039235(all), eps: 0.200000
DR: 0.025057(50), 0.039224(all), eps: 0.200000
DR: 0.036146(50), 0.039222(all), eps: 0.200000
DR: 0.042435(50), 0.039222(all), eps: 0.200000
DR: 0.025084(50), 0.039214(all), eps: 0.200000
DR: 0.038005(50), 0.039213(all), eps: 0.200000
DR: 0.034782(50), 0.039210(all), eps: 0.200000
DR: 0.047443(50), 0.039213(all), eps: 0.200000
DR: 0.072879(50), 0.039233(all), eps: 0.200000
DR: 0.040072(50), 0.039232(all), eps: 0.200000
DR: 0.056529(50), 0.039243(all), eps: 0.200000
DR: 0.061781(

DR: 0.080866(50), 0.040467(all), eps: 0.200000
DR: 0.054271(50), 0.040474(all), eps: 0.200000
DR: 0.064326(50), 0.040486(all), eps: 0.200000
DR: 0.085176(50), 0.040510(all), eps: 0.200000
DR: 0.056387(50), 0.040517(all), eps: 0.200000
DR: 0.054470(50), 0.040525(all), eps: 0.200000
DR: 0.054638(50), 0.040534(all), eps: 0.200000
DR: 0.080307(50), 0.040556(all), eps: 0.200000
DR: 0.076362(50), 0.040573(all), eps: 0.200000
DR: 0.038315(50), 0.040572(all), eps: 0.200000
DR: 0.065055(50), 0.040584(all), eps: 0.200000
DR: 0.055759(50), 0.040592(all), eps: 0.200000
DR: 0.079160(50), 0.040612(all), eps: 0.200000
DR: 0.053832(50), 0.040619(all), eps: 0.200000
DR: 0.057475(50), 0.040627(all), eps: 0.200000
DR: 0.108437(50), 0.040663(all), eps: 0.200000
DR: 0.057127(50), 0.040670(all), eps: 0.200000
DR: 0.089606(50), 0.040696(all), eps: 0.200000
DR: 0.070983(50), 0.040711(all), eps: 0.200000
DR: 0.065521(50), 0.040724(all), eps: 0.200000
DR: 0.062732(50), 0.040738(all), eps: 0.200000
DR: 0.066261(

DR: 0.072311(50), 0.042811(all), eps: 0.200000
DR: 0.090281(50), 0.042836(all), eps: 0.200000
DR: 0.054575(50), 0.042842(all), eps: 0.200000
DR: 0.053005(50), 0.042846(all), eps: 0.200000
DR: 0.089691(50), 0.042869(all), eps: 0.200000
DR: 0.069176(50), 0.042880(all), eps: 0.200000
DR: 0.077005(50), 0.042897(all), eps: 0.200000
DR: 0.089111(50), 0.042918(all), eps: 0.200000
DR: 0.085905(50), 0.042939(all), eps: 0.200000
DR: 0.075184(50), 0.042955(all), eps: 0.200000
DR: 0.074286(50), 0.042970(all), eps: 0.200000
DR: 0.076171(50), 0.042984(all), eps: 0.200000
DR: 0.086284(50), 0.043005(all), eps: 0.200000
DR: 0.099742(50), 0.043032(all), eps: 0.200000
DR: 0.078948(50), 0.043049(all), eps: 0.200000
DR: 0.077321(50), 0.043064(all), eps: 0.200000
DR: 0.066935(50), 0.043076(all), eps: 0.200000
DR: 0.060442(50), 0.043083(all), eps: 0.200000
DR: 0.078646(50), 0.043100(all), eps: 0.200000
DR: 0.082555(50), 0.043119(all), eps: 0.200000
DR: 0.074834(50), 0.043135(all), eps: 0.200000
DR: 0.086811(

DR: 0.058035(50), 0.044601(all), eps: 0.200000
DR: 0.077819(50), 0.044616(all), eps: 0.200000
DR: 0.040651(50), 0.044612(all), eps: 0.200000
DR: 0.064425(50), 0.044621(all), eps: 0.200000
DR: 0.054068(50), 0.044624(all), eps: 0.200000
DR: 0.066129(50), 0.044633(all), eps: 0.200000
DR: 0.038335(50), 0.044630(all), eps: 0.200000
DR: 0.059963(50), 0.044636(all), eps: 0.200000
DR: 0.054456(50), 0.044639(all), eps: 0.200000
DR: 0.043822(50), 0.044638(all), eps: 0.200000
DR: 0.087450(50), 0.044656(all), eps: 0.200000
DR: 0.074183(50), 0.044669(all), eps: 0.200000
DR: 0.035504(50), 0.044664(all), eps: 0.200000
DR: 0.035046(50), 0.044660(all), eps: 0.200000
DR: 0.052805(50), 0.044665(all), eps: 0.200000
DR: 0.062371(50), 0.044673(all), eps: 0.200000
DR: 0.061299(50), 0.044678(all), eps: 0.200000
DR: 0.072684(50), 0.044691(all), eps: 0.200000
DR: 0.058144(50), 0.044695(all), eps: 0.200000
DR: 0.070746(50), 0.044707(all), eps: 0.200000
DR: 0.093133(50), 0.044727(all), eps: 0.200000
DR: 0.065711(

In [None]:
# Debug inspection: `old` is the first value returned by the most recent
# env.step(action) call in the training loop, so this cell only works after
# that loop has executed at least one step. Displays its "reward" entry.
old["reward"]