This experiment uses a higher learning rate and larger rollouts.

In [1]:
import deep_rl

In [2]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [3]:
import torch
from torch.nn import functional as F
from torch.autograd import Variable
from torch import nn, optim
import torch.utils.data

# load as dask array
import time

import logging
import sys
import os
import glob
import numpy as np
import datetime
from matplotlib import pyplot as plt
from tqdm import tqdm_notebook as tqdm

In [4]:
from deep_rl.utils.logger import get_logger, get_default_log_dir

from deep_rl.network.network_heads import CategoricalActorCriticNet, QuantileNet, OptionCriticNet, DeterministicActorCriticNet, GaussianActorCriticNet
from deep_rl.network.network_bodies import FCBody
from deep_rl.utils.normalizer import RunningStatsNormalizer, RescaleNormalizer
from deep_rl.component.task import ParallelizedTask

In [5]:
from world_models_sonic.models.vae import VAE5, loss_function_vae
from world_models_sonic.helpers.summarize import TorchSummarizeDf
from world_models_sonic.models.rnn import MDNRNN
from world_models_sonic.models.inverse_model import InverseModel2
from world_models_sonic.models.world_model import WorldModel
from world_models_sonic.custom_envs.env import make_env
from world_models_sonic.custom_envs.wrappers import RandomGameReset
from world_models_sonic import config
from world_models_sonic.helpers.deep_rl import PPOAgent, run_iterations, SonicWorldModelDeepRL, CategoricalWorldActorCriticNet, Config

Importing 0 potential games...
Imported 0 games


# Init

In [6]:

# Run identity: every log and checkpoint for this run is written under ./outputs/<NAME>/.
NAME = 'RNN_v3b_128im_512z_1512_v6k_VAE5_all_noframestack'
env_name = 'sonic256'

# Image / latent sizes
image_size = 128
channels = 3
z_dim = 512  # latent dimensions

# RNN
action_dim = 10

# Set this true to render (and make it go slower).
# NOTE: the PPO config cell further down rebinds `verbose` to False.
verbose = True

cuda = torch.cuda.is_available()
ppo_save_file = './outputs/{NAME}/PPO_512z_all_g.pkl'.format(NAME=NAME)

In [7]:
# Logging
# Single source of truth for this run's output directory (logs + checkpoints).
log_dir = './outputs/{NAME}'.format(NAME=NAME)
# exist_ok=True replaces the separate isdir() check, so there is no window
# between testing for the directory and creating it.
os.makedirs(log_dir, exist_ok=True)

# Log to file and stream: the root logger streams INFO to stdout, and
# get_logger below is given a file name inside log_dir.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logger = logging.getLogger(NAME)

print(log_dir)

# The logger name is the last 10 characters of NAME ('framestack' for this run).
deep_rl_logger = get_logger(
    NAME[-10:],
    file_name='deep_rl_ppo.log',
    level=logging.INFO,
    log_dir=log_dir,
)

./outputs/RNN_v3b_128im_512z_1512_v6k_VAE5_all_noframestack


# World model

In [8]:
# World-model components: VAE encoder, MDN-RNN dynamics model and inverse model.

# VAE
# TODO swap z and k dim, since it's inconsistent with other models
vae = VAE5(
    image_size=image_size,
    z_dim=128,
    conv_dim=64,
    code_dim=8,
    k_dim=z_dim,
    channels=channels,
)

# MDN-RNN hyperparameters (action_dim keeps the value set in the Init cell).
hidden_size = z_dim * 2
n_mixture = 5
temp = 1.0
mdnrnn = MDNRNN(z_dim, action_dim, hidden_size, n_mixture, temp)

# Inverse model
finv = InverseModel2(z_dim, action_dim, hidden_size=64)

# Combine the parts and switch the combined module to training mode.
world_model = WorldModel(
    vae,
    mdnrnn,
    finv,
    logger=deep_rl_logger,
    lambda_vae_kld=1 / 12.,
    lambda_finv=1 / 200,
    lambda_vae=1 / 8,
    lambda_loss=1000,
).train()
if cuda:
    world_model = world_model.cuda()

In [9]:
# Optimiser for world models
import torch.optim.lr_scheduler

# Hand unused cached GPU memory back before continuing.
torch.cuda.empty_cache()

# One Adam instance over every world-model parameter; it is also attached to
# the model object as `world_model.optimizer`.
world_model.optimizer = optimizer = optim.Adam(world_model.parameters(), lr=4e-5)

# Train

In [10]:
# batch: We have two different batch sizes, determined by your gpu:
# VAE: rollout_length * world_model_batch_size, which is how much VAE data fits on your gpu; for my 8GB gpu it was ~50
# PPO: (config.rollout_length*config.num_workers)/config.num_mini_batches: how much PPO data fits; for me it was ~500


# Latent size plus RNN hidden size; passed to FCBody as its first argument in
# the network factory below.
z_state_dim = world_model.mdnrnn.hidden_size + world_model.mdnrnn.z_dim


def task_fn(log_dir):
    """Build one SonicWorldModelDeepRL task that logs to ``log_dir``.

    ``verbose`` and ``image_size`` are notebook globals looked up when the
    task (and its environment) is actually built, not when this function is
    defined, so later rebinding of either name takes effect here.
    """
    def build_env():
        # Environment factory handed to the task as `env_fn`.
        base_env = make_env(
            'sonic', max_episode_steps=4000, to_gray=False, image_size=image_size)
        return RandomGameReset(base_env)

    return SonicWorldModelDeepRL(env_fn=build_env, log_dir=log_dir, verbose=verbose)

# PPO agent configuration.
# NOTE: binding `config` here hides the `config` module imported from
# world_models_sonic near the top of the notebook.
config = Config()

# Set this true to render (and make it go slower).
# This rebinds the notebook-level `verbose` that the Init cell set to True.
verbose = False
config.num_workers = 10 if not verbose else 1


def _make_parallel_task():
    """Create the ParallelizedTask; single_process=True only with exactly one worker."""
    worker_count = config.num_workers
    return ParallelizedTask(task_fn, worker_count, single_process=worker_count == 1)


def _make_optimizer(params):
    """Adam for the PPO network (lr 1e-4, eps 1e-6)."""
    return torch.optim.Adam(params, 1e-4, eps=1e-6)


def _make_network(state_dim, action_dim):
    """Build the categorical actor-critic network around the shared world model."""
    body = FCBody(z_state_dim, hidden_units=(64, 64), gate=F.relu)
    device_id = 0 if cuda else -1
    should_render = (config.num_workers == 1 and verbose)
    return CategoricalWorldActorCriticNet(
        state_dim,
        action_dim,
        body,
        gpu=device_id,
        world_model_fn=lambda: world_model,
        render=should_render,
        z_shape=(32, 16),
    )


config.task_fn = _make_parallel_task
config.optimizer_fn = _make_optimizer
config.network_fn = _make_network

# see params here https://github.com/openai/retro-baselines/blob/master/agents/ppo2_agent.py
config.discount = 0.99
config.logger = deep_rl_logger
config.use_gae = True
config.gae_tau = 0.95
config.entropy_weight = 0.02
config.value_weight = 0.5
# Watch grad_norm in tensorboard and set this to upper quartile to avoid clipping too much
config.gradient_clip = 0.5
config.rollout_length = 36
config.optimization_epochs = 3
config.num_mini_batches = 8
config.ppo_ratio_clip = 0.1
config.iteration_log_interval = 1

config.train_world_model = False
if not config.train_world_model:
    # More can fit in batch when the world model is not being trained.
    config.rollout_length = config.rollout_length * 12
config.world_model_batch_size = 2

# I tuned these so the intrinsic reward was 1) within an order of magnitude of the extrinsic, 2) smaller, 3) negative when stuck
# TODO use reward normalisers etc to reduce the need for these hyperparameters
config.curiosity = False
config.curiosity_only = False
config.curiosity_weight = 0.01
# How many standard deviations above the mean a new experience needs to be so that the agent is not bored
config.curiosity_boredom = 2
# config.reward_normalizer = RunningStatsNormalizer()
# config.intrinsic_reward_normalizer = RunningStatsNormalizer()
agent = PPOAgent(config)

# Report the batch sizes implied by the config (printed as tuples, unchanged).
print(('total rollout of ', config.rollout_length*config.num_workers))
print(('made of sequences of length', (config.rollout_length)))
print(('ppo mini batch', (config.rollout_length*config.num_workers)//config.num_mini_batches))


# Resume from the last checkpoint when one exists.
# NOTE: torch.load unpickles its input, so only point this at checkpoints you trust.
if os.path.isfile(ppo_save_file):
    print(('loading ppo_save_file', ppo_save_file, 'modified', time.ctime(os.path.getmtime(ppo_save_file))))
    agent.load(ppo_save_file)

    # Also load normalizers, but only those that are both configured and
    # present on disk. The save cell below currently has normalizer saving
    # commented out, so a checkpoint written by this notebook may have no
    # companion normalizer files; that must not abort the resume.
    for normalizer_name in ('intrinsic_reward_normalizer', 'reward_normalizer'):
        normalizer_file = ppo_save_file.replace('.pkl', '-%s.pkl' % normalizer_name)
        normalizer = getattr(config, normalizer_name, None)
        if normalizer is None or not os.path.isfile(normalizer_file):
            print(('skipping normalizer', normalizer_name, normalizer_file))
            continue
        state_dict = torch.load(normalizer_file)
        normalizer.load_state_dict(state_dict)
else:
    print("couldn't find save")

game: SonicAndKnuckles3-Genesis state: MarbleGardenZone.Act1
game: SonicTheHedgehog2-Genesis state: EmeraldHillZone.Act2
game: SonicTheHedgehog-Genesis state: ScrapBrainZone.Act2
game: SonicTheHedgehog2-Genesis state: MetropolisZone.Act1
game: SonicTheHedgehog-Genesis state: SpringYardZone.Act2
game: SonicAndKnuckles3-Genesis state: SandopolisZone.Act1
game: SonicAndKnuckles3-Genesis state: DeathEggZone.Act1
game: SonicTheHedgehog2-Genesis state: EmeraldHillZone.Act1
game: SonicAndKnuckles3-Genesis state: AngelIslandZone.Act1
game: SonicAndKnuckles3-Genesis state: DeathEggZone.Act1
('total rollout of ', 4320)
('made of sequences of length', 432)
('ppo mini batch', 540)
('loading ppo_save_file', './outputs/RNN_v3b_128im_512z_1512_v6k_VAE5_all_noframestack/PPO_512z_all_g.pkl', 'modified', 'Fri Jun 22 16:57:50 2018')


Process ProcessWrapper-8:
Process ProcessWrapper-4:
Process ProcessWrapper-5:
Traceback (most recent call last):
Process ProcessWrapper-7:
Traceback (most recent call last):
Traceback (most recent call last):
Process ProcessWrapper-9:
Process ProcessWrapper-2:
  File "/home/wassname/.pyenv/versions/3.5.3/lib/python3.5/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
Process ProcessWrapper-6:
Process ProcessWrapper-3:
Process ProcessWrapper-1:
  File "/home/wassname/.pyenv/versions/3.5.3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/wassname/.pyenv/versions/3.5.3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/wassname/.pyenv/versions/3.5.3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/media/oldhome/wassname/Documents/projects/retro_sonic_comp/DeepRL/deep_rl/component/task.py", line 177, in run
    op, data = self.pi

In [11]:
# # DEBUG

# # reset from checkpoint
# agent.load('./outputs/RNN_v3b_128im_512z_1512_v6j_VAE5_all/PPO_512z_all_g-20180606_02-06-59.pkl')

# # # Reset just rnn
# world_model.mdnrnn = MDNRNN(z_dim, action_dim, hidden_size, n_mixture, temp)
# world_model.mdnrnn.cuda()

# # if we want to reset the actor
# from deep_rl.network.network_heads import ActorCriticNet
# agent.network.network = ActorCriticNet(agent.network.z_state_dim, action_dim, FCBody(z_state_dim, hidden_units=(64, 64), gate=F.relu), None, None)
# agent.network.network.cuda()


# world_model.finv = finv = InverseModel(z_dim, action_dim, hidden_size=z_dim*2)
# world_model.finv.cuda()

In [12]:
# agent.load('/home/wassname/Documents/projects/retro_sonic_comp/world-models-pytorch/outputs/RNN_v3b_128im_512z_1512_v6k_VAE5_all_noframestack/PPO_512z_all_g-20180622_06-38-24.pkl')

In [13]:
# Print the shell commands for watching this run in TensorBoard.
# (The string previously opened with four quotes, which printed a stray '"'.)
print("""
# To monitor with tensorboard at http://localhost:6006/
cd ./outputs/{NAME}/
tensorboard  --logdir .
""".format(NAME=NAME))

"
# To monitor with tensorboard at http://localhost:6006/
cd ./outputs/RNN_v3b_128im_512z_1512_v6k_VAE5_all_noframestack/
tensorboard  --logdir .



In [14]:
# Train until interrupted. On a keyboard/kernel interrupt: close the
# environments, write the checkpoint plus a timestamped backup, then re-raise
# so the interrupt is still reported.
try:
    run_iterations(agent, log_dir=log_dir)
except KeyboardInterrupt:
    try:
        # Close the environment
        if config.num_workers == 1:
            agent.task.tasks[0].env.close()
        else:
            for worker_task in agent.task.tasks:
                worker_task.close()
    finally:
        # Saving lives in `finally` so that a failure while closing a worker
        # (its process may already have died from the same interrupt) does
        # not cost us the checkpoint.
        print("saving", ppo_save_file)
        agent.save(ppo_save_file)
#         torch.save(config.intrinsic_reward_normalizer.state_dict(), ppo_save_file.replace('.pkl', '-intrinsic_reward_normalizer.pkl'))
#         torch.save(config.reward_normalizer.state_dict(), ppo_save_file.replace('.pkl', '-reward_normalizer.pkl'))

        # Backup since it sometimes gets corrupted
        ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')
        backup_file = ppo_save_file.replace('.pkl', '-%s.pkl' % ts)
        print("saving backup", backup_file)
        agent.save(backup_file)
    raise

2018-06-22 16:58:54,533 - framestack - INFO: action counts: {0: 334, 1: 89, 2: 1027, 3: 46, 4: 1041, 5: 200, 6: 282, 7: 73, 8: 893, 9: 335}
2018-06-22 16:58:54,558 - framestack - INFO: steps: 4320, steps/s: 88.58
  epoch reward:       0.0000/1.2000/7.9751 [n=10] (min/mean/max)
  running reward:     0.0000/1.2000/7.9751 [n=10]

2018-06-22 16:59:44,564 - framestack - INFO: action counts: {0: 317, 1: 91, 2: 1105, 3: 45, 4: 1009, 5: 230, 6: 231, 7: 79, 8: 902, 9: 311}
2018-06-22 16:59:44,590 - framestack - INFO: steps: 8640, steps/s: 87.46
  epoch reward:       0.0000/1.2000/7.9751 [n=10] (min/mean/max)
  running reward:     0.0000/1.2000/7.9751 [n=20]

2018-06-22 17:00:34,102 - framestack - INFO: action counts: {0: 316, 1: 83, 2: 1063, 3: 35, 4: 1028, 5: 218, 6: 262, 7: 65, 8: 891, 9: 359}
2018-06-22 17:00:34,124 - framestack - INFO: steps: 12960, steps/s: 87.38
  epoch reward:       0.0000/1.2000/7.9751 [n=10] (min/mean/max)
  running reward:     0.0000/1.2000/7.9751 [n=30]

2018-06-22 1

2018-06-22 17:19:41,007 - framestack - INFO: action counts: {0: 319, 1: 112, 2: 974, 3: 61, 4: 975, 5: 253, 6: 313, 7: 112, 8: 828, 9: 373}
2018-06-22 17:19:41,032 - framestack - INFO: steps: 112320, steps/s: 86.74
  epoch reward:       2.0606/8.7370/26.6185 [n=10] (min/mean/max)
  running reward:     0.0000/6.9152/26.6303 [n=260]

2018-06-22 17:20:32,032 - framestack - INFO: action counts: {0: 321, 1: 99, 2: 1013, 3: 74, 4: 948, 5: 244, 6: 319, 7: 113, 8: 825, 9: 364}
2018-06-22 17:20:32,063 - framestack - INFO: steps: 116640, steps/s: 86.67
  epoch reward:       2.0606/8.7370/26.6185 [n=10] (min/mean/max)
  running reward:     0.0000/6.9826/26.6303 [n=270]

2018-06-22 17:21:20,807 - framestack - INFO: action counts: {0: 305, 1: 118, 2: 986, 3: 72, 4: 977, 5: 214, 6: 294, 7: 98, 8: 877, 9: 379}
2018-06-22 17:21:20,829 - framestack - INFO: steps: 120960, steps/s: 86.74
  epoch reward:       2.8419/7.4618/26.6185 [n=10] (min/mean/max)
  running reward:     0.0000/6.9997/26.6303 [n=280]


2018-06-22 17:40:36,117 - framestack - INFO: action counts: {0: 290, 1: 76, 2: 1133, 3: 41, 4: 1091, 5: 181, 6: 218, 7: 68, 8: 920, 9: 302}
2018-06-22 17:40:36,140 - framestack - INFO: steps: 220320, steps/s: 86.43
  epoch reward:       1.8440/12.4674/27.9323 [n=10] (min/mean/max)
  running reward:     0.0000/8.7931/28.3140 [n=500]

2018-06-22 17:41:24,722 - framestack - INFO: action counts: {0: 250, 1: 54, 2: 1177, 3: 32, 4: 1072, 5: 194, 6: 222, 7: 59, 8: 955, 9: 305}
2018-06-22 17:41:24,746 - framestack - INFO: steps: 224640, steps/s: 86.47
  epoch reward:       1.8440/11.5406/27.9323 [n=10] (min/mean/max)
  running reward:     0.0000/8.9999/28.3140 [n=500]

2018-06-22 17:42:12,597 - framestack - INFO: action counts: {0: 252, 1: 76, 2: 1183, 3: 35, 4: 1047, 5: 175, 6: 242, 7: 58, 8: 929, 9: 323}
2018-06-22 17:42:12,620 - framestack - INFO: steps: 228960, steps/s: 86.55
  epoch reward:       1.8440/11.5406/27.9323 [n=10] (min/mean/max)
  running reward:     0.0000/9.2067/28.3140 [n=5

2018-06-22 18:00:48,975 - framestack - INFO: action counts: {0: 211, 1: 55, 2: 1305, 3: 18, 4: 1083, 5: 161, 6: 199, 7: 29, 8: 961, 9: 298}
2018-06-22 18:00:49,003 - framestack - INFO: steps: 328320, steps/s: 87.30
  epoch reward:       2.9088/9.3046/26.1445 [n=10] (min/mean/max)
  running reward:     0.7183/10.8482/28.3140 [n=500]

2018-06-22 18:01:37,013 - framestack - INFO: action counts: {0: 179, 1: 56, 2: 1358, 3: 21, 4: 1071, 5: 117, 6: 186, 7: 39, 8: 1011, 9: 282}
2018-06-22 18:01:37,040 - framestack - INFO: steps: 332640, steps/s: 87.34
  epoch reward:       2.9088/8.8037/26.1445 [n=10] (min/mean/max)
  running reward:     0.7183/10.8495/28.3140 [n=500]

2018-06-22 18:02:25,382 - framestack - INFO: action counts: {0: 194, 1: 44, 2: 1345, 3: 14, 4: 1152, 5: 141, 6: 186, 7: 36, 8: 951, 9: 257}
2018-06-22 18:02:25,404 - framestack - INFO: steps: 336960, steps/s: 87.36
  epoch reward:       2.9088/8.8037/26.1445 [n=10] (min/mean/max)
  running reward:     0.7183/10.8763/28.3140 [n=

2018-06-22 18:20:21,739 - framestack - INFO: steps: 432000, steps/s: 87.57
  epoch reward:       3.2713/12.1956/45.7083 [n=10] (min/mean/max)
  running reward:     0.7183/10.7414/45.7083 [n=500]

2018-06-22 18:21:09,543 - framestack - INFO: action counts: {0: 258, 1: 54, 2: 1214, 3: 25, 4: 1172, 5: 140, 6: 190, 7: 46, 8: 915, 9: 306}
2018-06-22 18:21:09,570 - framestack - INFO: steps: 436320, steps/s: 87.60
  epoch reward:       3.2713/13.2667/45.7083 [n=10] (min/mean/max)
  running reward:     0.7183/10.7574/45.7083 [n=500]

2018-06-22 18:21:56,162 - framestack - INFO: action counts: {0: 229, 1: 47, 2: 1208, 3: 30, 4: 1171, 5: 165, 6: 203, 7: 45, 8: 932, 9: 290}
2018-06-22 18:21:56,190 - framestack - INFO: steps: 440640, steps/s: 87.65
  epoch reward:       5.1114/14.2116/45.7083 [n=10] (min/mean/max)
  running reward:     0.7183/10.8108/45.7083 [n=500]

2018-06-22 18:22:41,961 - framestack - INFO: action counts: {0: 230, 1: 38, 2: 1224, 3: 26, 4: 1159, 5: 154, 6: 210, 7: 44, 8: 940, 

2018-06-22 18:39:25,817 - framestack - INFO: action counts: {0: 257, 1: 61, 2: 1211, 3: 23, 4: 1088, 5: 144, 6: 208, 7: 51, 8: 960, 9: 317}
2018-06-22 18:39:25,839 - framestack - INFO: steps: 540000, steps/s: 88.94
  epoch reward:       1.8836/15.3900/46.7847 [n=10] (min/mean/max)
  running reward:     1.3836/9.8850/46.7847 [n=500]

2018-06-22 18:40:11,840 - framestack - INFO: action counts: {0: 295, 1: 70, 2: 1146, 3: 34, 4: 1065, 5: 170, 6: 203, 7: 53, 8: 946, 9: 338}
2018-06-22 18:40:11,865 - framestack - INFO: steps: 544320, steps/s: 88.98
  epoch reward:       1.8836/14.6341/46.7847 [n=10] (min/mean/max)
  running reward:     1.3836/9.9916/46.7847 [n=500]

2018-06-22 18:40:57,046 - framestack - INFO: action counts: {0: 268, 1: 74, 2: 1153, 3: 13, 4: 1068, 5: 164, 6: 187, 7: 68, 8: 993, 9: 332}
2018-06-22 18:40:57,070 - framestack - INFO: steps: 548640, steps/s: 89.03
  epoch reward:       0.7183/13.5595/46.7847 [n=10] (min/mean/max)
  running reward:     0.7183/10.0867/46.7847 [n=

2018-06-22 18:58:40,223 - framestack - INFO: action counts: {0: 252, 1: 52, 2: 1148, 3: 17, 4: 1080, 5: 141, 6: 175, 7: 49, 8: 1071, 9: 335}
2018-06-22 18:58:40,243 - framestack - INFO: steps: 648000, steps/s: 89.72
  epoch reward:       0.1048/18.7466/49.3889 [n=10] (min/mean/max)
  running reward:     0.1048/10.0752/49.3889 [n=500]

2018-06-22 18:59:26,358 - framestack - INFO: action counts: {0: 264, 1: 50, 2: 1181, 3: 23, 4: 1049, 5: 138, 6: 217, 7: 57, 8: 1029, 9: 312}
2018-06-22 18:59:26,385 - framestack - INFO: steps: 652320, steps/s: 89.74
  epoch reward:       2.7224/19.5385/49.3889 [n=10] (min/mean/max)
  running reward:     0.1048/10.2007/49.3889 [n=500]

2018-06-22 19:00:12,604 - framestack - INFO: action counts: {0: 260, 1: 43, 2: 1213, 3: 22, 4: 1037, 5: 155, 6: 173, 7: 52, 8: 1053, 9: 312}
2018-06-22 19:00:12,627 - framestack - INFO: steps: 656640, steps/s: 89.77
  epoch reward:       2.7224/19.4662/48.7525 [n=10] (min/mean/max)
  running reward:     0.1048/10.3058/49.388

2018-06-22 19:16:51,940 - framestack - INFO: steps: 751680, steps/s: 90.45
  epoch reward:       1.1403/14.5077/47.7578 [n=10] (min/mean/max)
  running reward:     0.1048/11.4869/49.3889 [n=500]

2018-06-22 19:17:37,316 - framestack - INFO: action counts: {0: 280, 1: 53, 2: 1110, 3: 33, 4: 1073, 5: 159, 6: 208, 7: 76, 8: 1037, 9: 291}
2018-06-22 19:17:37,342 - framestack - INFO: steps: 756000, steps/s: 90.47
  epoch reward:       5.1114/21.1137/47.7578 [n=10] (min/mean/max)
  running reward:     0.1048/11.6014/49.3889 [n=500]

2018-06-22 19:18:23,375 - framestack - INFO: action counts: {0: 268, 1: 48, 2: 1184, 3: 30, 4: 1040, 5: 135, 6: 184, 7: 59, 8: 1080, 9: 292}
2018-06-22 19:18:23,398 - framestack - INFO: steps: 760320, steps/s: 90.49
  epoch reward:       5.1114/22.4926/47.7578 [n=10] (min/mean/max)
  running reward:     0.1048/11.7586/49.3889 [n=500]

2018-06-22 19:19:07,928 - framestack - INFO: action counts: {0: 291, 1: 72, 2: 1090, 3: 35, 4: 1041, 5: 161, 6: 208, 7: 50, 8: 106

2018-06-22 19:36:03,207 - framestack - INFO: action counts: {0: 269, 1: 61, 2: 1047, 3: 33, 4: 1126, 5: 167, 6: 198, 7: 63, 8: 1060, 9: 296}
2018-06-22 19:36:03,232 - framestack - INFO: steps: 859680, steps/s: 90.87
  epoch reward:       2.1456/12.5799/47.8236 [n=10] (min/mean/max)
  running reward:     0.1048/14.3042/49.3889 [n=500]

2018-06-22 19:36:49,252 - framestack - INFO: action counts: {0: 271, 1: 51, 2: 1056, 3: 34, 4: 1158, 5: 122, 6: 211, 7: 66, 8: 1075, 9: 276}
2018-06-22 19:36:49,273 - framestack - INFO: steps: 864000, steps/s: 90.89
  epoch reward:       2.1456/12.8376/47.8236 [n=10] (min/mean/max)
  running reward:     1.1403/14.1860/49.3889 [n=500]

2018-06-22 19:37:37,254 - framestack - INFO: action counts: {0: 240, 1: 44, 2: 1075, 3: 26, 4: 1134, 5: 126, 6: 189, 7: 62, 8: 1166, 9: 258}
2018-06-22 19:37:37,278 - framestack - INFO: steps: 868320, steps/s: 90.88
  epoch reward:       2.1456/12.3716/47.8236 [n=10] (min/mean/max)
  running reward:     1.1403/14.0427/49.224

2018-06-22 19:54:37,040 - framestack - INFO: steps: 963360, steps/s: 91.12
  epoch reward:       6.3695/12.2395/26.5950 [n=10] (min/mean/max)
  running reward:     1.1403/13.1054/49.2242 [n=500]

2018-06-22 19:55:23,435 - framestack - INFO: action counts: {0: 258, 1: 51, 2: 1146, 3: 31, 4: 1075, 5: 144, 6: 212, 7: 56, 8: 1079, 9: 268}
2018-06-22 19:55:23,457 - framestack - INFO: steps: 967680, steps/s: 91.13
  epoch reward:       0.7183/11.4443/26.5950 [n=10] (min/mean/max)
  running reward:     0.7183/13.0441/49.2242 [n=500]

2018-06-22 19:56:08,855 - framestack - INFO: action counts: {0: 265, 1: 48, 2: 1123, 3: 22, 4: 1074, 5: 119, 6: 171, 7: 61, 8: 1092, 9: 345}
2018-06-22 19:56:08,877 - framestack - INFO: steps: 972000, steps/s: 91.14
  epoch reward:       0.7183/11.4443/26.5950 [n=10] (min/mean/max)
  running reward:     0.7183/12.8507/49.2242 [n=500]

2018-06-22 19:56:53,964 - framestack - INFO: action counts: {0: 256, 1: 43, 2: 1209, 3: 26, 4: 1056, 5: 138, 6: 181, 7: 65, 8: 107

2018-06-22 20:13:39,581 - framestack - INFO: action counts: {0: 270, 1: 74, 2: 1085, 3: 30, 4: 964, 5: 154, 6: 222, 7: 65, 8: 1102, 9: 354}
2018-06-22 20:13:39,605 - framestack - INFO: steps: 1071360, steps/s: 91.47
  epoch reward:       0.4673/15.8662/49.0361 [n=10] (min/mean/max)
  running reward:     0.4673/10.2832/49.0361 [n=500]

2018-06-22 20:14:25,795 - framestack - INFO: action counts: {0: 282, 1: 80, 2: 1016, 3: 52, 4: 995, 5: 150, 6: 225, 7: 78, 8: 1102, 9: 340}
2018-06-22 20:14:25,819 - framestack - INFO: steps: 1075680, steps/s: 91.47
  epoch reward:       0.4673/16.6936/49.0361 [n=10] (min/mean/max)
  running reward:     0.4673/10.3655/49.0361 [n=500]

2018-06-22 20:15:11,512 - framestack - INFO: action counts: {0: 315, 1: 94, 2: 1001, 3: 42, 4: 975, 5: 170, 6: 240, 7: 73, 8: 1055, 9: 355}
2018-06-22 20:15:11,534 - framestack - INFO: steps: 1080000, steps/s: 91.49
  epoch reward:       1.8382/17.0323/49.0361 [n=10] (min/mean/max)
  running reward:     0.4673/10.4493/49.036

2018-06-22 20:31:57,149 - framestack - INFO: steps: 1175040, steps/s: 91.74
  epoch reward:       0.1048/10.4338/48.1758 [n=10] (min/mean/max)
  running reward:     0.1048/10.5615/49.0361 [n=500]

2018-06-22 20:32:43,622 - framestack - INFO: action counts: {0: 275, 1: 66, 2: 1080, 3: 31, 4: 994, 5: 130, 6: 178, 7: 59, 8: 1211, 9: 296}
2018-06-22 20:32:43,652 - framestack - INFO: steps: 1179360, steps/s: 91.74
  epoch reward:       0.1048/10.4338/48.1758 [n=10] (min/mean/max)
  running reward:     0.1048/10.5254/49.0361 [n=500]

2018-06-22 20:33:31,083 - framestack - INFO: action counts: {0: 247, 1: 69, 2: 1115, 3: 47, 4: 1071, 5: 124, 6: 164, 7: 66, 8: 1144, 9: 273}
2018-06-22 20:33:31,106 - framestack - INFO: steps: 1183680, steps/s: 91.74
  epoch reward:       0.1048/10.2862/48.1758 [n=10] (min/mean/max)
  running reward:     0.1048/10.5023/49.0361 [n=500]

2018-06-22 20:34:17,046 - framestack - INFO: action counts: {0: 224, 1: 73, 2: 1077, 3: 32, 4: 984, 5: 140, 6: 206, 7: 57, 8: 12

2018-06-22 20:51:11,370 - framestack - INFO: action counts: {0: 254, 1: 31, 2: 1195, 3: 20, 4: 1133, 5: 90, 6: 149, 7: 38, 8: 1112, 9: 298}
2018-06-22 20:51:11,395 - framestack - INFO: steps: 1283040, steps/s: 91.89
  epoch reward:       1.1360/5.4247/7.0388 [n=10] (min/mean/max)
  running reward:     0.1048/10.5761/49.0361 [n=500]

2018-06-22 20:51:56,832 - framestack - INFO: action counts: {0: 237, 1: 28, 2: 1173, 3: 21, 4: 1096, 5: 94, 6: 142, 7: 45, 8: 1220, 9: 264}
2018-06-22 20:51:56,855 - framestack - INFO: steps: 1287360, steps/s: 91.90
  epoch reward:       1.1360/5.8872/8.1216 [n=10] (min/mean/max)
  running reward:     0.1048/10.3765/49.0361 [n=500]

2018-06-22 20:52:42,253 - framestack - INFO: action counts: {0: 236, 1: 30, 2: 1171, 3: 23, 4: 1062, 5: 86, 6: 154, 7: 38, 8: 1229, 9: 291}
2018-06-22 20:52:42,272 - framestack - INFO: steps: 1291680, steps/s: 91.91
  epoch reward:       1.1360/7.1479/22.0513 [n=10] (min/mean/max)
  running reward:     0.1048/10.1856/49.0361 [n=

2018-06-22 21:09:36,329 - framestack - INFO: steps: 1386720, steps/s: 92.04
  epoch reward:       2.7279/9.0023/20.2559 [n=10] (min/mean/max)
  running reward:     0.1048/10.7638/48.1758 [n=500]

2018-06-22 21:10:20,465 - framestack - INFO: action counts: {0: 245, 1: 60, 2: 1124, 3: 33, 4: 1083, 5: 100, 6: 224, 7: 71, 8: 1065, 9: 315}
2018-06-22 21:10:20,493 - framestack - INFO: steps: 1391040, steps/s: 92.06
  epoch reward:       2.7279/9.0023/20.2559 [n=10] (min/mean/max)
  running reward:     0.1048/10.7352/48.1758 [n=500]

2018-06-22 21:11:05,299 - framestack - INFO: action counts: {0: 263, 1: 47, 2: 1040, 3: 37, 4: 1166, 5: 105, 6: 187, 7: 62, 8: 1117, 9: 296}
2018-06-22 21:11:05,326 - framestack - INFO: steps: 1395360, steps/s: 92.07
  epoch reward:       2.7279/9.0318/20.2559 [n=10] (min/mean/max)
  running reward:     0.1048/10.7071/48.1758 [n=500]

2018-06-22 21:11:49,729 - framestack - INFO: action counts: {0: 250, 1: 51, 2: 1085, 3: 25, 4: 1164, 5: 105, 6: 185, 7: 52, 8: 112

2018-06-22 21:28:41,470 - framestack - INFO: action counts: {0: 236, 1: 34, 2: 1092, 3: 28, 4: 1231, 5: 101, 6: 189, 7: 50, 8: 1090, 9: 269}
2018-06-22 21:28:41,494 - framestack - INFO: steps: 1494720, steps/s: 92.21
  epoch reward:       0.4673/7.2865/18.2030 [n=10] (min/mean/max)
  running reward:     0.4673/9.9841/47.6783 [n=500]

2018-06-22 21:29:27,626 - framestack - INFO: action counts: {0: 243, 1: 45, 2: 1092, 3: 33, 4: 1233, 5: 106, 6: 197, 7: 55, 8: 1054, 9: 262}
2018-06-22 21:29:27,650 - framestack - INFO: steps: 1499040, steps/s: 92.21
  epoch reward:       0.4673/7.1654/18.2030 [n=10] (min/mean/max)
  running reward:     0.4673/10.0189/47.6783 [n=500]

2018-06-22 21:30:14,674 - framestack - INFO: action counts: {0: 261, 1: 40, 2: 1125, 3: 29, 4: 1264, 5: 83, 6: 162, 7: 59, 8: 1039, 9: 258}
2018-06-22 21:30:14,697 - framestack - INFO: steps: 1503360, steps/s: 92.21
  epoch reward:       0.4673/5.4630/13.0706 [n=10] (min/mean/max)
  running reward:     0.4673/10.0104/47.6783 

2018-06-22 21:47:08,399 - framestack - INFO: steps: 1598400, steps/s: 92.30
  epoch reward:       0.1048/11.7775/24.9221 [n=10] (min/mean/max)
  running reward:     0.1048/8.8703/28.4084 [n=500]

2018-06-22 21:47:53,738 - framestack - INFO: action counts: {0: 268, 1: 61, 2: 1016, 3: 28, 4: 1306, 5: 111, 6: 174, 7: 62, 8: 980, 9: 314}
2018-06-22 21:47:53,760 - framestack - INFO: steps: 1602720, steps/s: 92.31
  epoch reward:       0.1048/10.1171/24.3344 [n=10] (min/mean/max)
  running reward:     0.1048/8.8926/28.4084 [n=500]

2018-06-22 21:48:39,646 - framestack - INFO: action counts: {0: 263, 1: 63, 2: 1014, 3: 32, 4: 1337, 5: 62, 6: 182, 7: 47, 8: 1019, 9: 301}
2018-06-22 21:48:39,669 - framestack - INFO: steps: 1607040, steps/s: 92.32
  epoch reward:       0.1048/10.1171/24.3344 [n=10] (min/mean/max)
  running reward:     0.1048/8.9149/28.4084 [n=500]

2018-06-22 21:49:25,060 - framestack - INFO: action counts: {0: 272, 1: 40, 2: 1023, 3: 34, 4: 1277, 5: 100, 6: 141, 7: 74, 8: 1044,

2018-06-22 22:06:10,841 - framestack - INFO: action counts: {0: 282, 1: 47, 2: 1053, 3: 17, 4: 1378, 5: 96, 6: 150, 7: 48, 8: 975, 9: 274}
2018-06-22 22:06:10,866 - framestack - INFO: steps: 1706400, steps/s: 92.45
  epoch reward:       2.9134/13.2261/48.5225 [n=10] (min/mean/max)
  running reward:     0.1048/10.4036/48.5225 [n=500]

2018-06-22 22:06:56,840 - framestack - INFO: action counts: {0: 303, 1: 52, 2: 1048, 3: 30, 4: 1263, 5: 102, 6: 168, 7: 50, 8: 998, 9: 306}
2018-06-22 22:06:56,867 - framestack - INFO: steps: 1710720, steps/s: 92.45
  epoch reward:       2.9134/12.2724/48.5225 [n=10] (min/mean/max)
  running reward:     0.1048/10.5033/48.5225 [n=500]

2018-06-22 22:07:41,188 - framestack - INFO: action counts: {0: 301, 1: 51, 2: 1054, 3: 37, 4: 1295, 5: 81, 6: 160, 7: 66, 8: 1021, 9: 254}
2018-06-22 22:07:41,214 - framestack - INFO: steps: 1715040, steps/s: 92.46
  epoch reward:       3.1907/14.5166/48.5225 [n=10] (min/mean/max)
  running reward:     0.1048/10.6503/48.5225

2018-06-22 22:24:33,873 - framestack - INFO: steps: 1810080, steps/s: 92.54
  epoch reward:       3.8890/11.2600/17.0720 [n=10] (min/mean/max)
  running reward:     0.1048/10.7222/48.5225 [n=500]

2018-06-22 22:25:21,007 - framestack - INFO: action counts: {0: 265, 1: 41, 2: 1106, 3: 19, 4: 1175, 5: 101, 6: 161, 7: 49, 8: 1113, 9: 290}
2018-06-22 22:25:21,034 - framestack - INFO: steps: 1814400, steps/s: 92.54
  epoch reward:       3.8890/11.2600/17.0720 [n=10] (min/mean/max)
  running reward:     0.1048/10.7118/48.5225 [n=500]

2018-06-22 22:26:08,787 - framestack - INFO: action counts: {0: 271, 1: 53, 2: 1062, 3: 24, 4: 1165, 5: 99, 6: 155, 7: 62, 8: 1145, 9: 284}
2018-06-22 22:26:08,810 - framestack - INFO: steps: 1818720, steps/s: 92.53
  epoch reward:       3.8890/12.3644/26.3647 [n=10] (min/mean/max)
  running reward:     0.1048/10.7568/48.5225 [n=500]

2018-06-22 22:26:54,208 - framestack - INFO: action counts: {0: 280, 1: 47, 2: 1074, 3: 31, 4: 1128, 5: 117, 6: 194, 7: 57, 8: 1

2018-06-22 22:43:37,803 - framestack - INFO: action counts: {0: 343, 1: 55, 2: 1181, 3: 29, 4: 1059, 5: 110, 6: 186, 7: 49, 8: 1023, 9: 285}
2018-06-22 22:43:37,829 - framestack - INFO: steps: 1918080, steps/s: 92.65
  epoch reward:       0.7183/12.4712/27.9287 [n=10] (min/mean/max)
  running reward:     0.4673/9.9746/48.5225 [n=500]

2018-06-22 22:44:22,031 - framestack - INFO: action counts: {0: 312, 1: 53, 2: 1139, 3: 43, 4: 1127, 5: 110, 6: 194, 7: 76, 8: 991, 9: 275}
2018-06-22 22:44:22,054 - framestack - INFO: steps: 1922400, steps/s: 92.66
  epoch reward:       0.7183/11.3865/27.9287 [n=10] (min/mean/max)
  running reward:     0.4673/9.9378/48.5225 [n=500]

2018-06-22 22:45:08,066 - framestack - INFO: action counts: {0: 307, 1: 66, 2: 1152, 3: 36, 4: 1066, 5: 129, 6: 183, 7: 58, 8: 1032, 9: 291}
2018-06-22 22:45:08,090 - framestack - INFO: steps: 1926720, steps/s: 92.66
  epoch reward:       0.7183/10.7026/27.9287 [n=10] (min/mean/max)
  running reward:     0.4673/9.9064/48.5225

2018-06-22 23:01:54,980 - framestack - INFO: steps: 2021760, steps/s: 92.74
  epoch reward:       0.7183/7.2901/24.9431 [n=10] (min/mean/max)
  running reward:     0.4673/10.1721/27.9287 [n=500]

2018-06-22 23:02:41,028 - framestack - INFO: action counts: {0: 343, 1: 43, 2: 1119, 3: 36, 4: 1145, 5: 133, 6: 209, 7: 66, 8: 896, 9: 330}
2018-06-22 23:02:41,059 - framestack - INFO: steps: 2026080, steps/s: 92.75
  epoch reward:       0.4673/4.6031/10.0247 [n=10] (min/mean/max)
  running reward:     0.4673/10.0390/27.9287 [n=500]

2018-06-22 23:03:26,254 - framestack - INFO: action counts: {0: 302, 1: 43, 2: 1203, 3: 37, 4: 1126, 5: 123, 6: 173, 7: 69, 8: 877, 9: 367}
2018-06-22 23:03:26,277 - framestack - INFO: steps: 2030400, steps/s: 92.75
  epoch reward:       0.4673/4.9429/11.2994 [n=10] (min/mean/max)
  running reward:     0.4673/9.9127/27.9287 [n=500]

2018-06-22 23:04:13,592 - framestack - INFO: action counts: {0: 360, 1: 67, 2: 1149, 3: 45, 4: 1111, 5: 127, 6: 192, 7: 65, 8: 913, 9

2018-06-22 23:21:07,019 - framestack - INFO: action counts: {0: 302, 1: 59, 2: 1087, 3: 47, 4: 1303, 5: 127, 6: 156, 7: 59, 8: 889, 9: 291}
2018-06-22 23:21:07,043 - framestack - INFO: steps: 2129760, steps/s: 92.80
  epoch reward:       0.7183/5.4055/8.6454 [n=10] (min/mean/max)
  running reward:     0.1048/9.1826/27.9336 [n=500]

2018-06-22 23:21:52,622 - framestack - INFO: action counts: {0: 271, 1: 54, 2: 1115, 3: 34, 4: 1261, 5: 125, 6: 164, 7: 51, 8: 949, 9: 296}
2018-06-22 23:21:52,643 - framestack - INFO: steps: 2134080, steps/s: 92.80
  epoch reward:       0.1048/4.6342/8.6454 [n=10] (min/mean/max)
  running reward:     0.1048/9.0259/27.9336 [n=500]

2018-06-22 23:22:37,998 - framestack - INFO: action counts: {0: 292, 1: 55, 2: 1067, 3: 31, 4: 1287, 5: 90, 6: 165, 7: 51, 8: 1011, 9: 271}
2018-06-22 23:22:38,024 - framestack - INFO: steps: 2138400, steps/s: 92.80
  epoch reward:       0.1048/4.6342/8.6454 [n=10] (min/mean/max)
  running reward:     0.1048/8.8908/27.9336 [n=500]

2018-06-22 23:40:15,019 - framestack - INFO: action counts: {0: 295, 1: 68, 2: 1165, 3: 37, 4: 1210, 5: 140, 6: 159, 7: 69, 8: 878, 9: 299}
2018-06-22 23:40:15,043 - framestack - INFO: steps: 2237760, steps/s: 93.07
  epoch reward:       1.9148/9.9275/26.2840 [n=10] (min/mean/max)
  running reward:     0.1048/9.0752/48.2639 [n=500]

2018-06-22 23:41:02,247 - framestack - INFO: action counts: {0: 267, 1: 55, 2: 1173, 3: 48, 4: 1229, 5: 130, 6: 178, 7: 50, 8: 847, 9: 343}
2018-06-22 23:41:02,270 - framestack - INFO: steps: 2242080, steps/s: 93.08
  epoch reward:       1.9148/9.9275/26.2840 [n=10] (min/mean/max)
  running reward:     0.1048/9.1817/48.2639 [n=500]

2018-06-22 23:41:47,500 - framestack - INFO: action counts: {0: 297, 1: 55, 2: 1211, 3: 39, 4: 1181, 5: 148, 6: 156, 7: 59, 8: 860, 9: 314}
2018-06-22 23:41:47,523 - framestack - INFO: steps: 2246400, steps/s: 93.10
  epoch reward:       1.9148/9.7135/26.2840 [n=10] (min/mean/max)
  running reward:     0.1048/9.2771/48.2639 [n=5

2018-06-22 23:58:43,556 - framestack - INFO: steps: 2341440, steps/s: 93.45
  epoch reward:       1.6429/6.1744/14.0283 [n=10] (min/mean/max)
  running reward:     0.1048/10.0867/48.2639 [n=500]

2018-06-22 23:59:29,269 - framestack - INFO: action counts: {0: 293, 1: 47, 2: 1261, 3: 42, 4: 1229, 5: 112, 6: 136, 7: 52, 8: 845, 9: 303}
2018-06-22 23:59:29,292 - framestack - INFO: steps: 2345760, steps/s: 93.47
  epoch reward:       1.6429/6.1744/14.0283 [n=10] (min/mean/max)
  running reward:     0.1048/10.1021/48.2639 [n=500]

2018-06-23 00:00:15,222 - framestack - INFO: action counts: {0: 293, 1: 61, 2: 1270, 3: 27, 4: 1271, 5: 118, 6: 152, 7: 60, 8: 798, 9: 270}
2018-06-23 00:00:15,250 - framestack - INFO: steps: 2350080, steps/s: 93.49
  epoch reward:       1.6429/6.9977/16.9297 [n=10] (min/mean/max)
  running reward:     0.1048/10.1493/48.2639 [n=500]

2018-06-23 00:01:00,695 - framestack - INFO: action counts: {0: 262, 1: 37, 2: 1306, 3: 36, 4: 1269, 5: 106, 6: 149, 7: 63, 8: 809, 

2018-06-23 00:17:46,662 - framestack - INFO: action counts: {0: 314, 1: 45, 2: 1227, 3: 41, 4: 1174, 5: 134, 6: 182, 7: 72, 8: 837, 9: 294}
2018-06-23 00:17:46,687 - framestack - INFO: steps: 2449440, steps/s: 93.75
  epoch reward:       0.7183/14.1097/48.0483 [n=10] (min/mean/max)
  running reward:     0.4673/10.3720/48.0483 [n=500]

2018-06-23 00:18:33,187 - framestack - INFO: action counts: {0: 315, 1: 65, 2: 1185, 3: 60, 4: 1131, 5: 138, 6: 228, 7: 72, 8: 797, 9: 329}
2018-06-23 00:18:33,214 - framestack - INFO: steps: 2453760, steps/s: 93.76
  epoch reward:       0.7183/15.5955/48.0483 [n=10] (min/mean/max)
  running reward:     0.4673/10.4854/48.0483 [n=500]

2018-06-23 00:19:19,618 - framestack - INFO: action counts: {0: 307, 1: 68, 2: 1140, 3: 50, 4: 1211, 5: 134, 6: 183, 7: 88, 8: 835, 9: 304}
2018-06-23 00:19:19,637 - framestack - INFO: steps: 2458080, steps/s: 93.77
  epoch reward:       6.3255/15.7001/48.0483 [n=10] (min/mean/max)
  running reward:     0.4673/10.6008/48.048

2018-06-23 00:36:14,073 - framestack - INFO: steps: 2553120, steps/s: 94.00
  epoch reward:       0.1048/10.2322/27.9323 [n=10] (min/mean/max)
  running reward:     0.1048/10.6416/49.0550 [n=500]

2018-06-23 00:37:00,263 - framestack - INFO: action counts: {0: 300, 1: 78, 2: 1213, 3: 34, 4: 1222, 5: 125, 6: 200, 7: 74, 8: 774, 9: 300}
2018-06-23 00:37:00,290 - framestack - INFO: steps: 2557440, steps/s: 94.01
  epoch reward:       2.0826/14.7221/47.3378 [n=10] (min/mean/max)
  running reward:     0.1048/10.8126/49.0550 [n=500]

2018-06-23 00:37:45,899 - framestack - INFO: action counts: {0: 277, 1: 66, 2: 1233, 3: 41, 4: 1195, 5: 135, 6: 185, 7: 63, 8: 836, 9: 289}
2018-06-23 00:37:45,923 - framestack - INFO: steps: 2561760, steps/s: 94.02
  epoch reward:       2.0826/14.7221/47.3378 [n=10] (min/mean/max)
  running reward:     0.1048/10.9835/49.0550 [n=500]

2018-06-23 00:38:31,021 - framestack - INFO: action counts: {0: 279, 1: 51, 2: 1236, 3: 46, 4: 1191, 5: 124, 6: 208, 7: 54, 8: 81

2018-06-23 00:55:29,701 - framestack - INFO: action counts: {0: 285, 1: 45, 2: 1209, 3: 38, 4: 1238, 5: 109, 6: 165, 7: 69, 8: 862, 9: 300}
2018-06-23 00:55:29,726 - framestack - INFO: steps: 2661120, steps/s: 94.06
  epoch reward:       6.5675/13.8073/27.9336 [n=10] (min/mean/max)
  running reward:     0.1048/11.7182/49.0550 [n=500]

2018-06-23 00:56:13,686 - framestack - INFO: action counts: {0: 288, 1: 49, 2: 1239, 3: 44, 4: 1269, 5: 115, 6: 166, 7: 64, 8: 825, 9: 261}
2018-06-23 00:56:13,712 - framestack - INFO: steps: 2665440, steps/s: 94.07
  epoch reward:       6.5675/13.8073/27.9336 [n=10] (min/mean/max)
  running reward:     0.1048/11.7121/49.0550 [n=500]

2018-06-23 00:57:00,750 - framestack - INFO: action counts: {0: 234, 1: 51, 2: 1146, 3: 39, 4: 1294, 5: 110, 6: 183, 7: 62, 8: 901, 9: 300}
2018-06-23 00:57:00,774 - framestack - INFO: steps: 2669760, steps/s: 94.06
  epoch reward:       6.4220/12.4966/27.9336 [n=10] (min/mean/max)
  running reward:     0.1048/11.6502/49.055

2018-06-23 01:13:51,308 - framestack - INFO: steps: 2764800, steps/s: 94.06
  epoch reward:       1.8528/11.4876/18.3592 [n=10] (min/mean/max)
  running reward:     0.1048/11.2856/47.3378 [n=500]

2018-06-23 01:14:36,768 - framestack - INFO: action counts: {0: 228, 1: 44, 2: 1161, 3: 20, 4: 1383, 5: 90, 6: 157, 7: 65, 8: 856, 9: 316}
2018-06-23 01:14:36,796 - framestack - INFO: steps: 2769120, steps/s: 94.07
  epoch reward:       1.8528/13.0865/25.6730 [n=10] (min/mean/max)
  running reward:     0.1048/11.3426/47.3378 [n=500]

2018-06-23 01:15:22,658 - framestack - INFO: action counts: {0: 220, 1: 37, 2: 1184, 3: 20, 4: 1381, 5: 86, 6: 158, 7: 50, 8: 895, 9: 289}
2018-06-23 01:15:22,687 - framestack - INFO: steps: 2773440, steps/s: 94.07
  epoch reward:       0.7183/12.9730/25.6730 [n=10] (min/mean/max)
  running reward:     0.1048/11.3077/47.3378 [n=500]

2018-06-23 01:16:09,030 - framestack - INFO: action counts: {0: 235, 1: 34, 2: 1209, 3: 20, 4: 1311, 5: 111, 6: 154, 7: 50, 8: 905,

2018-06-23 01:33:01,661 - framestack - INFO: action counts: {0: 254, 1: 59, 2: 1150, 3: 48, 4: 1175, 5: 138, 6: 184, 7: 72, 8: 956, 9: 284}
2018-06-23 01:33:01,685 - framestack - INFO: steps: 2872800, steps/s: 94.03
  epoch reward:       4.0323/15.0955/47.8556 [n=10] (min/mean/max)
  running reward:     0.7183/12.4993/48.8833 [n=500]

2018-06-23 01:33:47,546 - framestack - INFO: action counts: {0: 276, 1: 62, 2: 1167, 3: 51, 4: 1151, 5: 125, 6: 198, 7: 76, 8: 922, 9: 292}
2018-06-23 01:33:47,569 - framestack - INFO: steps: 2877120, steps/s: 94.02
  epoch reward:       4.0323/15.0955/47.8556 [n=10] (min/mean/max)
  running reward:     0.7183/12.5251/48.8833 [n=500]

2018-06-23 01:34:32,397 - framestack - INFO: action counts: {0: 246, 1: 68, 2: 1127, 3: 51, 4: 1199, 5: 133, 6: 198, 7: 81, 8: 877, 9: 340}
2018-06-23 01:34:32,422 - framestack - INFO: steps: 2881440, steps/s: 94.02
  epoch reward:       4.0323/12.6852/33.3959 [n=10] (min/mean/max)
  running reward:     0.7183/12.5026/48.883

2018-06-23 01:51:25,484 - framestack - INFO: steps: 2976480, steps/s: 94.01
  epoch reward:       0.7183/8.1927/17.1103 [n=10] (min/mean/max)
  running reward:     0.1048/12.4496/49.4372 [n=500]

2018-06-23 01:52:10,708 - framestack - INFO: action counts: {0: 272, 1: 56, 2: 1125, 3: 37, 4: 1143, 5: 132, 6: 198, 7: 79, 8: 947, 9: 331}
2018-06-23 01:52:10,732 - framestack - INFO: steps: 2980800, steps/s: 94.02
  epoch reward:       0.7183/6.4116/16.5352 [n=10] (min/mean/max)
  running reward:     0.1048/12.3480/49.4372 [n=500]

2018-06-23 01:52:57,189 - framestack - INFO: action counts: {0: 269, 1: 48, 2: 1167, 3: 33, 4: 1133, 5: 143, 6: 197, 7: 81, 8: 928, 9: 321}
2018-06-23 01:52:57,212 - framestack - INFO: steps: 2985120, steps/s: 94.01
  epoch reward:       0.7183/6.4116/16.5352 [n=10] (min/mean/max)
  running reward:     0.1048/12.2145/49.4372 [n=500]

2018-06-23 01:53:43,409 - framestack - INFO: action counts: {0: 269, 1: 60, 2: 1171, 3: 40, 4: 1162, 5: 115, 6: 154, 7: 62, 8: 979, 

2018-06-23 02:10:33,723 - framestack - INFO: action counts: {0: 213, 1: 41, 2: 1259, 3: 26, 4: 1232, 5: 113, 6: 162, 7: 65, 8: 922, 9: 287}
2018-06-23 02:10:33,746 - framestack - INFO: steps: 3084480, steps/s: 94.05
  epoch reward:       0.4701/13.3348/29.9340 [n=10] (min/mean/max)
  running reward:     0.1048/10.7692/49.4372 [n=500]

2018-06-23 02:11:20,261 - framestack - INFO: action counts: {0: 234, 1: 50, 2: 1266, 3: 32, 4: 1170, 5: 125, 6: 179, 7: 62, 8: 894, 9: 308}
2018-06-23 02:11:20,288 - framestack - INFO: steps: 3088800, steps/s: 94.04
  epoch reward:       0.4701/13.3348/29.9340 [n=10] (min/mean/max)
  running reward:     0.1048/10.7340/49.4372 [n=500]

2018-06-23 02:12:06,276 - framestack - INFO: action counts: {0: 276, 1: 61, 2: 1195, 3: 40, 4: 1093, 5: 138, 6: 184, 7: 77, 8: 917, 9: 339}
2018-06-23 02:12:06,300 - framestack - INFO: steps: 3093120, steps/s: 94.04
  epoch reward:       0.4701/12.1249/29.9340 [n=10] (min/mean/max)
  running reward:     0.1048/10.6746/49.437

2018-06-23 02:28:46,714 - framestack - INFO: steps: 3188160, steps/s: 94.10
  epoch reward:       3.3070/13.2300/47.5631 [n=10] (min/mean/max)
  running reward:     0.1048/10.3376/48.6114 [n=500]

2018-06-23 02:29:33,147 - framestack - INFO: action counts: {0: 230, 1: 41, 2: 1213, 3: 26, 4: 1213, 5: 98, 6: 153, 7: 43, 8: 1013, 9: 290}
2018-06-23 02:29:33,171 - framestack - INFO: steps: 3192480, steps/s: 94.10
  epoch reward:       3.3070/13.2864/47.5631 [n=10] (min/mean/max)
  running reward:     0.1048/10.4395/48.6114 [n=500]

2018-06-23 02:30:17,638 - framestack - INFO: action counts: {0: 238, 1: 35, 2: 1223, 3: 42, 4: 1133, 5: 90, 6: 187, 7: 59, 8: 1019, 9: 294}
2018-06-23 02:30:17,662 - framestack - INFO: steps: 3196800, steps/s: 94.11
  epoch reward:       3.3070/13.6604/47.5631 [n=10] (min/mean/max)
  running reward:     0.1048/10.5845/48.6114 [n=500]

2018-06-23 02:31:02,254 - framestack - INFO: action counts: {0: 245, 1: 52, 2: 1180, 3: 32, 4: 1198, 5: 110, 6: 146, 7: 58, 8: 99

2018-06-23 02:47:52,591 - framestack - INFO: action counts: {0: 267, 1: 52, 2: 1194, 3: 30, 4: 1110, 5: 127, 6: 150, 7: 49, 8: 1032, 9: 309}
2018-06-23 02:47:52,614 - framestack - INFO: steps: 3296160, steps/s: 94.12
  epoch reward:       1.5754/15.8868/48.9278 [n=10] (min/mean/max)
  running reward:     0.1048/12.3085/48.9278 [n=500]

2018-06-23 02:48:39,210 - framestack - INFO: action counts: {0: 281, 1: 44, 2: 1137, 3: 48, 4: 1089, 5: 129, 6: 184, 7: 59, 8: 1013, 9: 336}
2018-06-23 02:48:39,232 - framestack - INFO: steps: 3300480, steps/s: 94.11
  epoch reward:       1.5754/16.6301/48.9278 [n=10] (min/mean/max)
  running reward:     0.1048/12.3744/48.9278 [n=500]

2018-06-23 02:49:25,858 - framestack - INFO: action counts: {0: 254, 1: 42, 2: 1128, 3: 47, 4: 1048, 5: 118, 6: 147, 7: 66, 8: 1117, 9: 353}
2018-06-23 02:49:25,883 - framestack - INFO: steps: 3304800, steps/s: 94.10
  epoch reward:       1.5754/12.0423/47.6067 [n=10] (min/mean/max)
  running reward:     0.1048/12.3485/48.

2018-06-23 03:06:18,378 - framestack - INFO: steps: 3399840, steps/s: 94.09
  epoch reward:       1.4097/9.0373/24.9710 [n=10] (min/mean/max)
  running reward:     1.4097/13.1981/48.9278 [n=500]

2018-06-23 03:07:04,117 - framestack - INFO: action counts: {0: 269, 1: 48, 2: 1220, 3: 39, 4: 1041, 5: 125, 6: 157, 7: 61, 8: 1002, 9: 358}
2018-06-23 03:07:04,144 - framestack - INFO: steps: 3404160, steps/s: 94.09
  epoch reward:       1.4097/9.3764/24.9710 [n=10] (min/mean/max)
  running reward:     1.4097/13.1211/48.9278 [n=500]

2018-06-23 03:07:50,680 - framestack - INFO: action counts: {0: 277, 1: 53, 2: 1264, 3: 33, 4: 1019, 5: 100, 6: 163, 7: 59, 8: 1022, 9: 330}
2018-06-23 03:07:50,706 - framestack - INFO: steps: 3408480, steps/s: 94.08
  epoch reward:       1.4097/9.9494/24.9710 [n=10] (min/mean/max)
  running reward:     1.4097/13.0543/48.9278 [n=500]

2018-06-23 03:08:37,765 - framestack - INFO: action counts: {0: 264, 1: 57, 2: 1226, 3: 43, 4: 1040, 5: 108, 6: 169, 7: 51, 8: 103

2018-06-23 03:25:36,722 - framestack - INFO: action counts: {0: 238, 1: 43, 2: 1153, 3: 43, 4: 1036, 5: 94, 6: 164, 7: 53, 8: 1166, 9: 330}
2018-06-23 03:25:36,747 - framestack - INFO: steps: 3507840, steps/s: 94.05
  epoch reward:       2.4636/7.4659/16.9571 [n=10] (min/mean/max)
  running reward:     0.7183/11.3252/48.9278 [n=500]

2018-06-23 03:26:23,966 - framestack - INFO: action counts: {0: 265, 1: 40, 2: 1171, 3: 43, 4: 1030, 5: 116, 6: 163, 7: 49, 8: 1078, 9: 365}
2018-06-23 03:26:23,995 - framestack - INFO: steps: 3512160, steps/s: 94.05
  epoch reward:       2.4636/7.4659/16.9571 [n=10] (min/mean/max)
  running reward:     0.7183/11.1568/48.9278 [n=500]

2018-06-23 03:27:10,337 - framestack - INFO: action counts: {0: 228, 1: 54, 2: 1099, 3: 40, 4: 1053, 5: 123, 6: 153, 7: 59, 8: 1143, 9: 368}
2018-06-23 03:27:10,366 - framestack - INFO: steps: 3516480, steps/s: 94.05
  epoch reward:       2.0410/7.6540/19.2211 [n=10] (min/mean/max)
  running reward:     0.7183/10.9773/48.6564

2018-06-23 03:44:05,349 - framestack - INFO: steps: 3611520, steps/s: 94.04
  epoch reward:       0.7183/10.7522/24.4562 [n=10] (min/mean/max)
  running reward:     0.7183/10.9851/48.6564 [n=500]

2018-06-23 03:44:52,344 - framestack - INFO: action counts: {0: 269, 1: 42, 2: 1097, 3: 45, 4: 1053, 5: 135, 6: 176, 7: 44, 8: 1124, 9: 335}
2018-06-23 03:44:52,368 - framestack - INFO: steps: 3615840, steps/s: 94.03
  epoch reward:       0.7183/11.5700/24.4562 [n=10] (min/mean/max)
  running reward:     0.7183/11.0357/48.6564 [n=500]

2018-06-23 03:45:38,911 - framestack - INFO: action counts: {0: 225, 1: 47, 2: 1142, 3: 43, 4: 1042, 5: 114, 6: 156, 7: 71, 8: 1140, 9: 340}
2018-06-23 03:45:38,937 - framestack - INFO: steps: 3620160, steps/s: 94.02
  epoch reward:       0.7183/13.5435/28.4084 [n=10] (min/mean/max)
  running reward:     0.7183/11.1191/48.6564 [n=500]

2018-06-23 03:46:23,587 - framestack - INFO: action counts: {0: 245, 1: 47, 2: 1165, 3: 40, 4: 1042, 5: 108, 6: 146, 7: 48, 8: 

2018-06-23 04:03:13,635 - framestack - INFO: action counts: {0: 224, 1: 31, 2: 1115, 3: 33, 4: 1048, 5: 105, 6: 138, 7: 46, 8: 1212, 9: 368}
2018-06-23 04:03:13,665 - framestack - INFO: steps: 3719520, steps/s: 94.03
  epoch reward:       1.5994/6.1801/11.6642 [n=10] (min/mean/max)
  running reward:     0.7183/11.4816/47.3078 [n=500]

2018-06-23 04:03:59,700 - framestack - INFO: action counts: {0: 214, 1: 49, 2: 1131, 3: 37, 4: 1061, 5: 94, 6: 156, 7: 49, 8: 1215, 9: 314}
2018-06-23 04:03:59,730 - framestack - INFO: steps: 3723840, steps/s: 94.04
  epoch reward:       1.5994/6.1801/11.6642 [n=10] (min/mean/max)
  running reward:     0.7183/11.4559/47.3078 [n=500]

2018-06-23 04:04:45,050 - framestack - INFO: action counts: {0: 230, 1: 41, 2: 1145, 3: 51, 4: 1044, 5: 92, 6: 154, 7: 56, 8: 1194, 9: 313}
2018-06-23 04:04:45,075 - framestack - INFO: steps: 3728160, steps/s: 94.03
  epoch reward:       1.5994/7.3923/17.8988 [n=10] (min/mean/max)
  running reward:     0.7183/11.4544/47.3078 

2018-06-23 04:21:30,068 - framestack - INFO: steps: 3823200, steps/s: 94.06
  epoch reward:       0.7183/10.1090/21.5764 [n=10] (min/mean/max)
  running reward:     0.1048/10.3075/47.3078 [n=500]

2018-06-23 04:22:15,718 - framestack - INFO: action counts: {0: 235, 1: 35, 2: 1017, 3: 28, 4: 1094, 5: 99, 6: 180, 7: 55, 8: 1246, 9: 331}
2018-06-23 04:22:15,741 - framestack - INFO: steps: 3827520, steps/s: 94.06
  epoch reward:       0.7183/8.4650/15.7156 [n=10] (min/mean/max)
  running reward:     0.1048/10.2617/47.3078 [n=500]

2018-06-23 04:23:02,387 - framestack - INFO: action counts: {0: 205, 1: 38, 2: 997, 3: 31, 4: 1098, 5: 95, 6: 143, 7: 55, 8: 1290, 9: 368}
2018-06-23 04:23:02,411 - framestack - INFO: steps: 3831840, steps/s: 94.05
  epoch reward:       2.9246/8.4445/15.7156 [n=10] (min/mean/max)
  running reward:     0.1048/10.1992/47.3078 [n=500]

2018-06-23 04:23:48,301 - framestack - INFO: action counts: {0: 201, 1: 40, 2: 1037, 3: 23, 4: 1100, 5: 100, 6: 132, 7: 56, 8: 1272,

2018-06-23 04:40:48,863 - framestack - INFO: action counts: {0: 222, 1: 39, 2: 1069, 3: 30, 4: 1022, 5: 88, 6: 153, 7: 52, 8: 1301, 9: 344}
2018-06-23 04:40:48,894 - framestack - INFO: steps: 3931200, steps/s: 93.98
  epoch reward:       2.1538/12.7366/27.9323 [n=10] (min/mean/max)
  running reward:     0.1048/9.1117/28.1271 [n=500]

2018-06-23 04:41:35,712 - framestack - INFO: action counts: {0: 247, 1: 52, 2: 978, 3: 41, 4: 1054, 5: 111, 6: 185, 7: 76, 8: 1245, 9: 331}
2018-06-23 04:41:35,745 - framestack - INFO: steps: 3935520, steps/s: 93.98
  epoch reward:       2.1538/12.7366/27.9323 [n=10] (min/mean/max)
  running reward:     0.1048/9.2428/28.1271 [n=500]

2018-06-23 04:42:23,278 - framestack - INFO: action counts: {0: 245, 1: 46, 2: 997, 3: 46, 4: 1057, 5: 105, 6: 177, 7: 70, 8: 1237, 9: 340}
2018-06-23 04:42:23,302 - framestack - INFO: steps: 3939840, steps/s: 93.98
  epoch reward:       1.2658/9.1300/25.2034 [n=10] (min/mean/max)
  running reward:     0.1048/9.3018/28.1271 [n

2018-06-23 05:00:21,325 - framestack - INFO: action counts: {0: 230, 1: 40, 2: 938, 3: 32, 4: 1151, 5: 92, 6: 153, 7: 48, 8: 1297, 9: 339}
2018-06-23 05:00:21,356 - framestack - INFO: steps: 4039200, steps/s: 93.90
  epoch reward:       0.7183/8.6697/19.0388 [n=10] (min/mean/max)
  running reward:     0.4673/8.4001/27.9336 [n=500]

2018-06-23 05:01:06,620 - framestack - INFO: action counts: {0: 230, 1: 38, 2: 918, 3: 37, 4: 1089, 5: 102, 6: 164, 7: 58, 8: 1326, 9: 358}
2018-06-23 05:01:06,645 - framestack - INFO: steps: 4043520, steps/s: 93.90
  epoch reward:       0.7183/11.4678/24.6672 [n=10] (min/mean/max)
  running reward:     0.4673/8.4601/27.9336 [n=500]

2018-06-23 05:01:50,613 - framestack - INFO: action counts: {0: 243, 1: 63, 2: 923, 3: 40, 4: 1125, 5: 113, 6: 164, 7: 56, 8: 1227, 9: 366}
2018-06-23 05:01:50,633 - framestack - INFO: steps: 4047840, steps/s: 93.91
  epoch reward:       0.7183/11.4678/24.6672 [n=10] (min/mean/max)
  running reward:     0.4673/8.5206/27.9336 [n=

2018-06-23 05:19:30,580 - framestack - INFO: action counts: {0: 216, 1: 43, 2: 873, 3: 49, 4: 1166, 5: 104, 6: 167, 7: 53, 8: 1258, 9: 391}
2018-06-23 05:19:30,609 - framestack - INFO: steps: 4147200, steps/s: 93.85
  epoch reward:       2.0826/7.2215/14.6863 [n=10] (min/mean/max)
  running reward:     0.4673/8.3370/27.9323 [n=500]

2018-06-23 05:20:19,099 - framestack - INFO: action counts: {0: 219, 1: 46, 2: 912, 3: 36, 4: 1170, 5: 108, 6: 153, 7: 68, 8: 1214, 9: 394}
2018-06-23 05:20:19,124 - framestack - INFO: steps: 4151520, steps/s: 93.84
  epoch reward:       2.0826/6.8310/16.4828 [n=10] (min/mean/max)
  running reward:     0.4673/8.2189/26.4134 [n=500]

2018-06-23 05:21:04,453 - framestack - INFO: action counts: {0: 225, 1: 62, 2: 912, 3: 45, 4: 1157, 5: 112, 6: 158, 7: 62, 8: 1220, 9: 367}
2018-06-23 05:21:04,474 - framestack - INFO: steps: 4155840, steps/s: 93.84
  epoch reward:       2.1210/6.5140/13.3801 [n=10] (min/mean/max)
  running reward:     0.4673/8.1666/26.4134 [n=5

2018-06-23 05:38:35,208 - framestack - INFO: action counts: {0: 284, 1: 76, 2: 838, 3: 54, 4: 1074, 5: 121, 6: 184, 7: 82, 8: 1178, 9: 429}
2018-06-23 05:38:35,231 - framestack - INFO: steps: 4255200, steps/s: 93.88
  epoch reward:       3.8667/10.8237/27.9323 [n=10] (min/mean/max)
  running reward:     0.1048/9.1613/27.9323 [n=500]

2018-06-23 05:39:21,446 - framestack - INFO: action counts: {0: 226, 1: 46, 2: 825, 3: 47, 4: 1144, 5: 116, 6: 173, 7: 74, 8: 1232, 9: 437}
2018-06-23 05:39:21,472 - framestack - INFO: steps: 4259520, steps/s: 93.88
  epoch reward:       3.8667/15.1256/48.6586 [n=10] (min/mean/max)
  running reward:     0.1048/9.2344/48.6586 [n=500]

2018-06-23 05:40:06,781 - framestack - INFO: action counts: {0: 243, 1: 80, 2: 866, 3: 44, 4: 1075, 5: 106, 6: 183, 7: 73, 8: 1228, 9: 422}
2018-06-23 05:40:06,806 - framestack - INFO: steps: 4263840, steps/s: 93.88
  epoch reward:       3.8667/15.1256/48.6586 [n=10] (min/mean/max)
  running reward:     0.1048/9.3076/48.6586 [

2018-06-23 05:57:48,136 - framestack - INFO: action counts: {0: 208, 1: 66, 2: 847, 3: 49, 4: 1115, 5: 133, 6: 195, 7: 89, 8: 1222, 9: 396}
2018-06-23 05:57:48,157 - framestack - INFO: steps: 4363200, steps/s: 93.87
  epoch reward:       2.1264/10.4592/27.9301 [n=10] (min/mean/max)
  running reward:     0.1048/10.1973/48.6586 [n=500]

2018-06-23 05:58:34,308 - framestack - INFO: action counts: {0: 189, 1: 70, 2: 827, 3: 50, 4: 1159, 5: 119, 6: 192, 7: 89, 8: 1204, 9: 421}
2018-06-23 05:58:34,334 - framestack - INFO: steps: 4367520, steps/s: 93.87
  epoch reward:       2.1264/10.8357/27.9301 [n=10] (min/mean/max)
  running reward:     0.1048/10.2774/48.6586 [n=500]

2018-06-23 05:59:19,746 - framestack - INFO: action counts: {0: 222, 1: 71, 2: 822, 3: 38, 4: 1189, 5: 108, 6: 181, 7: 65, 8: 1232, 9: 392}
2018-06-23 05:59:19,773 - framestack - INFO: steps: 4371840, steps/s: 93.87
  epoch reward:       2.1264/10.8357/27.9301 [n=10] (min/mean/max)
  running reward:     0.1048/10.3638/48.658

2018-06-23 06:16:14,930 - framestack - INFO: steps: 4466880, steps/s: 93.86
  epoch reward:       0.7183/6.3069/13.3295 [n=10] (min/mean/max)
  running reward:     0.4673/9.7986/48.6586 [n=500]

2018-06-23 06:17:00,506 - framestack - INFO: action counts: {0: 275, 1: 63, 2: 883, 3: 56, 4: 1047, 5: 118, 6: 182, 7: 75, 8: 1243, 9: 378}
2018-06-23 06:17:00,532 - framestack - INFO: steps: 4471200, steps/s: 93.86
  epoch reward:       3.0923/6.5755/13.3295 [n=10] (min/mean/max)
  running reward:     0.4673/9.7136/48.6586 [n=500]

2018-06-23 06:17:46,668 - framestack - INFO: action counts: {0: 248, 1: 66, 2: 864, 3: 72, 4: 1048, 5: 137, 6: 210, 7: 84, 8: 1213, 9: 378}
2018-06-23 06:17:46,692 - framestack - INFO: steps: 4475520, steps/s: 93.86
  epoch reward:       0.7183/6.2928/13.3295 [n=10] (min/mean/max)
  running reward:     0.4673/9.5370/48.6586 [n=500]

2018-06-23 06:18:31,454 - framestack - INFO: action counts: {0: 262, 1: 80, 2: 832, 3: 56, 4: 1045, 5: 116, 6: 204, 7: 100, 8: 1212, 9:

2018-06-23 06:35:23,438 - framestack - INFO: action counts: {0: 177, 1: 46, 2: 892, 3: 36, 4: 1100, 5: 103, 6: 150, 7: 74, 8: 1350, 9: 392}
2018-06-23 06:35:23,463 - framestack - INFO: steps: 4574880, steps/s: 93.85
  epoch reward:       3.7920/7.1657/11.0493 [n=10] (min/mean/max)
  running reward:     0.4673/8.4325/47.3181 [n=500]

2018-06-23 06:36:10,165 - framestack - INFO: action counts: {0: 213, 1: 40, 2: 911, 3: 21, 4: 1057, 5: 91, 6: 144, 7: 65, 8: 1349, 9: 429}
2018-06-23 06:36:10,189 - framestack - INFO: steps: 4579200, steps/s: 93.85
  epoch reward:       3.7687/6.9703/11.0493 [n=10] (min/mean/max)
  running reward:     0.4673/8.3628/47.3181 [n=500]

2018-06-23 06:36:55,383 - framestack - INFO: action counts: {0: 208, 1: 32, 2: 865, 3: 41, 4: 1177, 5: 84, 6: 130, 7: 54, 8: 1301, 9: 428}
2018-06-23 06:36:55,405 - framestack - INFO: steps: 4583520, steps/s: 93.85
  epoch reward:       3.7687/8.2125/15.1198 [n=10] (min/mean/max)
  running reward:     0.4673/8.3103/47.3181 [n=500

2018-06-23 06:54:35,779 - framestack - INFO: action counts: {0: 178, 1: 47, 2: 897, 3: 9, 4: 1168, 5: 97, 6: 107, 7: 65, 8: 1379, 9: 373}
2018-06-23 06:54:35,810 - framestack - INFO: steps: 4682880, steps/s: 93.84
  epoch reward:       2.9164/10.7811/17.0392 [n=10] (min/mean/max)
  running reward:     0.7183/10.5438/47.3181 [n=500]

2018-06-23 06:55:21,796 - framestack - INFO: action counts: {0: 213, 1: 33, 2: 902, 3: 28, 4: 1124, 5: 74, 6: 133, 7: 59, 8: 1357, 9: 397}
2018-06-23 06:55:21,827 - framestack - INFO: steps: 4687200, steps/s: 93.84
  epoch reward:       2.9164/10.4952/17.0392 [n=10] (min/mean/max)
  running reward:     0.7183/10.6222/47.3181 [n=500]

2018-06-23 06:56:06,781 - framestack - INFO: action counts: {0: 187, 1: 39, 2: 856, 3: 16, 4: 1161, 5: 87, 6: 131, 7: 59, 8: 1448, 9: 336}
2018-06-23 06:56:06,806 - framestack - INFO: steps: 4691520, steps/s: 93.85
  epoch reward:       2.9164/10.7811/17.0392 [n=10] (min/mean/max)
  running reward:     0.7183/10.7119/47.3181 [n

2018-06-23 07:13:02,053 - framestack - INFO: steps: 4786560, steps/s: 93.86
  epoch reward:       1.2658/7.7379/13.8998 [n=10] (min/mean/max)
  running reward:     1.2658/12.7642/48.6778 [n=500]

2018-06-23 07:13:49,437 - framestack - INFO: action counts: {0: 164, 1: 43, 2: 767, 3: 27, 4: 1277, 5: 72, 6: 105, 7: 45, 8: 1496, 9: 324}
2018-06-23 07:13:49,463 - framestack - INFO: steps: 4790880, steps/s: 93.86
  epoch reward:       2.7226/8.1668/13.8998 [n=10] (min/mean/max)
  running reward:     1.2658/12.7842/48.6778 [n=500]

2018-06-23 07:14:35,514 - framestack - INFO: action counts: {0: 177, 1: 31, 2: 763, 3: 17, 4: 1255, 5: 76, 6: 104, 7: 39, 8: 1510, 9: 348}
2018-06-23 07:14:35,539 - framestack - INFO: steps: 4795200, steps/s: 93.86
  epoch reward:       1.4130/7.6641/13.8998 [n=10] (min/mean/max)
  running reward:     1.2658/12.7981/48.6778 [n=500]

2018-06-23 07:15:20,252 - framestack - INFO: action counts: {0: 169, 1: 29, 2: 796, 3: 25, 4: 1234, 5: 71, 6: 101, 7: 40, 8: 1524, 9: 

2018-06-23 07:32:03,118 - framestack - INFO: action counts: {0: 175, 1: 28, 2: 831, 3: 18, 4: 1231, 5: 76, 6: 109, 7: 41, 8: 1428, 9: 383}
2018-06-23 07:32:03,144 - framestack - INFO: steps: 4894560, steps/s: 93.89
  epoch reward:       0.4673/8.7938/19.6062 [n=10] (min/mean/max)
  running reward:     0.4673/11.9698/48.6778 [n=500]

2018-06-23 07:32:50,602 - framestack - INFO: action counts: {0: 165, 1: 29, 2: 828, 3: 18, 4: 1240, 5: 80, 6: 115, 7: 51, 8: 1440, 9: 354}
2018-06-23 07:32:50,627 - framestack - INFO: steps: 4898880, steps/s: 93.89
  epoch reward:       0.4673/11.5153/27.9336 [n=10] (min/mean/max)
  running reward:     0.4673/11.9844/48.6778 [n=500]

2018-06-23 07:33:34,882 - framestack - INFO: action counts: {0: 156, 1: 36, 2: 811, 3: 24, 4: 1270, 5: 77, 6: 115, 7: 33, 8: 1477, 9: 321}
2018-06-23 07:33:34,904 - framestack - INFO: steps: 4903200, steps/s: 93.89
  epoch reward:       0.4673/11.5153/27.9336 [n=10] (min/mean/max)
  running reward:     0.4673/12.0048/48.6778 [n

2018-06-23 07:51:11,052 - framestack - INFO: action counts: {0: 207, 1: 35, 2: 989, 3: 26, 4: 1172, 5: 70, 6: 100, 7: 41, 8: 1346, 9: 334}
2018-06-23 07:51:11,076 - framestack - INFO: steps: 5002560, steps/s: 93.89
  epoch reward:       0.1954/10.8946/26.4118 [n=10] (min/mean/max)
  running reward:     0.1954/11.3419/48.9253 [n=500]

2018-06-23 07:51:57,241 - framestack - INFO: action counts: {0: 216, 1: 40, 2: 903, 3: 21, 4: 1212, 5: 94, 6: 107, 7: 56, 8: 1326, 9: 345}
2018-06-23 07:51:57,266 - framestack - INFO: steps: 5006880, steps/s: 93.89
  epoch reward:       0.1954/10.8109/26.4118 [n=10] (min/mean/max)
  running reward:     0.1954/11.3948/48.9253 [n=500]

2018-06-23 07:52:42,427 - framestack - INFO: action counts: {0: 213, 1: 18, 2: 962, 3: 17, 4: 1137, 5: 68, 6: 97, 7: 58, 8: 1403, 9: 347}
2018-06-23 07:52:42,453 - framestack - INFO: steps: 5011200, steps/s: 93.89
  epoch reward:       0.1954/8.9773/26.4118 [n=10] (min/mean/max)
  running reward:     0.1954/11.4211/48.9253 [n=

2018-06-23 08:10:31,095 - framestack - INFO: action counts: {0: 225, 1: 30, 2: 1061, 3: 20, 4: 1087, 5: 66, 6: 110, 7: 51, 8: 1327, 9: 343}
2018-06-23 08:10:31,121 - framestack - INFO: steps: 5110560, steps/s: 93.86
  epoch reward:       6.5114/15.9400/26.2387 [n=10] (min/mean/max)
  running reward:     0.1954/10.9005/48.9253 [n=500]

2018-06-23 08:11:17,565 - framestack - INFO: action counts: {0: 190, 1: 28, 2: 1037, 3: 16, 4: 1075, 5: 61, 6: 91, 7: 56, 8: 1417, 9: 349}
2018-06-23 08:11:17,590 - framestack - INFO: steps: 5114880, steps/s: 93.86
  epoch reward:       6.5114/15.9400/26.2387 [n=10] (min/mean/max)
  running reward:     0.1954/10.9890/48.9253 [n=500]

2018-06-23 08:12:03,101 - framestack - INFO: action counts: {0: 217, 1: 29, 2: 980, 3: 18, 4: 1141, 5: 74, 6: 93, 7: 46, 8: 1456, 9: 266}
2018-06-23 08:12:03,129 - framestack - INFO: steps: 5119200, steps/s: 93.86
  epoch reward:       6.5114/14.6011/26.2387 [n=10] (min/mean/max)
  running reward:     0.1954/11.0507/48.9253 [

2018-06-23 08:29:41,885 - framestack - INFO: action counts: {0: 192, 1: 34, 2: 1008, 3: 18, 4: 1207, 5: 69, 6: 86, 7: 40, 8: 1369, 9: 297}
2018-06-23 08:29:41,914 - framestack - INFO: steps: 5218560, steps/s: 93.85
  epoch reward:       3.3070/11.1067/18.3592 [n=10] (min/mean/max)
  running reward:     0.1020/10.0769/33.0766 [n=500]

2018-06-23 08:30:26,849 - framestack - INFO: action counts: {0: 215, 1: 37, 2: 980, 3: 29, 4: 1124, 5: 75, 6: 116, 7: 60, 8: 1364, 9: 320}
2018-06-23 08:30:26,871 - framestack - INFO: steps: 5222880, steps/s: 93.86
  epoch reward:       3.3070/11.1067/18.3592 [n=10] (min/mean/max)
  running reward:     0.1020/10.0828/33.0766 [n=500]

2018-06-23 08:31:12,746 - framestack - INFO: action counts: {0: 225, 1: 32, 2: 1008, 3: 32, 4: 1141, 5: 82, 6: 105, 7: 57, 8: 1311, 9: 327}
2018-06-23 08:31:12,773 - framestack - INFO: steps: 5227200, steps/s: 93.86
  epoch reward:       3.3070/11.1067/18.3592 [n=10] (min/mean/max)
  running reward:     0.1020/10.1254/33.0766 

2018-06-23 08:47:51,409 - framestack - INFO: steps: 5322240, steps/s: 93.89
  epoch reward:       2.1511/9.6703/24.7542 [n=10] (min/mean/max)
  running reward:     0.1020/10.3509/33.0766 [n=500]

2018-06-23 08:48:36,936 - framestack - INFO: action counts: {0: 263, 1: 71, 2: 959, 3: 66, 4: 966, 5: 139, 6: 183, 7: 102, 8: 1186, 9: 385}
2018-06-23 08:48:36,962 - framestack - INFO: steps: 5326560, steps/s: 93.88
  epoch reward:       2.1511/9.6703/24.7542 [n=10] (min/mean/max)
  running reward:     0.1020/10.2255/33.0766 [n=500]

2018-06-23 08:49:23,384 - framestack - INFO: action counts: {0: 301, 1: 69, 2: 875, 3: 54, 4: 964, 5: 140, 6: 163, 7: 109, 8: 1217, 9: 428}
2018-06-23 08:49:23,409 - framestack - INFO: steps: 5330880, steps/s: 93.87
  epoch reward:       2.1511/9.4631/24.7542 [n=10] (min/mean/max)
  running reward:     0.1020/10.0959/33.0766 [n=500]

2018-06-23 08:50:08,139 - framestack - INFO: action counts: {0: 294, 1: 70, 2: 955, 3: 45, 4: 949, 5: 117, 6: 175, 7: 101, 8: 1247, 

2018-06-23 09:06:41,474 - framestack - INFO: action counts: {0: 262, 1: 55, 2: 1029, 3: 31, 4: 974, 5: 111, 6: 156, 7: 64, 8: 1266, 9: 372}
2018-06-23 09:06:41,499 - framestack - INFO: steps: 5430240, steps/s: 93.93
  epoch reward:       1.3901/16.1809/48.6972 [n=10] (min/mean/max)
  running reward:     0.4645/10.9358/48.6972 [n=500]

2018-06-23 09:07:27,335 - framestack - INFO: action counts: {0: 300, 1: 65, 2: 935, 3: 43, 4: 995, 5: 91, 6: 145, 7: 95, 8: 1255, 9: 396}
2018-06-23 09:07:27,365 - framestack - INFO: steps: 5434560, steps/s: 93.94
  epoch reward:       1.3901/16.1809/48.6972 [n=10] (min/mean/max)
  running reward:     0.4645/11.0373/48.6972 [n=500]

2018-06-23 09:08:12,335 - framestack - INFO: action counts: {0: 250, 1: 72, 2: 967, 3: 44, 4: 1037, 5: 118, 6: 151, 7: 85, 8: 1210, 9: 386}
2018-06-23 09:08:12,362 - framestack - INFO: steps: 5438880, steps/s: 93.95
  epoch reward:       1.3901/14.7669/48.6972 [n=10] (min/mean/max)
  running reward:     0.4645/11.1105/48.6972 

2018-06-23 09:25:03,492 - framestack - INFO: steps: 5533920, steps/s: 93.95
  epoch reward:       1.9893/9.3548/14.8004 [n=10] (min/mean/max)
  running reward:     0.4645/11.9642/48.6972 [n=500]

2018-06-23 09:25:51,773 - framestack - INFO: action counts: {0: 223, 1: 48, 2: 993, 3: 31, 4: 1029, 5: 80, 6: 124, 7: 79, 8: 1374, 9: 339}
2018-06-23 09:25:51,800 - framestack - INFO: steps: 5538240, steps/s: 93.94
  epoch reward:       1.9893/7.2721/14.0978 [n=10] (min/mean/max)
  running reward:     0.4645/11.9163/48.6972 [n=500]

2018-06-23 09:26:36,598 - framestack - INFO: action counts: {0: 239, 1: 45, 2: 995, 3: 23, 4: 1017, 5: 86, 6: 125, 7: 57, 8: 1382, 9: 351}
2018-06-23 09:26:36,625 - framestack - INFO: steps: 5542560, steps/s: 93.94
  epoch reward:       1.9893/9.7799/29.4183 [n=10] (min/mean/max)
  running reward:     0.4645/11.9185/48.6972 [n=500]

2018-06-23 09:27:22,926 - framestack - INFO: action counts: {0: 242, 1: 45, 2: 1004, 3: 31, 4: 1032, 5: 71, 6: 115, 7: 70, 8: 1373, 9:

2018-06-23 09:44:11,292 - framestack - INFO: action counts: {0: 284, 1: 50, 2: 976, 3: 45, 4: 956, 5: 118, 6: 183, 7: 95, 8: 1275, 9: 338}
2018-06-23 09:44:11,316 - framestack - INFO: steps: 5641920, steps/s: 93.99
  epoch reward:       3.1904/11.8686/28.4084 [n=10] (min/mean/max)
  running reward:     1.2854/12.2900/48.6972 [n=500]

2018-06-23 09:44:56,600 - framestack - INFO: action counts: {0: 266, 1: 62, 2: 973, 3: 48, 4: 954, 5: 114, 6: 167, 7: 91, 8: 1280, 9: 365}
2018-06-23 09:44:56,625 - framestack - INFO: steps: 5646240, steps/s: 93.98
  epoch reward:       3.3058/10.3592/28.4084 [n=10] (min/mean/max)
  running reward:     1.2854/12.1736/48.6972 [n=500]

2018-06-23 09:45:42,734 - framestack - INFO: action counts: {0: 269, 1: 50, 2: 1070, 3: 39, 4: 967, 5: 96, 6: 163, 7: 87, 8: 1219, 9: 360}
2018-06-23 09:45:42,762 - framestack - INFO: steps: 5650560, steps/s: 93.98
  epoch reward:       2.8985/10.5985/28.4084 [n=10] (min/mean/max)
  running reward:     1.2854/12.0620/48.6972 [

2018-06-23 10:02:31,551 - framestack - INFO: steps: 5745600, steps/s: 94.03
  epoch reward:       0.7183/9.2786/27.9323 [n=10] (min/mean/max)
  running reward:     0.7183/10.9200/30.8483 [n=500]

2018-06-23 10:03:18,864 - framestack - INFO: action counts: {0: 301, 1: 60, 2: 984, 3: 55, 4: 994, 5: 109, 6: 170, 7: 85, 8: 1187, 9: 375}
2018-06-23 10:03:18,890 - framestack - INFO: steps: 5749920, steps/s: 94.02
  epoch reward:       0.7183/8.4358/27.9323 [n=10] (min/mean/max)
  running reward:     0.7183/10.9016/30.8483 [n=500]

2018-06-23 10:04:04,910 - framestack - INFO: action counts: {0: 336, 1: 67, 2: 1004, 3: 71, 4: 916, 5: 118, 6: 190, 7: 117, 8: 1118, 9: 383}
2018-06-23 10:04:04,936 - framestack - INFO: steps: 5754240, steps/s: 94.02
  epoch reward:       0.7183/8.8152/27.9323 [n=10] (min/mean/max)
  running reward:     0.7183/10.9325/30.8483 [n=500]

2018-06-23 10:04:51,770 - framestack - INFO: action counts: {0: 306, 1: 74, 2: 953, 3: 62, 4: 938, 5: 114, 6: 186, 7: 109, 8: 1177, 

2018-06-23 10:21:20,180 - framestack - INFO: action counts: {0: 316, 1: 57, 2: 1013, 3: 42, 4: 916, 5: 108, 6: 184, 7: 96, 8: 1191, 9: 397}
2018-06-23 10:21:20,216 - framestack - INFO: steps: 5853600, steps/s: 94.12
  epoch reward:       0.1048/6.8298/13.2158 [n=10] (min/mean/max)
  running reward:     0.1048/9.8293/28.4084 [n=500]

2018-06-23 10:22:04,982 - framestack - INFO: action counts: {0: 282, 1: 61, 2: 1014, 3: 51, 4: 995, 5: 97, 6: 153, 7: 103, 8: 1193, 9: 371}
2018-06-23 10:22:05,010 - framestack - INFO: steps: 5857920, steps/s: 94.12
  epoch reward:       0.1048/8.8811/28.4084 [n=10] (min/mean/max)
  running reward:     0.1048/9.7695/28.4084 [n=500]

2018-06-23 10:22:49,844 - framestack - INFO: action counts: {0: 299, 1: 78, 2: 982, 3: 49, 4: 947, 5: 116, 6: 183, 7: 94, 8: 1181, 9: 391}
2018-06-23 10:22:49,870 - framestack - INFO: steps: 5862240, steps/s: 94.12
  epoch reward:       0.1048/11.3744/28.4084 [n=10] (min/mean/max)
  running reward:     0.1048/9.7898/28.4084 [n=5

2018-06-23 10:39:39,133 - framestack - INFO: steps: 5957280, steps/s: 94.11
  epoch reward:       2.1292/7.6350/21.6056 [n=10] (min/mean/max)
  running reward:     0.1048/9.9306/48.5208 [n=500]

2018-06-23 10:40:24,383 - framestack - INFO: action counts: {0: 324, 1: 71, 2: 976, 3: 46, 4: 886, 5: 116, 6: 168, 7: 98, 8: 1235, 9: 400}
2018-06-23 10:40:24,410 - framestack - INFO: steps: 5961600, steps/s: 94.12
  epoch reward:       2.1292/8.7851/21.6056 [n=10] (min/mean/max)
  running reward:     0.1048/9.9208/48.5208 [n=500]

2018-06-23 10:41:11,314 - framestack - INFO: action counts: {0: 288, 1: 74, 2: 1013, 3: 45, 4: 874, 5: 110, 6: 181, 7: 87, 8: 1227, 9: 421}
2018-06-23 10:41:11,338 - framestack - INFO: steps: 5965920, steps/s: 94.11
  epoch reward:       1.8528/8.4814/21.6056 [n=10] (min/mean/max)
  running reward:     0.1048/9.9217/48.5208 [n=500]

2018-06-23 10:41:56,483 - framestack - INFO: action counts: {0: 296, 1: 66, 2: 1057, 3: 47, 4: 913, 5: 111, 6: 142, 7: 78, 8: 1261, 9: 3

2018-06-23 10:58:58,879 - framestack - INFO: action counts: {0: 268, 1: 45, 2: 1057, 3: 40, 4: 900, 5: 98, 6: 134, 7: 77, 8: 1309, 9: 392}
2018-06-23 10:58:58,906 - framestack - INFO: steps: 6065280, steps/s: 94.09
  epoch reward:       0.4673/14.7976/27.9122 [n=10] (min/mean/max)
  running reward:     0.1048/10.6093/48.5208 [n=500]

2018-06-23 10:59:44,822 - framestack - INFO: action counts: {0: 255, 1: 74, 2: 1082, 3: 28, 4: 844, 5: 104, 6: 110, 7: 85, 8: 1367, 9: 371}
2018-06-23 10:59:44,846 - framestack - INFO: steps: 6069600, steps/s: 94.09
  epoch reward:       0.4673/13.3564/27.9122 [n=10] (min/mean/max)
  running reward:     0.1048/10.7398/48.5208 [n=500]

2018-06-23 11:00:31,473 - framestack - INFO: action counts: {0: 274, 1: 49, 2: 1035, 3: 33, 4: 878, 5: 103, 6: 142, 7: 83, 8: 1340, 9: 383}
2018-06-23 11:00:31,495 - framestack - INFO: steps: 6073920, steps/s: 94.09
  epoch reward:       0.4673/13.9084/27.9122 [n=10] (min/mean/max)
  running reward:     0.1048/10.8403/48.5208

2018-06-23 11:17:36,943 - framestack - INFO: steps: 6168960, steps/s: 94.12
  epoch reward:       1.9350/15.5883/48.6678 [n=10] (min/mean/max)
  running reward:     0.4673/12.3386/48.6678 [n=500]

2018-06-23 11:18:22,360 - framestack - INFO: action counts: {0: 271, 1: 42, 2: 1091, 3: 31, 4: 897, 5: 93, 6: 123, 7: 63, 8: 1359, 9: 350}
2018-06-23 11:18:22,387 - framestack - INFO: steps: 6173280, steps/s: 94.12
  epoch reward:       7.8225/17.0228/48.6678 [n=10] (min/mean/max)
  running reward:     0.4673/12.5264/48.6678 [n=500]

2018-06-23 11:19:08,789 - framestack - INFO: action counts: {0: 253, 1: 33, 2: 1133, 3: 36, 4: 835, 5: 95, 6: 119, 7: 64, 8: 1422, 9: 330}
2018-06-23 11:19:08,817 - framestack - INFO: steps: 6177600, steps/s: 94.12
  epoch reward:       1.4130/15.2540/48.6678 [n=10] (min/mean/max)
  running reward:     0.4673/12.6558/48.6678 [n=500]

2018-06-23 11:19:54,163 - framestack - INFO: action counts: {0: 234, 1: 46, 2: 1087, 3: 17, 4: 893, 5: 90, 6: 115, 7: 69, 8: 1427, 

2018-06-23 11:36:44,574 - framestack - INFO: action counts: {0: 239, 1: 42, 2: 1065, 3: 21, 4: 918, 5: 82, 6: 116, 7: 76, 8: 1473, 9: 288}
2018-06-23 11:36:44,598 - framestack - INFO: steps: 6276960, steps/s: 94.12
  epoch reward:       2.0912/8.4340/25.6350 [n=10] (min/mean/max)
  running reward:     0.4673/11.8734/49.1214 [n=500]

2018-06-23 11:37:31,420 - framestack - INFO: action counts: {0: 236, 1: 40, 2: 1066, 3: 26, 4: 861, 5: 70, 6: 118, 7: 63, 8: 1505, 9: 335}
2018-06-23 11:37:31,445 - framestack - INFO: steps: 6281280, steps/s: 94.12
  epoch reward:       2.0912/9.2172/25.6350 [n=10] (min/mean/max)
  running reward:     0.4673/11.7618/49.1214 [n=500]

2018-06-23 11:38:16,037 - framestack - INFO: action counts: {0: 219, 1: 33, 2: 1042, 3: 37, 4: 841, 5: 89, 6: 120, 7: 83, 8: 1523, 9: 333}
2018-06-23 11:38:16,062 - framestack - INFO: steps: 6285600, steps/s: 94.13
  epoch reward:       2.0912/7.4729/13.8736 [n=10] (min/mean/max)
  running reward:     0.4673/11.6442/49.1214 [n=5

2018-06-23 11:55:15,548 - framestack - INFO: steps: 6380640, steps/s: 94.08
  epoch reward:       2.1593/11.3032/26.6269 [n=10] (min/mean/max)
  running reward:     0.4673/10.7216/49.1214 [n=500]

2018-06-23 11:56:02,320 - framestack - INFO: action counts: {0: 291, 1: 42, 2: 1018, 3: 46, 4: 841, 5: 122, 6: 129, 7: 75, 8: 1373, 9: 383}
2018-06-23 11:56:02,343 - framestack - INFO: steps: 6384960, steps/s: 94.08
  epoch reward:       2.1593/9.7085/26.6269 [n=10] (min/mean/max)
  running reward:     0.4673/10.6040/49.1214 [n=500]

2018-06-23 11:56:49,014 - framestack - INFO: action counts: {0: 259, 1: 52, 2: 1037, 3: 44, 4: 848, 5: 110, 6: 155, 7: 59, 8: 1416, 9: 340}
2018-06-23 11:56:49,040 - framestack - INFO: steps: 6389280, steps/s: 94.07
  epoch reward:       1.9854/9.5720/26.6269 [n=10] (min/mean/max)
  running reward:     0.4673/10.4550/49.1214 [n=500]

2018-06-23 11:57:36,337 - framestack - INFO: action counts: {0: 245, 1: 43, 2: 1067, 3: 41, 4: 885, 5: 114, 6: 134, 7: 77, 8: 1369,

2018-06-23 12:14:46,990 - framestack - INFO: action counts: {0: 207, 1: 28, 2: 1094, 3: 27, 4: 855, 5: 78, 6: 102, 7: 44, 8: 1599, 9: 286}
2018-06-23 12:14:47,015 - framestack - INFO: steps: 6488640, steps/s: 94.02
  epoch reward:       2.7224/12.4103/38.9428 [n=10] (min/mean/max)
  running reward:     0.4673/10.5446/47.9911 [n=500]

2018-06-23 12:15:36,980 - framestack - INFO: action counts: {0: 212, 1: 26, 2: 1063, 3: 22, 4: 919, 5: 62, 6: 84, 7: 56, 8: 1611, 9: 265}
2018-06-23 12:15:37,007 - framestack - INFO: steps: 6492960, steps/s: 94.00
  epoch reward:       2.7224/12.4465/38.9428 [n=10] (min/mean/max)
  running reward:     0.4673/10.6248/47.9911 [n=500]

2018-06-23 12:16:22,900 - framestack - INFO: action counts: {0: 206, 1: 32, 2: 1107, 3: 19, 4: 870, 5: 72, 6: 110, 7: 47, 8: 1577, 9: 280}
2018-06-23 12:16:22,927 - framestack - INFO: steps: 6497280, steps/s: 94.00
  epoch reward:       5.1114/15.1880/38.9428 [n=10] (min/mean/max)
  running reward:     0.4673/10.7442/47.9911 [n

2018-06-23 12:33:09,853 - framestack - INFO: steps: 6592320, steps/s: 94.04
  epoch reward:       3.2603/13.9663/27.9336 [n=10] (min/mean/max)
  running reward:     0.1048/10.9260/38.9428 [n=500]

2018-06-23 12:33:55,732 - framestack - INFO: action counts: {0: 216, 1: 25, 2: 1027, 3: 31, 4: 864, 5: 73, 6: 94, 7: 47, 8: 1648, 9: 295}
2018-06-23 12:33:55,762 - framestack - INFO: steps: 6596640, steps/s: 94.04
  epoch reward:       3.7451/14.0148/27.9336 [n=10] (min/mean/max)
  running reward:     0.1048/10.9802/38.9428 [n=500]

2018-06-23 12:34:41,641 - framestack - INFO: action counts: {0: 202, 1: 32, 2: 1000, 3: 27, 4: 865, 5: 79, 6: 108, 7: 52, 8: 1631, 9: 324}
2018-06-23 12:34:41,666 - framestack - INFO: steps: 6600960, steps/s: 94.03
  epoch reward:       3.7451/11.6850/27.9336 [n=10] (min/mean/max)
  running reward:     0.1048/11.0198/38.9428 [n=500]

2018-06-23 12:35:27,564 - framestack - INFO: action counts: {0: 235, 1: 36, 2: 964, 3: 34, 4: 859, 5: 89, 6: 117, 7: 73, 8: 1589, 9:

2018-06-23 12:52:11,818 - framestack - INFO: action counts: {0: 219, 1: 26, 2: 965, 3: 35, 4: 849, 5: 67, 6: 119, 7: 51, 8: 1668, 9: 321}
2018-06-23 12:52:11,847 - framestack - INFO: steps: 6700320, steps/s: 94.06
  epoch reward:       0.7183/11.9100/23.6364 [n=10] (min/mean/max)
  running reward:     0.1048/11.7592/49.1569 [n=500]

2018-06-23 12:52:56,439 - framestack - INFO: action counts: {0: 222, 1: 42, 2: 966, 3: 27, 4: 836, 5: 77, 6: 125, 7: 60, 8: 1607, 9: 358}
2018-06-23 12:52:56,464 - framestack - INFO: steps: 6704640, steps/s: 94.06
  epoch reward:       0.7183/10.0163/18.6868 [n=10] (min/mean/max)
  running reward:     0.1048/11.7113/49.1569 [n=500]

2018-06-23 12:53:40,789 - framestack - INFO: action counts: {0: 238, 1: 36, 2: 994, 3: 24, 4: 856, 5: 84, 6: 105, 7: 57, 8: 1600, 9: 326}
2018-06-23 12:53:40,812 - framestack - INFO: steps: 6708960, steps/s: 94.07
  epoch reward:       0.7183/12.2817/18.6868 [n=10] (min/mean/max)
  running reward:     0.1048/11.7080/49.1569 [n=5

2018-06-23 13:11:18,593 - framestack - INFO: action counts: {0: 242, 1: 31, 2: 978, 3: 36, 4: 892, 5: 82, 6: 146, 7: 55, 8: 1503, 9: 355}
2018-06-23 13:11:18,618 - framestack - INFO: steps: 6808320, steps/s: 94.08
  epoch reward:       2.9343/13.8244/47.3322 [n=10] (min/mean/max)
  running reward:     0.7183/11.0191/49.1569 [n=500]

2018-06-23 13:12:04,122 - framestack - INFO: action counts: {0: 281, 1: 49, 2: 942, 3: 27, 4: 920, 5: 107, 6: 113, 7: 73, 8: 1437, 9: 371}
2018-06-23 13:12:04,146 - framestack - INFO: steps: 6812640, steps/s: 94.08
  epoch reward:       2.9343/10.4640/19.0814 [n=10] (min/mean/max)
  running reward:     0.7183/10.9481/49.1569 [n=500]

2018-06-23 13:12:49,271 - framestack - INFO: action counts: {0: 253, 1: 52, 2: 971, 3: 29, 4: 940, 5: 100, 6: 127, 7: 63, 8: 1394, 9: 391}
2018-06-23 13:12:49,297 - framestack - INFO: steps: 6816960, steps/s: 94.08
  epoch reward:       2.7823/9.3503/19.0814 [n=10] (min/mean/max)
  running reward:     0.7183/10.9014/49.1569 [n=

2018-06-23 13:30:17,243 - framestack - INFO: action counts: {0: 232, 1: 38, 2: 1063, 3: 34, 4: 825, 5: 86, 6: 122, 7: 63, 8: 1473, 9: 384}
2018-06-23 13:30:17,268 - framestack - INFO: steps: 6916320, steps/s: 94.14
  epoch reward:       3.3070/14.2135/47.3114 [n=10] (min/mean/max)
  running reward:     0.7183/10.2566/47.3322 [n=500]

2018-06-23 13:31:02,388 - framestack - INFO: action counts: {0: 217, 1: 33, 2: 1122, 3: 27, 4: 847, 5: 90, 6: 97, 7: 45, 8: 1465, 9: 377}
2018-06-23 13:31:02,416 - framestack - INFO: steps: 6920640, steps/s: 94.14
  epoch reward:       3.3070/17.5661/47.3114 [n=10] (min/mean/max)
  running reward:     0.7183/10.4075/47.3322 [n=500]

2018-06-23 13:31:46,568 - framestack - INFO: action counts: {0: 244, 1: 30, 2: 1052, 3: 33, 4: 850, 5: 76, 6: 115, 7: 40, 8: 1469, 9: 411}
2018-06-23 13:31:46,592 - framestack - INFO: steps: 6924960, steps/s: 94.15
  epoch reward:       2.0252/14.0873/47.3114 [n=10] (min/mean/max)
  running reward:     0.7183/10.4437/47.3322 [n

2018-06-23 13:48:40,047 - framestack - INFO: steps: 7020000, steps/s: 94.12
  epoch reward:       0.1048/9.0272/15.0117 [n=10] (min/mean/max)
  running reward:     0.1048/10.5057/47.3322 [n=500]

2018-06-23 13:49:23,746 - framestack - INFO: action counts: {0: 219, 1: 51, 2: 1006, 3: 48, 4: 842, 5: 100, 6: 143, 7: 74, 8: 1430, 9: 407}
2018-06-23 13:49:23,771 - framestack - INFO: steps: 7024320, steps/s: 94.12
  epoch reward:       0.1048/9.0272/15.0117 [n=10] (min/mean/max)
  running reward:     0.1048/10.4097/47.3114 [n=500]

2018-06-23 13:50:07,858 - framestack - INFO: action counts: {0: 251, 1: 47, 2: 1063, 3: 24, 4: 837, 5: 100, 6: 147, 7: 69, 8: 1340, 9: 442}
2018-06-23 13:50:07,883 - framestack - INFO: steps: 7028640, steps/s: 94.12
  epoch reward:       0.1048/9.2655/15.0117 [n=10] (min/mean/max)
  running reward:     0.1048/10.3858/47.3114 [n=500]

2018-06-23 13:50:53,410 - framestack - INFO: action counts: {0: 265, 1: 45, 2: 959, 3: 57, 4: 812, 5: 111, 6: 141, 7: 79, 8: 1393, 9

2018-06-23 14:07:44,380 - framestack - INFO: action counts: {0: 274, 1: 69, 2: 981, 3: 40, 4: 920, 5: 133, 6: 155, 7: 79, 8: 1257, 9: 412}
2018-06-23 14:07:44,404 - framestack - INFO: steps: 7128000, steps/s: 94.12
  epoch reward:       2.9849/9.5316/25.9623 [n=10] (min/mean/max)
  running reward:     0.1048/11.1994/47.3114 [n=500]

2018-06-23 14:08:29,103 - framestack - INFO: action counts: {0: 265, 1: 48, 2: 985, 3: 50, 4: 865, 5: 130, 6: 158, 7: 73, 8: 1315, 9: 431}
2018-06-23 14:08:29,126 - framestack - INFO: steps: 7132320, steps/s: 94.13
  epoch reward:       2.9849/9.5316/25.9623 [n=10] (min/mean/max)
  running reward:     0.1048/11.1058/47.3114 [n=500]

2018-06-23 14:09:13,385 - framestack - INFO: action counts: {0: 252, 1: 49, 2: 1013, 3: 49, 4: 882, 5: 126, 6: 140, 7: 71, 8: 1308, 9: 430}
2018-06-23 14:09:13,425 - framestack - INFO: steps: 7136640, steps/s: 94.14
  epoch reward:       3.3365/13.1948/45.7050 [n=10] (min/mean/max)
  running reward:     0.1048/11.0184/47.3114 [n

2018-06-23 14:26:02,640 - framestack - INFO: steps: 7231680, steps/s: 94.17
  epoch reward:       0.9357/8.4203/20.2559 [n=10] (min/mean/max)
  running reward:     0.1048/12.2662/45.7050 [n=500]

2018-06-23 14:26:48,088 - framestack - INFO: action counts: {0: 287, 1: 54, 2: 954, 3: 42, 4: 909, 5: 133, 6: 155, 7: 75, 8: 1280, 9: 431}
2018-06-23 14:26:48,114 - framestack - INFO: steps: 7236000, steps/s: 94.17
  epoch reward:       0.9357/6.8757/20.2559 [n=10] (min/mean/max)
  running reward:     0.1048/12.2231/45.7050 [n=500]

2018-06-23 14:27:33,510 - framestack - INFO: action counts: {0: 257, 1: 74, 2: 928, 3: 64, 4: 852, 5: 156, 6: 158, 7: 87, 8: 1330, 9: 414}
2018-06-23 14:27:33,536 - framestack - INFO: steps: 7240320, steps/s: 94.18
  epoch reward:       0.9357/6.9713/20.2559 [n=10] (min/mean/max)
  running reward:     0.1048/12.1820/45.7050 [n=500]

2018-06-23 14:28:20,590 - framestack - INFO: action counts: {0: 269, 1: 58, 2: 966, 3: 52, 4: 852, 5: 127, 6: 154, 7: 94, 8: 1297, 9: 

2018-06-23 14:45:27,694 - framestack - INFO: action counts: {0: 300, 1: 66, 2: 932, 3: 68, 4: 873, 5: 153, 6: 162, 7: 87, 8: 1238, 9: 441}
2018-06-23 14:45:27,719 - framestack - INFO: steps: 7339680, steps/s: 94.16
  epoch reward:       1.1489/9.1935/26.2370 [n=10] (min/mean/max)
  running reward:     0.7183/11.8294/49.1031 [n=500]

2018-06-23 14:46:13,377 - framestack - INFO: action counts: {0: 289, 1: 72, 2: 952, 3: 52, 4: 893, 5: 139, 6: 185, 7: 84, 8: 1232, 9: 422}
2018-06-23 14:46:13,402 - framestack - INFO: steps: 7344000, steps/s: 94.16
  epoch reward:       1.1489/12.5570/48.2217 [n=10] (min/mean/max)
  running reward:     0.7183/11.8899/49.1031 [n=500]

2018-06-23 14:46:59,205 - framestack - INFO: action counts: {0: 322, 1: 70, 2: 949, 3: 51, 4: 898, 5: 115, 6: 165, 7: 83, 8: 1243, 9: 424}
2018-06-23 14:46:59,224 - framestack - INFO: steps: 7348320, steps/s: 94.16
  epoch reward:       1.1489/12.5570/48.2217 [n=10] (min/mean/max)
  running reward:     0.7183/11.9504/49.1031 [n

2018-06-23 15:04:45,333 - framestack - INFO: action counts: {0: 334, 1: 65, 2: 922, 3: 50, 4: 858, 5: 149, 6: 160, 7: 76, 8: 1226, 9: 480}
2018-06-23 15:04:45,355 - framestack - INFO: steps: 7447680, steps/s: 94.07
  epoch reward:       1.2822/11.0915/27.9336 [n=10] (min/mean/max)
  running reward:     0.7183/10.0843/49.1031 [n=500]

2018-06-23 15:05:30,277 - framestack - INFO: action counts: {0: 318, 1: 53, 2: 873, 3: 53, 4: 894, 5: 140, 6: 161, 7: 86, 8: 1294, 9: 448}
2018-06-23 15:05:30,300 - framestack - INFO: steps: 7452000, steps/s: 94.07
  epoch reward:       1.2822/10.3977/27.9336 [n=10] (min/mean/max)
  running reward:     0.7183/10.1547/49.1031 [n=500]

2018-06-23 15:06:15,905 - framestack - INFO: action counts: {0: 295, 1: 56, 2: 949, 3: 45, 4: 915, 5: 111, 6: 150, 7: 76, 8: 1271, 9: 452}
2018-06-23 15:06:15,932 - framestack - INFO: steps: 7456320, steps/s: 94.07
  epoch reward:       6.5114/12.9252/27.9336 [n=10] (min/mean/max)
  running reward:     0.7183/10.2738/49.1031 [

2018-06-23 15:23:06,577 - framestack - INFO: steps: 7551360, steps/s: 93.99
  epoch reward:       1.3999/10.8670/30.1345 [n=10] (min/mean/max)
  running reward:     0.7183/11.1366/48.2217 [n=500]

2018-06-23 15:23:51,400 - framestack - INFO: action counts: {0: 286, 1: 58, 2: 936, 3: 41, 4: 955, 5: 115, 6: 147, 7: 56, 8: 1328, 9: 398}
2018-06-23 15:23:51,425 - framestack - INFO: steps: 7555680, steps/s: 93.99
  epoch reward:       1.3999/11.5023/30.1345 [n=10] (min/mean/max)
  running reward:     0.7183/11.1828/48.2217 [n=500]

2018-06-23 15:24:34,846 - framestack - INFO: action counts: {0: 307, 1: 56, 2: 900, 3: 43, 4: 924, 5: 120, 6: 169, 7: 80, 8: 1311, 9: 410}
2018-06-23 15:24:34,870 - framestack - INFO: steps: 7560000, steps/s: 93.99
  epoch reward:       3.3070/11.7627/30.1345 [n=10] (min/mean/max)
  running reward:     0.7183/11.1669/48.2217 [n=500]

2018-06-23 15:25:21,782 - framestack - INFO: action counts: {0: 308, 1: 56, 2: 884, 3: 43, 4: 936, 5: 142, 6: 150, 7: 94, 8: 1305, 

2018-06-23 15:42:17,734 - framestack - INFO: action counts: {0: 242, 1: 41, 2: 989, 3: 44, 4: 995, 5: 97, 6: 113, 7: 60, 8: 1359, 9: 380}
2018-06-23 15:42:17,761 - framestack - INFO: steps: 7659360, steps/s: 93.96
  epoch reward:       1.3934/7.7370/27.9265 [n=10] (min/mean/max)
  running reward:     0.7183/11.9027/47.2714 [n=500]

2018-06-23 15:43:02,121 - framestack - INFO: action counts: {0: 248, 1: 29, 2: 992, 3: 39, 4: 971, 5: 102, 6: 113, 7: 53, 8: 1372, 9: 401}
2018-06-23 15:43:02,146 - framestack - INFO: steps: 7663680, steps/s: 93.96
  epoch reward:       1.3934/7.7370/27.9265 [n=10] (min/mean/max)
  running reward:     0.7183/11.8356/47.2714 [n=500]

2018-06-23 15:43:48,854 - framestack - INFO: action counts: {0: 276, 1: 49, 2: 947, 3: 38, 4: 998, 5: 85, 6: 135, 7: 68, 8: 1321, 9: 403}
2018-06-23 15:43:48,880 - framestack - INFO: steps: 7668000, steps/s: 93.96
  epoch reward:       1.6186/9.0730/27.9265 [n=10] (min/mean/max)
  running reward:     0.7183/11.8091/47.2714 [n=500

2018-06-23 16:01:24,161 - framestack - INFO: action counts: {0: 254, 1: 58, 2: 904, 3: 42, 4: 946, 5: 89, 6: 123, 7: 71, 8: 1438, 9: 395}
2018-06-23 16:01:24,192 - framestack - INFO: steps: 7767360, steps/s: 93.97
  epoch reward:       3.0624/8.8993/15.2427 [n=10] (min/mean/max)
  running reward:     0.4673/9.7741/30.8297 [n=500]

2018-06-23 16:02:09,245 - framestack - INFO: action counts: {0: 216, 1: 36, 2: 981, 3: 28, 4: 1019, 5: 89, 6: 129, 7: 55, 8: 1406, 9: 361}
2018-06-23 16:02:09,273 - framestack - INFO: steps: 7771680, steps/s: 93.98
  epoch reward:       3.0624/8.3974/15.2427 [n=10] (min/mean/max)
  running reward:     0.4673/9.7120/30.8297 [n=500]

2018-06-23 16:02:56,348 - framestack - INFO: action counts: {0: 226, 1: 50, 2: 920, 3: 33, 4: 1048, 5: 82, 6: 118, 7: 55, 8: 1417, 9: 371}
2018-06-23 16:02:56,377 - framestack - INFO: steps: 7776000, steps/s: 93.97
  epoch reward:       3.0624/11.1379/27.9336 [n=10] (min/mean/max)
  running reward:     0.4673/9.6995/30.8297 [n=500]

2018-06-23 16:19:40,211 - framestack - INFO: steps: 7871040, steps/s: 94.03
  epoch reward:       2.5548/10.3255/28.4084 [n=10] (min/mean/max)
  running reward:     0.1048/10.3434/47.0356 [n=500]



saving ./outputs/RNN_v3b_128im_512z_1512_v6k_VAE5_all_noframestack/PPO_512z_all_g.pkl
saving backup ./outputs/RNN_v3b_128im_512z_1512_v6k_VAE5_all_noframestack/PPO_512z_all_g-20180623_08-20-24.pkl


KeyboardInterrupt: 

    2018-06-20 09:40:21,906 - framestack - INFO: action counts: {0: 127, 1: 41, 2: 196, 3: 22, 4: 185, 5: 96, 6: 97, 7: 35, 8: 238, 9: 83}
    2018-06-20 09:40:21,928 - framestack - INFO: steps: 23520, steps/s: 47.68
      world model losses: 40.4108 rnn=26.8570, inv= 0.1192=0.0050 * 23.8446, vae=13.4345=0.1250 * (93.5157 + 0.0833 * 167.5278)
      epoch reward:       0.0000/1.2111/12.7297 [n=40] (min/mean/max)
      running reward:     0.0000/0.9334/12.7297 [n=500]

    2018-06-21 07:12:46,362 - framestack - INFO: action counts: {0: 82, 1: 20, 2: 252, 3: 19, 4: 217, 5: 61, 6: 129, 7: 48, 8: 224, 9: 68}
    2018-06-21 07:12:46,379 - framestack - INFO: steps: 3764320, steps/s: 48.60
      world model losses: 34.7660 rnn=24.8725, inv= 0.0004=0.0050 * 0.0753, vae=9.8931=0.1250 * (63.7831 + 0.0833 * 184.3440)
      epoch reward:       0.7183/10.4729/38.4701 [n=40] (min/mean/max)
      running reward:     0.7183/10.6002/38.4701 [n=500]


In [None]:
# Checkpoint the agent, then write each reward normalizer's state dict to a
# sibling file whose name is derived from the agent checkpoint path.
agent.save(ppo_save_file)
for suffix, normalizer in (
    ('-intrinsic_reward_normalizer.pkl', config.intrinsic_reward_normalizer),
    ('-reward_normalizer.pkl', config.reward_normalizer),
):
    torch.save(normalizer.state_dict(), ppo_save_file.replace('.pkl', suffix))

In [None]:
# Save only the agent checkpoint to `ppo_save_file`; the two reward-normalizer
# saves below are intentionally left commented out in this cell.
agent.save(ppo_save_file)
# torch.save(config.intrinsic_reward_normalizer.state_dict(), ppo_save_file.replace('.pkl', '-intrinsic_reward_normalizer.pkl'))
# torch.save(config.reward_normalizer.state_dict(), ppo_save_file.replace('.pkl', '-reward_normalizer.pkl'))

# Summarize model

In [None]:
# Display the `training` attribute of the world model's MDN-RNN
# (presumably the nn.Module train/eval mode flag — TODO confirm).
agent.network.world_model.mdnrnn.training

In [None]:
from IPython.display import display

# Push one random frame and one random action through each sub-model and show
# the per-layer summary table that TorchSummarizeDf builds for it.
with torch.no_grad():
    # Random stand-ins for a single observation and a single action index.
    img = np.random.randn(image_size, image_size, 3)
    action = np.array(np.random.randint(0, action_dim))[np.newaxis]

    # Build the tensors on the CPU first; `Variable` is a no-op wrapper on any
    # torch version that provides `torch.no_grad`, so it is not needed here.
    action = torch.from_numpy(action).float()[np.newaxis]
    gpu_img = torch.from_numpy(img[np.newaxis].transpose(0, 3, 1, 2)).float()

    # Only move inputs to the GPU when one is available. The previous
    # unconditional `.cuda()` calls crashed this cell on CPU-only machines
    # before the `if cuda:` guard was ever reached.
    if cuda:
        action = action.cuda()
        gpu_img = gpu_img.cuda()

    # VAE: encode the frame and sample a latent from the returned mean/log-variance.
    with TorchSummarizeDf(vae) as tdf:
        x, mu_vae, logvar_vae = vae.forward(gpu_img)
        z = vae.sample(mu_vae, logvar_vae)
        df_vae = tdf.make_df()

    # Only show the shallowest levels of the VAE summary.
    display(df_vae[df_vae.level < 2])

    # MDN-RNN: feed the latent repeated twice along a new axis 1
    # (presumably the sequence axis — TODO confirm).
    with TorchSummarizeDf(mdnrnn) as tdf:
        pi, mu, sigma, hidden_state = mdnrnn.forward(z.unsqueeze(1).repeat((1, 2, 1)))
        z_next = mdnrnn.sample(pi, mu, sigma)
        df_mdnrnn = tdf.make_df()

    display(df_mdnrnn)

    # Inverse model: called with the repeated latent and the sampled next latent.
    with TorchSummarizeDf(finv) as tdf:
        finv(z.repeat((1, 2, 1)), z_next)
        df_finv = tdf.make_df()
    display(df_finv)

    # Full world model on the raw image and action.
    with TorchSummarizeDf(world_model) as tdf:
        world_model(gpu_img, action)
        df_world_model = tdf.make_df()
    display(df_world_model[df_world_model.level < 2])

    # Drop the temporaries so they do not linger in the kernel namespace.
    del img, action, gpu_img, x, mu, z, z_next, mu_vae, pi, sigma, logvar_vae