## 7章　ブロック崩しBreakoutの学習プログラム    

In [20]:
# パッケージのimport
import numpy as np
from collections import deque
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import gym
from gym import spaces
from gym.spaces.box import Box


In [21]:
# Execution-environment setup
# Reference: https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py

import cv2
# Tell OpenCV not to use OpenCL.
cv2.ocl.setUseOpenCL(False)


class NoopResetEnv(gym.Wrapper):
    """Wrapper that plays a random number of no-op actions after each reset.

    Technique 1 (No-Operation): idling for a varying number of steps right
    after a reset diversifies the initial game state, so that learning is not
    restricted to one particular starting state.
    """

    def __init__(self, env, noop_max=30):
        """Wrap ``env``; ``noop_max`` bounds the number of no-op steps per reset."""
        super().__init__(env)
        self.noop_action = 0
        self.override_num_noops = None
        self.noop_max = noop_max
        # Action index 0 has to be the no-op action for this wrapper to work.
        assert env.unwrapped.get_action_meanings()[0] == 'NOOP'

    def reset(self, **kwargs):
        """Reset the env, then take no-op steps and return the last observation.

        The step count is ``override_num_noops`` when that is set, otherwise
        it is drawn with ``np_random.randint(1, noop_max + 1)``.
        """
        self.env.reset(**kwargs)

        num_noops = self.override_num_noops
        if num_noops is None:
            num_noops = self.unwrapped.np_random.randint(
                1, self.noop_max + 1)  # pylint: disable=E1101
        assert num_noops > 0

        last_obs = None
        for _ in range(num_noops):
            last_obs, _, episode_over, _ = self.env.step(self.noop_action)
            if episode_over:
                # The episode ended while idling: start over.
                last_obs = self.env.reset(**kwargs)
        return last_obs

    def step(self, ac):
        """Forward the action to the wrapped env unchanged."""
        return self.env.step(ac)


class EpisodicLifeEnv(gym.Wrapper):
    """Wrapper that reports the loss of a life as the end of an episode.

    Technique 2 (Episodic Life): losing one life is signalled as ``done``, and
    the next episode continues from the state where the life was lost. The
    wrapped env is only really reset once it has reported ``done`` itself.
    """

    def __init__(self, env):
        """Wrap ``env`` and start with no lives recorded."""
        super().__init__(env)
        self.was_real_done = True
        self.lives = 0

    def step(self, action):
        """Step the env and turn a lost life (with lives remaining) into done."""
        obs, reward, done, info = self.env.step(action)
        # Remember whether the wrapped env itself reported done.
        self.was_real_done = done

        remaining = self.env.unwrapped.ale.lives()
        # Requiring remaining > 0 keeps the final life loss from being flagged
        # here; for that one we wait until the env itself advertises done
        # (some games stay at lives == 0 for a few frames).
        if 0 < remaining < self.lives:
            done = True
        # Always store the latest count so bonus lives are handled too.
        self.lives = remaining
        return obs, reward, done, info

    def reset(self, **kwargs):
        """Really reset only when the wrapped env reported done.

        Otherwise take a single no-op step (action 0) to move on from the
        lost-life state and return the resulting observation.
        """
        if not self.was_real_done:
            obs, _, _, _ = self.env.step(0)
        else:
            obs = self.env.reset(**kwargs)
        self.lives = self.env.unwrapped.ale.lives()
        return obs


class MaxAndSkipEnv(gym.Wrapper):
    """Wrapper that repeats each action and max-pools the last two frames.

    Technique 3 (Max and Skip): the same action is applied for ``skip``
    consecutive frames, and the returned observation is the element-wise
    maximum of the last two of those frames.
    """

    def __init__(self, env, skip=4):
        """Wrap ``env``; ``skip`` is the number of frames per action."""
        super().__init__(env)
        frame_shape = env.observation_space.shape
        # Holds the two most recent raw frames for the max over time.
        self._obs_buffer = np.zeros((2,) + frame_shape, dtype=np.uint8)
        self._skip = skip

    def step(self, action):
        """Repeat ``action``, summing rewards; return the max-pooled frame."""
        first_kept = self._skip - 2  # loop index of the second-to-last frame
        reward_sum = 0.0
        done = None
        for frame_idx in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            slot = frame_idx - first_kept
            if slot in (0, 1):
                self._obs_buffer[slot] = obs
            reward_sum += reward
            if done:
                break
        # If the loop stopped early on done, the buffer may hold older frames;
        # the observation of a done step is not meant to matter.
        pooled = np.max(self._obs_buffer, axis=0)

        return pooled, reward_sum, done, info

    def reset(self, **kwargs):
        """Reset the wrapped env and return its observation."""
        return self.env.reset(**kwargs)


class WarpFrame(gym.ObservationWrapper):
    """Observation wrapper that converts frames to 84x84 grayscale.

    Technique 4 (Warp frame): the image size is set to 84x84 grayscale, the
    same as in the Nature DQN paper.
    """

    def __init__(self, env):
        """Wrap ``env`` and declare an (84, 84, 1) uint8 observation space."""
        super().__init__(env)
        self.width = self.height = 84
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(self.height, self.width, 1),
            dtype=np.uint8)

    def observation(self, frame):
        """Convert an RGB frame to grayscale, resize it, add a channel axis."""
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        target_size = (self.width, self.height)
        resized = cv2.resize(gray, target_size, interpolation=cv2.INTER_AREA)
        return resized[:, :, np.newaxis]


class WrapPyTorch(gym.ObservationWrapper):
    """Observation wrapper that moves the channel axis to the front.

    Turns (height, width, channel) observations into (channel, height, width),
    the index order used for PyTorch image mini-batches.
    """

    def __init__(self, env=None):
        """Wrap ``env`` and declare the transposed observation space."""
        super(WrapPyTorch, self).__init__(env)
        height, width, channels = self.observation_space.shape
        # The declared shape must follow the same axis order that
        # observation() produces, i.e. transpose(2, 0, 1) -> (C, H, W).
        self.observation_space = Box(
            self.observation_space.low[0, 0, 0],
            self.observation_space.high[0, 0, 0],
            [channels, height, width],
            dtype=self.observation_space.dtype)

    def observation(self, observation):
        """Return the observation with axes reordered to (C, H, W)."""
        return observation.transpose(2, 0, 1)


In [22]:
# 実行環境生成関数の定義

# 並列実行環境
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv


def make_env(env_id, seed, rank):
    """Return a zero-argument factory that builds one wrapped environment.

    Args:
        env_id: id passed to ``gym.make``.
        seed: base random seed.
        rank: index of this environment; the env is seeded with
            ``seed + rank``.

    Returns:
        A callable taking no arguments that creates and returns the wrapped
        environment.
    """
    def _thunk():
        '''SubprocVecEnv needs a callable like this to build each env.'''

        env = gym.make(env_id)
        # Use the file-level settings rather than duplicating their values.
        env = NoopResetEnv(env, noop_max=NOOP_MAX)
        env = MaxAndSkipEnv(env, skip=NUM_SKIP_FRAME)
        env.seed(seed + rank)  # set the random seed
        env = EpisodicLifeEnv(env)
        env = WarpFrame(env)
        env = WrapPyTorch(env)

        return env

    return _thunk


In [23]:
# Constant settings

ENV_NAME = 'BreakoutNoFrameskip-v4' 
# Use BreakoutNoFrameskip-v4 rather than Breakout-v0.
# v0 automatically skips a random 2-4 frames; here we use the version without frame skipping.
# Reference URL https://becominghuman.ai/lets-build-an-atari-ai-part-1-dqn-df57e8ff3b26
# https://github.com/openai/gym/blob/5cb12296274020db9bb6378ce54276b31e7002da/gym/envs/__init__.py#L371
    
NUM_SKIP_FRAME = 4 # number of frames to skip
NUM_STACK_FRAME = 4  # number of consecutive frames kept together as the state
NOOP_MAX = 30  # upper bound of the random number of no-operation frames inserted at reset
NUM_PROCESSES = 16 # number of processes run in parallel
NUM_ADVANCED_STEP = 5  # how many steps to advance before computing the reward sum
GAMMA = 0.99  # time discount rate

TOTAL_FRAMES=10e6  # total number of frames used for training
NUM_UPDATES = int(TOTAL_FRAMES / NUM_ADVANCED_STEP / NUM_PROCESSES)  # total number of network updates
# NUM_UPDATES comes out to 125,000


In [24]:
# Constants used when computing the A2C loss
value_loss_coef = 0.5
entropy_coef = 0.01
max_grad_norm = 0.5

# Settings for the RMSprop optimizer
lr = 7e-4
eps = 1e-5
alpha = 0.99


In [25]:
# GPU usage setting: pick CUDA when it is available, otherwise the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)


cuda


In [26]:
# メモリオブジェクトの定義


class RolloutStorage(object):
    '''Memory that holds the transitions of one multi-step (advantage) rollout.

    All tensors are created on the module-level ``device``.
    '''

    def __init__(self, num_steps, num_processes, obs_shape):
        '''Allocate the buffers.

        Args:
            num_steps: number of steps stored per rollout.
            num_processes: number of environments stepped in parallel.
            obs_shape: shape of one stacked observation, e.g. (4, 84, 84).
        '''
        # Kept so that insert() wraps on this instance's own size.
        self.num_steps = num_steps

        # *obs_shape unpacks the tuple: (4, 84, 84) -> 4, 84, 84
        self.observations = torch.zeros(
            num_steps + 1, num_processes, *obs_shape).to(device)

        self.masks = torch.ones(num_steps + 1, num_processes, 1).to(device)
        self.rewards = torch.zeros(num_steps, num_processes, 1).to(device)
        self.actions = torch.zeros(
            num_steps, num_processes, 1).long().to(device)

        # Discounted reward sums; one extra slot for the bootstrap value.
        self.returns = torch.zeros(num_steps + 1, num_processes, 1).to(device)
        self.index = 0  # position at which insert() writes next

    def insert(self, current_obs, action, reward, mask):
        '''Store one transition at the current index and advance the index.

        The observation and mask go to slot ``index + 1`` (they describe the
        state after the step); the reward and action go to slot ``index``.
        '''
        self.observations[self.index + 1].copy_(current_obs)
        self.masks[self.index + 1].copy_(mask)
        self.rewards[self.index].copy_(reward)
        self.actions[self.index].copy_(action)

        # Wrap on the size given at construction, not on a global constant.
        self.index = (self.index + 1) % self.num_steps

    def after_update(self):
        '''Copy the newest observation and mask to slot 0 for the next rollout.'''
        self.observations[0].copy_(self.observations[-1])
        self.masks[0].copy_(self.masks[-1])

    def compute_returns(self, next_value):
        '''Fill ``returns`` with the discounted reward sum of every step.

        Computed backwards from the last step, bootstrapping from
        ``next_value``. A mask of 0 at step + 1 removes the contribution of
        everything after that step.
        '''
        self.returns[-1] = next_value
        for ad_step in reversed(range(self.rewards.size(0))):
            self.returns[ad_step] = self.returns[ad_step + 1] * \
                GAMMA * self.masks[ad_step + 1] + self.rewards[ad_step]


In [27]:
# A2Cのディープ・ニューラルネットワークの構築


def init(module, gain):
    '''Initialise a layer in place and return it.

    The weight gets an orthogonal initialisation scaled by ``gain``; the bias
    is set to 0.
    '''
    weights = module.weight.data
    nn.init.orthogonal_(weights, gain=gain)
    biases = module.bias.data
    nn.init.constant_(biases, 0)
    return module


class Flatten(nn.Module):
    '''Layer that reshapes its input to (batch, -1), e.g. conv output to vectors.'''

    def forward(self, x):
        '''Keep the first dimension and merge all the others into one.'''
        batch_size = x.size(0)
        flat = x.view(batch_size, -1)
        return flat


class Net(nn.Module):
    '''A2C network: a shared convolutional trunk with critic and actor heads.'''

    def __init__(self, n_out):
        '''Build the layers; ``n_out`` is the number of actions.'''
        super().__init__()

        # Gain used for every layer that is followed by a ReLU.
        relu_gain = nn.init.calculate_gain('relu')

        # Shared trunk. Spatial size per conv layer follows
        # size = (input - kernel + 2 * padding) / stride + 1.
        self.conv = nn.Sequential(
            # NUM_STACK_FRAME stacked frames in, 32 channels out; 84x84 -> 20x20
            init(nn.Conv2d(NUM_STACK_FRAME, 32, kernel_size=8, stride=4),
                 gain=relu_gain),
            nn.ReLU(),
            # 20x20 -> 9x9
            init(nn.Conv2d(32, 64, kernel_size=4, stride=2), gain=relu_gain),
            nn.ReLU(),
            # 9x9 -> 7x7
            init(nn.Conv2d(64, 64, kernel_size=3, stride=1), gain=relu_gain),
            nn.ReLU(),
            # Image layout -> one vector per sample
            Flatten(),
            # 64 maps of 7x7 -> 512 features
            init(nn.Linear(64 * 7 * 7, 512), gain=relu_gain),
            nn.ReLU()
        )

        # Critic head: a single output, the state value.
        self.critic = init(nn.Linear(512, 1), gain=1.0)

        # Actor head: one output per action.
        self.actor = init(nn.Linear(512, n_out), gain=0.01)

        # Put the network into training mode.
        self.train()

    def forward(self, x):
        '''Return ``(critic_output, actor_output)`` for the input batch ``x``.'''
        scaled = x / 255.0  # normalise pixel values from 0-255 to 0-1
        features = self.conv(scaled)
        critic_output = self.critic(features)  # state value
        actor_output = self.actor(features)  # un-normalised action scores

        return critic_output, actor_output

    def act(self, x):
        '''Sample one action per row of ``x`` from the policy distribution.'''
        _, logits = self(x)
        # dim=1 is the action dimension
        policy = F.softmax(logits, dim=1)
        return policy.multinomial(num_samples=1)

    def get_value(self, x):
        '''Return the state value for ``x``.'''
        state_value, _ = self(x)
        return state_value

    def evaluate_actions(self, x, actions):
        '''Return state value, log-prob of ``actions`` and the mean entropy.'''
        value, logits = self(x)

        # dim=1 is the action dimension for both calls
        log_probs = F.log_softmax(logits, dim=1)
        probs = F.softmax(logits, dim=1)

        # Log-probability of the action that was actually taken.
        action_log_probs = log_probs.gather(1, actions)

        # Policy entropy, averaged over the batch.
        dist_entropy = -(log_probs * probs).sum(-1).mean()

        return value, action_log_probs, dist_entropy


In [28]:
# エージェントが持つ頭脳となるクラスを定義、全エージェントで共有する


class Brain(object):
    '''Owns the actor-critic network and its optimizer; shared by all agents.'''

    def __init__(self, actor_critic):
        '''Store the network and create its RMSprop optimizer.

        Args:
            actor_critic: the network to train (an instance of ``Net``).
        '''
        self.actor_critic = actor_critic

        # To start from saved weights:
        #filename = 'weight.pth'
        #param = torch.load(filename, map_location='cpu')
        # self.actor_critic.load_state_dict(param)

        # Gradient method used for the parameter updates.
        self.optimizer = optim.RMSprop(
            actor_critic.parameters(), lr=lr, eps=eps, alpha=alpha)

    def update(self, rollouts):
        '''Run one gradient update using every step stored in ``rollouts``.

        ``rollouts.compute_returns`` is expected to have been called first so
        that ``rollouts.returns`` is filled in.
        '''
        obs_shape = rollouts.observations.size()[2:]  # e.g. torch.Size([4, 84, 84])
        # Take the sizes from the rollout itself instead of global constants,
        # so the reshapes below always match the stored data.
        num_steps, num_processes = rollouts.rewards.size()[:2]

        # Steps and processes are merged into one batch dimension for the
        # forward pass (e.g. 5 * 16 = 80 rows).
        values, action_log_probs, dist_entropy = self.actor_critic.evaluate_actions(
            rollouts.observations[:-1].view(-1, *obs_shape),
            rollouts.actions.view(-1, 1))

        # Back to (num_steps, num_processes, 1).
        values = values.view(num_steps, num_processes, 1)
        action_log_probs = action_log_probs.view(num_steps, num_processes, 1)

        advantages = rollouts.returns[:-1] - values
        value_loss = advantages.pow(2).mean()

        # detach() makes the advantages a constant for the policy term.
        action_gain = (advantages.detach() * action_log_probs).mean()

        total_loss = (value_loss * value_loss_coef -
                      action_gain - dist_entropy * entropy_coef)

        self.optimizer.zero_grad()  # reset the gradients
        total_loss.backward()  # backpropagation
        # Clip the gradient norm to max_grad_norm so the parameters do not
        # change too much in a single update.
        nn.utils.clip_grad_norm_(self.actor_critic.parameters(), max_grad_norm)

        self.optimizer.step()  # update the parameters


In [29]:
# Breakoutを実行する環境のクラス


class Environment:
    '''Runs A2C training on Breakout.'''

    def run(self):
        '''Seed, build the parallel environments, train, and always close them.'''
        # Seeds
        seed_num = 1
        torch.manual_seed(seed_num)
        if use_cuda:
            torch.cuda.manual_seed(seed_num)

        # One torch thread; this is a thread count and unrelated to the seed.
        torch.set_num_threads(1)

        # Build the environments and run them in separate processes.
        envs = [make_env(ENV_NAME, seed_num, i) for i in range(NUM_PROCESSES)]
        envs = SubprocVecEnv(envs)
        try:
            self._train(envs)
        finally:
            # Shut the worker processes down even if training raised.
            envs.close()

    def _train(self, envs):
        '''Run the full training loop on the already-built ``envs``.'''
        # One Brain shared by all agents.
        n_out = envs.action_space.n  # number of discrete actions
        actor_critic = Net(n_out).to(device)
        global_brain = Brain(actor_critic)

        # Buffers
        obs_shape = envs.observation_space.shape  # (1, 84, 84)
        obs_shape = (obs_shape[0] * NUM_STACK_FRAME,
                     *obs_shape[1:])  # (4, 84, 84)
        # torch.Size([16, 4, 84, 84])
        current_obs = torch.zeros(NUM_PROCESSES, *obs_shape).to(device)
        rollouts = RolloutStorage(
            NUM_ADVANCED_STEP, NUM_PROCESSES, obs_shape)
        # Reward accumulated in the episode currently running.
        episode_rewards = torch.zeros([NUM_PROCESSES, 1])
        # Reward total of the most recently finished episode.
        final_rewards = torch.zeros([NUM_PROCESSES, 1])

        # Initial state
        obs = envs.reset()
        obs = torch.from_numpy(obs).float()  # torch.Size([16, 1, 84, 84])
        current_obs[:, -1:] = obs  # newest frame goes into the last slot

        # Store the current state as the first observation of the rollout.
        rollouts.observations[0].copy_(current_obs)

        # Main loop
        for j in tqdm(range(NUM_UPDATES)):
            # Collect NUM_ADVANCED_STEP steps before each update.
            for step in range(NUM_ADVANCED_STEP):

                # Choose the actions.
                with torch.no_grad():
                    action = actor_critic.act(rollouts.observations[step])

                cpu_actions = action.squeeze(1).cpu().numpy()  # tensor -> NumPy

                # One parallel step; obs has size (16, 1, 84, 84).
                obs, reward, done, info = envs.step(cpu_actions)

                # Reward -> tensor of size (16, 1), added to the running total.
                reward = np.expand_dims(np.stack(reward), 1)
                reward = torch.from_numpy(reward).float()
                episode_rewards += reward

                # Per environment: mask 0 when done, 1 while still running.
                masks = torch.FloatTensor(
                    [[0.0] if done_ else [1.0] for done_ in done])

                # Where done, replace final_rewards by the finished episode's
                # total; elsewhere leave it unchanged.
                final_rewards *= masks
                final_rewards += (1 - masks) * episode_rewards

                # Where done, restart the running total at 0.
                episode_rewards *= masks

                masks = masks.to(device)

                # Zero the stacked frames of environments that are done.
                # Mask size goes [16, 1] -> [16, 1, 1, 1] for broadcasting.
                current_obs *= masks.unsqueeze(2).unsqueeze(2)

                # Stack frames: shift the older frames down by one slot and
                # write the newest observation into the last slot.
                obs = torch.from_numpy(obs).float()
                current_obs[:, :-1] = current_obs[:, 1:]
                current_obs[:, -1:] = obs

                # Store this step's transition.
                rollouts.insert(current_obs, action.data, reward, masks)

            # State value predicted for the last state of the rollout.
            with torch.no_grad():
                next_value = actor_critic.get_value(
                    rollouts.observations[-1]).detach()

            # Discounted reward sums for every step of the rollout.
            rollouts.compute_returns(next_value)

            # Update the network, then prepare the rollout for the next round.
            global_brain.update(rollouts)
            rollouts.after_update()

            # Progress log
            if j % 100 == 0:
                print("finished frames {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}".
                      format(j*NUM_PROCESSES*NUM_ADVANCED_STEP,
                             final_rewards.mean(),
                             final_rewards.median(),
                             final_rewards.min(),
                             final_rewards.max()))

            # Periodic checkpoint of the parameters
            if j % 12500 == 0:
                torch.save(global_brain.actor_critic.state_dict(),
                           'weight_'+str(j)+'.pth')

        # Final save once the loop has finished
        torch.save(global_brain.actor_critic.state_dict(), 'weight_end.pth')
        

In [30]:
# Run the training
breakout_env = Environment()
breakout_env.run()


  0%|          | 0/125000 [00:00<?, ?it/s]

finished frames 0, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


  0%|          | 100/125000 [00:06<1:32:30, 22.50it/s]

finished frames 8000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


  0%|          | 199/125000 [00:11<1:37:54, 21.24it/s]

finished frames 16000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


  0%|          | 298/125000 [00:15<1:31:24, 22.74it/s]

finished frames 24000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


  0%|          | 400/125000 [00:20<1:36:29, 21.52it/s]

finished frames 32000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


  0%|          | 499/125000 [00:24<1:31:04, 22.78it/s]

finished frames 40000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  0%|          | 598/125000 [00:29<1:33:43, 22.12it/s]

finished frames 48000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


  1%|          | 700/125000 [00:33<1:34:15, 21.98it/s]

finished frames 56000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  1%|          | 799/125000 [00:38<1:35:16, 21.72it/s]

finished frames 64000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  1%|          | 898/125000 [00:42<1:38:02, 21.10it/s]

finished frames 72000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  1%|          | 1000/125000 [00:47<1:30:53, 22.74it/s]

finished frames 80000, mean/median reward 0.8/0.0, min/max reward 0.0/4.0


  1%|          | 1099/125000 [00:51<1:34:59, 21.74it/s]

finished frames 88000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


  1%|          | 1198/125000 [00:56<1:29:50, 22.97it/s]

finished frames 96000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  1%|          | 1300/125000 [01:00<1:31:33, 22.52it/s]

finished frames 104000, mean/median reward 0.6/0.0, min/max reward 0.0/4.0


  1%|          | 1399/125000 [01:05<1:32:20, 22.31it/s]

finished frames 112000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  1%|          | 1498/125000 [01:09<1:31:25, 22.52it/s]

finished frames 120000, mean/median reward 0.6/1.0, min/max reward 0.0/1.0


  1%|▏         | 1600/125000 [01:13<1:30:58, 22.61it/s]

finished frames 128000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


  1%|▏         | 1699/125000 [01:18<1:32:35, 22.19it/s]

finished frames 136000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


  1%|▏         | 1798/125000 [01:22<1:30:58, 22.57it/s]

finished frames 144000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


  2%|▏         | 1900/125000 [01:27<1:29:40, 22.88it/s]

finished frames 152000, mean/median reward 1.1/0.0, min/max reward 0.0/5.0


  2%|▏         | 1999/125000 [01:31<1:31:34, 22.39it/s]

finished frames 160000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


  2%|▏         | 2098/125000 [01:35<1:31:31, 22.38it/s]

finished frames 168000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


  2%|▏         | 2200/125000 [01:40<1:31:21, 22.40it/s]

finished frames 176000, mean/median reward 0.8/0.0, min/max reward 0.0/3.0


  2%|▏         | 2299/125000 [01:44<1:27:21, 23.41it/s]

finished frames 184000, mean/median reward 0.7/1.0, min/max reward 0.0/2.0


  2%|▏         | 2398/125000 [01:49<1:28:06, 23.19it/s]

finished frames 192000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


  2%|▏         | 2500/125000 [01:53<1:32:15, 22.13it/s]

finished frames 200000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


  2%|▏         | 2599/125000 [01:57<1:27:38, 23.28it/s]

finished frames 208000, mean/median reward 0.8/0.0, min/max reward 0.0/5.0


  2%|▏         | 2698/125000 [02:02<1:26:17, 23.62it/s]

finished frames 216000, mean/median reward 0.7/0.0, min/max reward 0.0/3.0


  2%|▏         | 2800/125000 [02:06<1:31:55, 22.16it/s]

finished frames 224000, mean/median reward 1.1/1.0, min/max reward 0.0/4.0


  2%|▏         | 2899/125000 [02:10<1:26:26, 23.54it/s]

finished frames 232000, mean/median reward 1.1/1.0, min/max reward 0.0/4.0


  2%|▏         | 2998/125000 [02:15<1:26:48, 23.42it/s]

finished frames 240000, mean/median reward 1.1/1.0, min/max reward 0.0/5.0


  2%|▏         | 3100/125000 [02:19<1:29:50, 22.61it/s]

finished frames 248000, mean/median reward 1.3/1.0, min/max reward 0.0/4.0


  3%|▎         | 3199/125000 [02:23<1:26:28, 23.47it/s]

finished frames 256000, mean/median reward 0.9/1.0, min/max reward 0.0/3.0


  3%|▎         | 3298/125000 [02:28<1:25:48, 23.64it/s]

finished frames 264000, mean/median reward 1.3/1.0, min/max reward 0.0/4.0


  3%|▎         | 3400/125000 [02:32<1:27:02, 23.28it/s]

finished frames 272000, mean/median reward 2.1/2.0, min/max reward 0.0/4.0


  3%|▎         | 3499/125000 [02:36<1:24:52, 23.86it/s]

finished frames 280000, mean/median reward 1.8/1.0, min/max reward 0.0/6.0


  3%|▎         | 3598/125000 [02:40<1:25:27, 23.68it/s]

finished frames 288000, mean/median reward 1.4/1.0, min/max reward 0.0/4.0


  3%|▎         | 3700/125000 [02:45<1:28:05, 22.95it/s]

finished frames 296000, mean/median reward 1.8/1.0, min/max reward 0.0/8.0


  3%|▎         | 3799/125000 [02:49<1:25:20, 23.67it/s]

finished frames 304000, mean/median reward 1.4/1.0, min/max reward 0.0/4.0


  3%|▎         | 3898/125000 [02:53<1:26:08, 23.43it/s]

finished frames 312000, mean/median reward 1.7/1.0, min/max reward 0.0/5.0


  3%|▎         | 4000/125000 [02:58<1:24:34, 23.84it/s]

finished frames 320000, mean/median reward 1.8/1.0, min/max reward 0.0/5.0


  3%|▎         | 4099/125000 [03:02<1:26:19, 23.34it/s]

finished frames 328000, mean/median reward 2.0/1.0, min/max reward 0.0/9.0


  3%|▎         | 4198/125000 [03:06<1:26:36, 23.25it/s]

finished frames 336000, mean/median reward 1.6/1.0, min/max reward 0.0/4.0


  3%|▎         | 4300/125000 [03:10<1:27:36, 22.96it/s]

finished frames 344000, mean/median reward 2.8/2.0, min/max reward 1.0/7.0


  4%|▎         | 4399/125000 [03:15<1:23:33, 24.06it/s]

finished frames 352000, mean/median reward 2.6/2.0, min/max reward 0.0/6.0


  4%|▎         | 4498/125000 [03:19<1:23:07, 24.16it/s]

finished frames 360000, mean/median reward 3.3/4.0, min/max reward 0.0/6.0


  4%|▎         | 4600/125000 [03:23<1:27:13, 23.01it/s]

finished frames 368000, mean/median reward 2.1/2.0, min/max reward 1.0/4.0


  4%|▍         | 4699/125000 [03:27<1:25:34, 23.43it/s]

finished frames 376000, mean/median reward 3.1/2.0, min/max reward 1.0/7.0


  4%|▍         | 4798/125000 [03:32<1:24:34, 23.69it/s]

finished frames 384000, mean/median reward 2.8/2.0, min/max reward 0.0/6.0


  4%|▍         | 4900/125000 [03:36<1:24:21, 23.73it/s]

finished frames 392000, mean/median reward 3.0/2.0, min/max reward 0.0/8.0


  4%|▍         | 4999/125000 [03:40<1:23:24, 23.98it/s]

finished frames 400000, mean/median reward 2.1/1.0, min/max reward 0.0/7.0


  4%|▍         | 5098/125000 [03:44<1:23:52, 23.83it/s]

finished frames 408000, mean/median reward 3.2/1.0, min/max reward 0.0/11.0


  4%|▍         | 5200/125000 [03:48<1:23:05, 24.03it/s]

finished frames 416000, mean/median reward 3.2/3.0, min/max reward 0.0/9.0


  4%|▍         | 5299/125000 [03:53<1:23:37, 23.86it/s]

finished frames 424000, mean/median reward 3.3/3.0, min/max reward 1.0/9.0


  4%|▍         | 5398/125000 [03:57<1:22:24, 24.19it/s]

finished frames 432000, mean/median reward 4.4/4.0, min/max reward 2.0/11.0


  4%|▍         | 5500/125000 [04:01<1:23:34, 23.83it/s]

finished frames 440000, mean/median reward 2.5/2.0, min/max reward 1.0/5.0


  4%|▍         | 5599/125000 [04:05<1:22:41, 24.07it/s]

finished frames 448000, mean/median reward 3.2/2.0, min/max reward 0.0/7.0


  5%|▍         | 5698/125000 [04:09<1:24:16, 23.59it/s]

finished frames 456000, mean/median reward 4.0/4.0, min/max reward 1.0/11.0


  5%|▍         | 5800/125000 [04:14<1:24:50, 23.42it/s]

finished frames 464000, mean/median reward 3.8/2.0, min/max reward 0.0/12.0


  5%|▍         | 5899/125000 [04:18<1:24:12, 23.57it/s]

finished frames 472000, mean/median reward 3.9/3.0, min/max reward 0.0/11.0


  5%|▍         | 5998/125000 [04:22<1:22:50, 23.94it/s]

finished frames 480000, mean/median reward 4.2/4.0, min/max reward 1.0/9.0


  5%|▍         | 6100/125000 [04:26<1:22:58, 23.88it/s]

finished frames 488000, mean/median reward 3.2/3.0, min/max reward 0.0/6.0


  5%|▍         | 6199/125000 [04:31<1:22:18, 24.05it/s]

finished frames 496000, mean/median reward 4.8/3.0, min/max reward 1.0/19.0


  5%|▌         | 6298/125000 [04:35<1:21:22, 24.31it/s]

finished frames 504000, mean/median reward 3.8/3.0, min/max reward 1.0/9.0


  5%|▌         | 6400/125000 [04:39<1:26:08, 22.95it/s]

finished frames 512000, mean/median reward 6.3/4.0, min/max reward 1.0/22.0


  5%|▌         | 6499/125000 [04:43<1:25:04, 23.22it/s]

finished frames 520000, mean/median reward 3.2/3.0, min/max reward 0.0/9.0


  5%|▌         | 6598/125000 [04:47<1:22:10, 24.01it/s]

finished frames 528000, mean/median reward 4.4/3.0, min/max reward 0.0/15.0


  5%|▌         | 6700/125000 [04:52<1:22:29, 23.90it/s]

finished frames 536000, mean/median reward 6.6/6.0, min/max reward 0.0/18.0


  5%|▌         | 6799/125000 [04:56<1:22:16, 23.94it/s]

finished frames 544000, mean/median reward 4.2/3.0, min/max reward 1.0/11.0


  6%|▌         | 6898/125000 [05:00<1:24:01, 23.43it/s]

finished frames 552000, mean/median reward 4.8/4.0, min/max reward 0.0/19.0


  6%|▌         | 7000/125000 [05:04<1:21:48, 24.04it/s]

finished frames 560000, mean/median reward 5.3/4.0, min/max reward 0.0/22.0


  6%|▌         | 7099/125000 [05:08<1:21:28, 24.12it/s]

finished frames 568000, mean/median reward 4.5/5.0, min/max reward 0.0/8.0


  6%|▌         | 7198/125000 [05:13<1:22:46, 23.72it/s]

finished frames 576000, mean/median reward 6.2/6.0, min/max reward 1.0/16.0


  6%|▌         | 7300/125000 [05:17<1:21:24, 24.10it/s]

finished frames 584000, mean/median reward 4.1/3.0, min/max reward 0.0/16.0


  6%|▌         | 7399/125000 [05:21<1:21:44, 23.98it/s]

finished frames 592000, mean/median reward 5.4/4.0, min/max reward 1.0/15.0


  6%|▌         | 7498/125000 [05:25<1:20:56, 24.20it/s]

finished frames 600000, mean/median reward 5.5/4.0, min/max reward 1.0/19.0


  6%|▌         | 7600/125000 [05:29<1:23:03, 23.56it/s]

finished frames 608000, mean/median reward 7.1/5.0, min/max reward 2.0/17.0


  6%|▌         | 7699/125000 [05:34<1:21:19, 24.04it/s]

finished frames 616000, mean/median reward 4.4/4.0, min/max reward 0.0/12.0


  6%|▌         | 7798/125000 [05:38<1:23:21, 23.43it/s]

finished frames 624000, mean/median reward 5.2/5.0, min/max reward 1.0/12.0


  6%|▋         | 7900/125000 [05:42<1:23:53, 23.26it/s]

finished frames 632000, mean/median reward 5.3/4.0, min/max reward 1.0/20.0


  6%|▋         | 7999/125000 [05:46<1:20:18, 24.28it/s]

finished frames 640000, mean/median reward 5.1/4.0, min/max reward 0.0/13.0


  6%|▋         | 8098/125000 [05:50<1:20:03, 24.34it/s]

finished frames 648000, mean/median reward 6.8/5.0, min/max reward 1.0/15.0


  7%|▋         | 8200/125000 [05:55<1:20:18, 24.24it/s]

finished frames 656000, mean/median reward 5.3/4.0, min/max reward 0.0/13.0


  7%|▋         | 8299/125000 [05:59<1:20:06, 24.28it/s]

finished frames 664000, mean/median reward 7.0/5.0, min/max reward 1.0/18.0


  7%|▋         | 8398/125000 [06:03<1:24:07, 23.10it/s]

finished frames 672000, mean/median reward 5.7/5.0, min/max reward 0.0/16.0


  7%|▋         | 8500/125000 [06:07<1:20:17, 24.18it/s]

finished frames 680000, mean/median reward 5.8/5.0, min/max reward 0.0/13.0


  7%|▋         | 8599/125000 [06:11<1:21:30, 23.80it/s]

finished frames 688000, mean/median reward 6.6/4.0, min/max reward 0.0/29.0


  7%|▋         | 8698/125000 [06:16<1:22:29, 23.50it/s]

finished frames 696000, mean/median reward 6.1/4.0, min/max reward 2.0/21.0


  7%|▋         | 8800/125000 [06:20<1:22:07, 23.58it/s]

finished frames 704000, mean/median reward 7.5/5.0, min/max reward 1.0/27.0


  7%|▋         | 8899/125000 [06:24<1:20:41, 23.98it/s]

finished frames 712000, mean/median reward 6.7/6.0, min/max reward 1.0/17.0


  7%|▋         | 8998/125000 [06:28<1:22:39, 23.39it/s]

finished frames 720000, mean/median reward 9.8/5.0, min/max reward 1.0/29.0


  7%|▋         | 9100/125000 [06:32<1:20:47, 23.91it/s]

finished frames 728000, mean/median reward 7.1/6.0, min/max reward 1.0/18.0


  7%|▋         | 9199/125000 [06:37<1:19:53, 24.16it/s]

finished frames 736000, mean/median reward 5.7/5.0, min/max reward 0.0/14.0


  7%|▋         | 9298/125000 [06:41<1:19:28, 24.27it/s]

finished frames 744000, mean/median reward 6.6/6.0, min/max reward 1.0/16.0


  8%|▊         | 9400/125000 [06:45<1:19:49, 24.13it/s]

finished frames 752000, mean/median reward 5.6/5.0, min/max reward 1.0/14.0


  8%|▊         | 9499/125000 [06:49<1:20:12, 24.00it/s]

finished frames 760000, mean/median reward 7.9/7.0, min/max reward 2.0/15.0


  8%|▊         | 9598/125000 [06:53<1:19:41, 24.13it/s]

finished frames 768000, mean/median reward 5.8/5.0, min/max reward 2.0/14.0


  8%|▊         | 9700/125000 [06:58<1:20:51, 23.76it/s]

finished frames 776000, mean/median reward 8.8/6.0, min/max reward 1.0/35.0


  8%|▊         | 9799/125000 [07:02<1:19:35, 24.12it/s]

finished frames 784000, mean/median reward 5.5/5.0, min/max reward 1.0/11.0


  8%|▊         | 9898/125000 [07:06<1:19:19, 24.18it/s]

finished frames 792000, mean/median reward 7.1/5.0, min/max reward 3.0/15.0


  8%|▊         | 10000/125000 [07:10<1:19:40, 24.06it/s]

finished frames 800000, mean/median reward 5.2/5.0, min/max reward 0.0/15.0


  8%|▊         | 10099/125000 [07:14<1:22:14, 23.28it/s]

finished frames 808000, mean/median reward 5.3/4.0, min/max reward 1.0/11.0


  8%|▊         | 10198/125000 [07:18<1:19:43, 24.00it/s]

finished frames 816000, mean/median reward 5.8/4.0, min/max reward 1.0/22.0


  8%|▊         | 10300/125000 [07:23<1:20:43, 23.68it/s]

finished frames 824000, mean/median reward 6.7/6.0, min/max reward 1.0/15.0


  8%|▊         | 10399/125000 [07:27<1:20:39, 23.68it/s]

finished frames 832000, mean/median reward 8.6/6.0, min/max reward 0.0/25.0


  8%|▊         | 10498/125000 [07:31<1:23:05, 22.97it/s]

finished frames 840000, mean/median reward 9.6/7.0, min/max reward 1.0/20.0


  8%|▊         | 10600/125000 [07:35<1:21:03, 23.52it/s]

finished frames 848000, mean/median reward 8.2/6.0, min/max reward 1.0/21.0


  9%|▊         | 10699/125000 [07:40<1:20:24, 23.69it/s]

finished frames 856000, mean/median reward 6.9/6.0, min/max reward 1.0/24.0


  9%|▊         | 10798/125000 [07:44<1:19:54, 23.82it/s]

finished frames 864000, mean/median reward 8.2/8.0, min/max reward 1.0/16.0


  9%|▊         | 10900/125000 [07:48<1:21:18, 23.39it/s]

finished frames 872000, mean/median reward 6.6/5.0, min/max reward 3.0/19.0


  9%|▉         | 10999/125000 [07:52<1:19:35, 23.87it/s]

finished frames 880000, mean/median reward 8.2/8.0, min/max reward 1.0/27.0


  9%|▉         | 11098/125000 [07:56<1:18:36, 24.15it/s]

finished frames 888000, mean/median reward 4.3/4.0, min/max reward 0.0/11.0


  9%|▉         | 11200/125000 [08:01<1:18:46, 24.08it/s]

finished frames 896000, mean/median reward 6.3/4.0, min/max reward 1.0/21.0


  9%|▉         | 11299/125000 [08:05<1:17:53, 24.33it/s]

finished frames 904000, mean/median reward 7.4/6.0, min/max reward 0.0/20.0


  9%|▉         | 11398/125000 [08:09<1:18:29, 24.12it/s]

finished frames 912000, mean/median reward 12.5/10.0, min/max reward 2.0/26.0


  9%|▉         | 11500/125000 [08:13<1:18:51, 23.99it/s]

finished frames 920000, mean/median reward 8.9/8.0, min/max reward 1.0/18.0


  9%|▉         | 11599/125000 [08:17<1:18:21, 24.12it/s]

finished frames 928000, mean/median reward 10.6/7.0, min/max reward 4.0/31.0


  9%|▉         | 11698/125000 [08:21<1:18:36, 24.02it/s]

finished frames 936000, mean/median reward 12.0/8.0, min/max reward 1.0/31.0


  9%|▉         | 11800/125000 [08:26<1:18:23, 24.07it/s]

finished frames 944000, mean/median reward 9.7/8.0, min/max reward 0.0/32.0


 10%|▉         | 11899/125000 [08:30<1:18:15, 24.09it/s]

finished frames 952000, mean/median reward 7.2/4.0, min/max reward 1.0/21.0


 10%|▉         | 11998/125000 [08:34<1:18:40, 23.94it/s]

finished frames 960000, mean/median reward 20.4/4.0, min/max reward 1.0/249.0


 10%|▉         | 12100/125000 [08:38<1:18:47, 23.88it/s]

finished frames 968000, mean/median reward 8.1/7.0, min/max reward 2.0/14.0


 10%|▉         | 12199/125000 [08:42<1:18:14, 24.03it/s]

finished frames 976000, mean/median reward 10.1/7.0, min/max reward 1.0/29.0


 10%|▉         | 12298/125000 [08:47<1:16:54, 24.43it/s]

finished frames 984000, mean/median reward 24.8/8.0, min/max reward 0.0/232.0


 10%|▉         | 12400/125000 [08:51<1:18:01, 24.05it/s]

finished frames 992000, mean/median reward 8.3/7.0, min/max reward 1.0/19.0


 10%|▉         | 12499/125000 [08:55<1:17:43, 24.12it/s]

finished frames 1000000, mean/median reward 8.4/5.0, min/max reward 1.0/29.0


 10%|█         | 12598/125000 [08:59<1:19:59, 23.42it/s]

finished frames 1008000, mean/median reward 9.6/8.0, min/max reward 2.0/20.0


 10%|█         | 12700/125000 [09:04<1:18:03, 23.98it/s]

finished frames 1016000, mean/median reward 6.9/5.0, min/max reward 0.0/18.0


 10%|█         | 12799/125000 [09:08<1:17:12, 24.22it/s]

finished frames 1024000, mean/median reward 14.1/8.0, min/max reward 2.0/38.0


 10%|█         | 12898/125000 [09:12<1:20:06, 23.32it/s]

finished frames 1032000, mean/median reward 7.6/4.0, min/max reward 1.0/18.0


 10%|█         | 13000/125000 [09:16<1:17:37, 24.05it/s]

finished frames 1040000, mean/median reward 4.8/4.0, min/max reward 0.0/14.0


 10%|█         | 13099/125000 [09:20<1:18:10, 23.86it/s]

finished frames 1048000, mean/median reward 13.4/11.0, min/max reward 1.0/36.0


 11%|█         | 13198/125000 [09:24<1:16:43, 24.29it/s]

finished frames 1056000, mean/median reward 14.1/11.0, min/max reward 0.0/51.0


 11%|█         | 13300/125000 [09:29<1:18:13, 23.80it/s]

finished frames 1064000, mean/median reward 9.9/7.0, min/max reward 3.0/27.0


 11%|█         | 13399/125000 [09:33<1:17:11, 24.10it/s]

finished frames 1072000, mean/median reward 10.4/7.0, min/max reward 1.0/38.0


 11%|█         | 13498/125000 [09:37<1:18:38, 23.63it/s]

finished frames 1080000, mean/median reward 13.8/9.0, min/max reward 3.0/33.0


 11%|█         | 13600/125000 [09:41<1:17:41, 23.90it/s]

finished frames 1088000, mean/median reward 9.4/8.0, min/max reward 2.0/24.0


 11%|█         | 13699/125000 [09:45<1:17:47, 23.85it/s]

finished frames 1096000, mean/median reward 9.2/9.0, min/max reward 0.0/21.0


 11%|█         | 13798/125000 [09:50<1:19:59, 23.17it/s]

finished frames 1104000, mean/median reward 10.6/9.0, min/max reward 0.0/26.0


 11%|█         | 13900/125000 [09:54<1:24:36, 21.88it/s]

finished frames 1112000, mean/median reward 8.8/5.0, min/max reward 0.0/24.0


 11%|█         | 13999/125000 [09:58<1:20:09, 23.08it/s]

finished frames 1120000, mean/median reward 17.0/3.0, min/max reward 0.0/187.0


 11%|█▏        | 14098/125000 [10:03<1:22:03, 22.53it/s]

finished frames 1128000, mean/median reward 6.0/6.0, min/max reward 1.0/14.0


 11%|█▏        | 14200/125000 [10:07<1:15:32, 24.45it/s]

finished frames 1136000, mean/median reward 10.1/5.0, min/max reward 1.0/39.0


 11%|█▏        | 14299/125000 [10:11<1:18:37, 23.46it/s]

finished frames 1144000, mean/median reward 7.3/5.0, min/max reward 1.0/32.0


 12%|█▏        | 14398/125000 [10:15<1:17:33, 23.77it/s]

finished frames 1152000, mean/median reward 9.9/7.0, min/max reward 1.0/25.0


 12%|█▏        | 14500/125000 [10:20<1:24:16, 21.85it/s]

finished frames 1160000, mean/median reward 11.9/8.0, min/max reward 0.0/44.0


 12%|█▏        | 14599/125000 [10:24<1:17:57, 23.60it/s]

finished frames 1168000, mean/median reward 8.7/7.0, min/max reward 1.0/34.0


 12%|█▏        | 14698/125000 [10:28<1:19:58, 22.99it/s]

finished frames 1176000, mean/median reward 9.0/5.0, min/max reward 0.0/31.0


 12%|█▏        | 14800/125000 [10:32<1:16:45, 23.93it/s]

finished frames 1184000, mean/median reward 10.6/7.0, min/max reward 1.0/31.0


 12%|█▏        | 14899/125000 [10:37<1:21:10, 22.61it/s]

finished frames 1192000, mean/median reward 12.9/12.0, min/max reward 0.0/27.0


 12%|█▏        | 14998/125000 [10:41<1:16:06, 24.09it/s]

finished frames 1200000, mean/median reward 19.2/9.0, min/max reward 2.0/54.0


 12%|█▏        | 15100/125000 [10:45<1:17:16, 23.70it/s]

finished frames 1208000, mean/median reward 11.3/5.0, min/max reward 1.0/42.0


 12%|█▏        | 15199/125000 [10:49<1:16:02, 24.07it/s]

finished frames 1216000, mean/median reward 7.4/6.0, min/max reward 0.0/17.0


 12%|█▏        | 15298/125000 [10:54<1:19:23, 23.03it/s]

finished frames 1224000, mean/median reward 9.6/7.0, min/max reward 1.0/37.0


 12%|█▏        | 15400/125000 [10:58<1:18:08, 23.37it/s]

finished frames 1232000, mean/median reward 6.1/5.0, min/max reward 0.0/20.0


 12%|█▏        | 15499/125000 [11:02<1:16:52, 23.74it/s]

finished frames 1240000, mean/median reward 6.7/5.0, min/max reward 0.0/24.0


 12%|█▏        | 15598/125000 [11:06<1:16:04, 23.97it/s]

finished frames 1248000, mean/median reward 3.5/1.0, min/max reward 0.0/22.0


 13%|█▎        | 15700/125000 [11:11<1:18:07, 23.32it/s]

finished frames 1256000, mean/median reward 9.8/5.0, min/max reward 1.0/27.0


 13%|█▎        | 15799/125000 [11:15<1:18:32, 23.17it/s]

finished frames 1264000, mean/median reward 11.1/4.0, min/max reward 2.0/68.0


 13%|█▎        | 15898/125000 [11:19<1:16:34, 23.75it/s]

finished frames 1272000, mean/median reward 10.9/7.0, min/max reward 0.0/37.0


 13%|█▎        | 16000/125000 [11:23<1:16:47, 23.66it/s]

finished frames 1280000, mean/median reward 12.2/9.0, min/max reward 0.0/42.0


 13%|█▎        | 16099/125000 [11:28<1:16:19, 23.78it/s]

finished frames 1288000, mean/median reward 15.5/5.0, min/max reward 1.0/80.0


 13%|█▎        | 16198/125000 [11:32<1:18:21, 23.14it/s]

finished frames 1296000, mean/median reward 10.8/10.0, min/max reward 1.0/25.0


 13%|█▎        | 16300/125000 [11:36<1:20:34, 22.48it/s]

finished frames 1304000, mean/median reward 10.4/8.0, min/max reward 0.0/37.0


 13%|█▎        | 16399/125000 [11:40<1:14:23, 24.33it/s]

finished frames 1312000, mean/median reward 8.1/6.0, min/max reward 1.0/24.0


 13%|█▎        | 16498/125000 [11:45<1:18:33, 23.02it/s]

finished frames 1320000, mean/median reward 12.5/7.0, min/max reward 1.0/39.0


 13%|█▎        | 16600/125000 [11:49<1:15:32, 23.91it/s]

finished frames 1328000, mean/median reward 11.6/8.0, min/max reward 1.0/27.0


 13%|█▎        | 16699/125000 [11:53<1:17:22, 23.33it/s]

finished frames 1336000, mean/median reward 32.7/11.0, min/max reward 4.0/304.0


 13%|█▎        | 16798/125000 [11:58<1:17:20, 23.32it/s]

finished frames 1344000, mean/median reward 5.2/4.0, min/max reward 0.0/26.0


 14%|█▎        | 16900/125000 [12:02<1:16:35, 23.52it/s]

finished frames 1352000, mean/median reward 5.1/4.0, min/max reward 0.0/17.0


 14%|█▎        | 16999/125000 [12:06<1:14:20, 24.21it/s]

finished frames 1360000, mean/median reward 10.5/9.0, min/max reward 1.0/20.0


 14%|█▎        | 17098/125000 [12:10<1:14:02, 24.29it/s]

finished frames 1368000, mean/median reward 15.1/11.0, min/max reward 4.0/33.0


 14%|█▍        | 17200/125000 [12:15<1:14:10, 24.22it/s]

finished frames 1376000, mean/median reward 13.7/10.0, min/max reward 1.0/32.0


 14%|█▍        | 17299/125000 [12:19<1:16:14, 23.54it/s]

finished frames 1384000, mean/median reward 17.8/11.0, min/max reward 4.0/55.0


 14%|█▍        | 17398/125000 [12:23<1:16:57, 23.30it/s]

finished frames 1392000, mean/median reward 17.9/6.0, min/max reward 1.0/155.0


 14%|█▍        | 17500/125000 [12:27<1:18:13, 22.91it/s]

finished frames 1400000, mean/median reward 19.8/8.0, min/max reward 3.0/155.0


 14%|█▍        | 17599/125000 [12:31<1:15:10, 23.81it/s]

finished frames 1408000, mean/median reward 16.6/13.0, min/max reward 1.0/47.0


 14%|█▍        | 17698/125000 [12:36<1:16:47, 23.29it/s]

finished frames 1416000, mean/median reward 6.8/5.0, min/max reward 1.0/24.0


 14%|█▍        | 17800/125000 [12:40<1:15:26, 23.68it/s]

finished frames 1424000, mean/median reward 11.6/10.0, min/max reward 5.0/32.0


 14%|█▍        | 17899/125000 [12:44<1:15:22, 23.68it/s]

finished frames 1432000, mean/median reward 10.1/7.0, min/max reward 1.0/37.0


 14%|█▍        | 17998/125000 [12:48<1:14:53, 23.82it/s]

finished frames 1440000, mean/median reward 17.2/13.0, min/max reward 3.0/68.0


 14%|█▍        | 18100/125000 [12:53<1:17:56, 22.86it/s]

finished frames 1448000, mean/median reward 8.9/8.0, min/max reward 1.0/19.0


 15%|█▍        | 18199/125000 [12:57<1:15:00, 23.73it/s]

finished frames 1456000, mean/median reward 19.5/15.0, min/max reward 2.0/60.0


 15%|█▍        | 18298/125000 [13:01<1:15:29, 23.56it/s]

finished frames 1464000, mean/median reward 11.5/11.0, min/max reward 4.0/25.0


 15%|█▍        | 18400/125000 [13:05<1:16:55, 23.10it/s]

finished frames 1472000, mean/median reward 14.9/12.0, min/max reward 3.0/36.0


 15%|█▍        | 18499/125000 [13:10<1:16:20, 23.25it/s]

finished frames 1480000, mean/median reward 14.0/11.0, min/max reward 1.0/34.0


 15%|█▍        | 18598/125000 [13:14<1:15:34, 23.46it/s]

finished frames 1488000, mean/median reward 11.3/6.0, min/max reward 1.0/37.0


 15%|█▍        | 18700/125000 [13:18<1:16:16, 23.23it/s]

finished frames 1496000, mean/median reward 12.2/9.0, min/max reward 1.0/30.0


 15%|█▌        | 18799/125000 [13:22<1:14:27, 23.77it/s]

finished frames 1504000, mean/median reward 18.5/12.0, min/max reward 1.0/68.0


 15%|█▌        | 18898/125000 [13:27<1:15:56, 23.29it/s]

finished frames 1512000, mean/median reward 11.8/7.0, min/max reward 1.0/47.0


 15%|█▌        | 19000/125000 [13:31<1:18:30, 22.50it/s]

finished frames 1520000, mean/median reward 9.6/5.0, min/max reward 0.0/35.0


 15%|█▌        | 19099/125000 [13:35<1:15:21, 23.42it/s]

finished frames 1528000, mean/median reward 10.8/8.0, min/max reward 2.0/33.0


 15%|█▌        | 19198/125000 [13:39<1:13:44, 23.91it/s]

finished frames 1536000, mean/median reward 21.4/12.0, min/max reward 0.0/95.0


 15%|█▌        | 19300/125000 [13:44<1:13:44, 23.89it/s]

finished frames 1544000, mean/median reward 6.2/4.0, min/max reward 0.0/20.0


 16%|█▌        | 19399/125000 [13:48<1:14:05, 23.76it/s]

finished frames 1552000, mean/median reward 9.1/5.0, min/max reward 1.0/28.0


 16%|█▌        | 19498/125000 [13:52<1:14:33, 23.59it/s]

finished frames 1560000, mean/median reward 10.7/7.0, min/max reward 0.0/57.0


 16%|█▌        | 19600/125000 [13:56<1:14:12, 23.67it/s]

finished frames 1568000, mean/median reward 14.1/6.0, min/max reward 1.0/70.0


 16%|█▌        | 19699/125000 [14:01<1:14:02, 23.70it/s]

finished frames 1576000, mean/median reward 11.4/8.0, min/max reward 1.0/31.0


 16%|█▌        | 19798/125000 [14:05<1:13:09, 23.97it/s]

finished frames 1584000, mean/median reward 8.9/5.0, min/max reward 1.0/35.0


 16%|█▌        | 19900/125000 [14:09<1:14:09, 23.62it/s]

finished frames 1592000, mean/median reward 12.4/8.0, min/max reward 2.0/43.0


 16%|█▌        | 19999/125000 [14:13<1:13:00, 23.97it/s]

finished frames 1600000, mean/median reward 12.6/10.0, min/max reward 1.0/44.0


 16%|█▌        | 20098/125000 [14:17<1:14:55, 23.34it/s]

finished frames 1608000, mean/median reward 14.8/11.0, min/max reward 1.0/44.0


 16%|█▌        | 20200/125000 [14:22<1:12:26, 24.11it/s]

finished frames 1616000, mean/median reward 12.8/8.0, min/max reward 2.0/42.0


 16%|█▌        | 20299/125000 [14:26<1:13:13, 23.83it/s]

finished frames 1624000, mean/median reward 12.2/10.0, min/max reward 1.0/35.0


 16%|█▋        | 20398/125000 [14:30<1:12:09, 24.16it/s]

finished frames 1632000, mean/median reward 12.4/6.0, min/max reward 1.0/56.0


 16%|█▋        | 20500/125000 [14:34<1:12:23, 24.06it/s]

finished frames 1640000, mean/median reward 9.9/5.0, min/max reward 1.0/32.0


 16%|█▋        | 20599/125000 [14:38<1:12:16, 24.08it/s]

finished frames 1648000, mean/median reward 6.2/4.0, min/max reward 1.0/18.0


 17%|█▋        | 20698/125000 [14:43<1:12:30, 23.97it/s]

finished frames 1656000, mean/median reward 10.6/10.0, min/max reward 1.0/25.0


 17%|█▋        | 20800/125000 [14:47<1:12:54, 23.82it/s]

finished frames 1664000, mean/median reward 15.1/7.0, min/max reward 1.0/43.0


 17%|█▋        | 20899/125000 [14:51<1:17:12, 22.47it/s]

finished frames 1672000, mean/median reward 3.5/1.0, min/max reward 0.0/18.0


 17%|█▋        | 20998/125000 [14:55<1:13:04, 23.72it/s]

finished frames 1680000, mean/median reward 10.9/7.0, min/max reward 0.0/27.0


 17%|█▋        | 21100/125000 [15:00<1:12:19, 23.94it/s]

finished frames 1688000, mean/median reward 9.6/5.0, min/max reward 0.0/29.0


 17%|█▋        | 21199/125000 [15:04<1:12:42, 23.79it/s]

finished frames 1696000, mean/median reward 21.3/5.0, min/max reward 2.0/198.0


 17%|█▋        | 21298/125000 [15:08<1:12:11, 23.94it/s]

finished frames 1704000, mean/median reward 19.5/5.0, min/max reward 1.0/198.0


 17%|█▋        | 21400/125000 [15:12<1:11:53, 24.02it/s]

finished frames 1712000, mean/median reward 8.9/9.0, min/max reward 0.0/22.0


 17%|█▋        | 21499/125000 [15:16<1:12:37, 23.75it/s]

finished frames 1720000, mean/median reward 18.9/5.0, min/max reward 0.0/61.0


 17%|█▋        | 21598/125000 [15:21<1:10:43, 24.37it/s]

finished frames 1728000, mean/median reward 17.1/12.0, min/max reward 1.0/45.0


 17%|█▋        | 21700/125000 [15:25<1:12:19, 23.80it/s]

finished frames 1736000, mean/median reward 18.5/12.0, min/max reward 0.0/56.0


 17%|█▋        | 21799/125000 [15:29<1:11:14, 24.14it/s]

finished frames 1744000, mean/median reward 15.6/9.0, min/max reward 0.0/82.0


 18%|█▊        | 21898/125000 [15:33<1:13:42, 23.32it/s]

finished frames 1752000, mean/median reward 4.9/2.0, min/max reward 0.0/30.0


 18%|█▊        | 22000/125000 [15:38<1:12:20, 23.73it/s]

finished frames 1760000, mean/median reward 8.8/5.0, min/max reward 0.0/41.0


 18%|█▊        | 22099/125000 [15:42<1:11:36, 23.95it/s]

finished frames 1768000, mean/median reward 11.2/11.0, min/max reward 1.0/23.0


 18%|█▊        | 22198/125000 [15:46<1:11:55, 23.82it/s]

finished frames 1776000, mean/median reward 5.4/3.0, min/max reward 0.0/16.0


 18%|█▊        | 22300/125000 [15:50<1:14:03, 23.11it/s]

finished frames 1784000, mean/median reward 14.1/8.0, min/max reward 0.0/50.0


 18%|█▊        | 22399/125000 [15:55<1:17:26, 22.08it/s]

finished frames 1792000, mean/median reward 19.6/15.0, min/max reward 2.0/69.0


 18%|█▊        | 22498/125000 [15:59<1:11:31, 23.89it/s]

finished frames 1800000, mean/median reward 30.1/6.0, min/max reward 0.0/192.0


 18%|█▊        | 22600/125000 [16:04<1:18:05, 21.85it/s]

finished frames 1808000, mean/median reward 21.9/10.0, min/max reward 0.0/180.0


 18%|█▊        | 22699/125000 [16:08<1:16:21, 22.33it/s]

finished frames 1816000, mean/median reward 26.4/12.0, min/max reward 1.0/174.0


 18%|█▊        | 22798/125000 [16:12<1:09:43, 24.43it/s]

finished frames 1824000, mean/median reward 20.3/8.0, min/max reward 1.0/123.0


 18%|█▊        | 22900/125000 [16:16<1:10:37, 24.09it/s]

finished frames 1832000, mean/median reward 11.8/9.0, min/max reward 0.0/35.0


 18%|█▊        | 22999/125000 [16:20<1:09:31, 24.45it/s]

finished frames 1840000, mean/median reward 22.3/11.0, min/max reward 3.0/137.0


 18%|█▊        | 23098/125000 [16:24<1:09:43, 24.36it/s]

finished frames 1848000, mean/median reward 24.4/11.0, min/max reward 2.0/137.0


 19%|█▊        | 23200/125000 [16:29<1:09:17, 24.48it/s]

finished frames 1856000, mean/median reward 26.1/17.0, min/max reward 4.0/145.0


 19%|█▊        | 23299/125000 [16:33<1:10:31, 24.03it/s]

finished frames 1864000, mean/median reward 18.7/9.0, min/max reward 2.0/67.0


 19%|█▊        | 23398/125000 [16:37<1:09:13, 24.46it/s]

finished frames 1872000, mean/median reward 18.0/11.0, min/max reward 0.0/53.0


 19%|█▉        | 23500/125000 [16:41<1:14:13, 22.79it/s]

finished frames 1880000, mean/median reward 8.1/3.0, min/max reward 1.0/36.0


 19%|█▉        | 23599/125000 [16:46<1:09:37, 24.27it/s]

finished frames 1888000, mean/median reward 11.5/7.0, min/max reward 1.0/30.0


 19%|█▉        | 23698/125000 [16:50<1:13:09, 23.08it/s]

finished frames 1896000, mean/median reward 9.8/7.0, min/max reward 0.0/22.0


 19%|█▉        | 23800/125000 [16:54<1:09:02, 24.43it/s]

finished frames 1904000, mean/median reward 17.4/13.0, min/max reward 4.0/47.0


 19%|█▉        | 23899/125000 [16:58<1:12:16, 23.31it/s]

finished frames 1912000, mean/median reward 21.8/17.0, min/max reward 1.0/87.0


 19%|█▉        | 23998/125000 [17:03<1:15:43, 22.23it/s]

finished frames 1920000, mean/median reward 2.5/0.0, min/max reward 0.0/23.0


 19%|█▉        | 24100/125000 [17:07<1:10:02, 24.01it/s]

finished frames 1928000, mean/median reward 5.9/4.0, min/max reward 0.0/22.0


 19%|█▉        | 24199/125000 [17:11<1:10:50, 23.72it/s]

finished frames 1936000, mean/median reward 19.6/18.0, min/max reward 1.0/48.0


 19%|█▉        | 24298/125000 [17:15<1:11:11, 23.57it/s]

finished frames 1944000, mean/median reward 9.8/8.0, min/max reward 1.0/32.0


 20%|█▉        | 24400/125000 [17:20<1:10:21, 23.83it/s]

finished frames 1952000, mean/median reward 26.1/13.0, min/max reward 2.0/157.0


 20%|█▉        | 24499/125000 [17:24<1:11:49, 23.32it/s]

finished frames 1960000, mean/median reward 12.6/10.0, min/max reward 0.0/45.0


 20%|█▉        | 24598/125000 [17:28<1:09:35, 24.05it/s]

finished frames 1968000, mean/median reward 16.2/13.0, min/max reward 1.0/70.0


 20%|█▉        | 24700/125000 [17:32<1:09:45, 23.97it/s]

finished frames 1976000, mean/median reward 17.1/10.0, min/max reward 0.0/50.0


 20%|█▉        | 24799/125000 [17:36<1:12:17, 23.10it/s]

finished frames 1984000, mean/median reward 17.8/17.0, min/max reward 0.0/39.0


 20%|█▉        | 24898/125000 [17:40<1:09:10, 24.12it/s]

finished frames 1992000, mean/median reward 19.4/11.0, min/max reward 0.0/98.0


 20%|██        | 25000/125000 [17:45<1:09:09, 24.10it/s]

finished frames 2000000, mean/median reward 8.7/5.0, min/max reward 2.0/30.0


 20%|██        | 25099/125000 [17:49<1:10:44, 23.54it/s]

finished frames 2008000, mean/median reward 13.8/12.0, min/max reward 0.0/40.0


 20%|██        | 25198/125000 [17:53<1:09:09, 24.05it/s]

finished frames 2016000, mean/median reward 21.2/13.0, min/max reward 4.0/130.0


 20%|██        | 25300/125000 [17:57<1:11:45, 23.16it/s]

finished frames 2024000, mean/median reward 13.4/13.0, min/max reward 1.0/35.0


 20%|██        | 25399/125000 [18:02<1:08:59, 24.06it/s]

finished frames 2032000, mean/median reward 12.2/9.0, min/max reward 0.0/44.0


 20%|██        | 25498/125000 [18:06<1:10:48, 23.42it/s]

finished frames 2040000, mean/median reward 14.6/12.0, min/max reward 1.0/52.0


 20%|██        | 25600/125000 [18:10<1:13:34, 22.51it/s]

finished frames 2048000, mean/median reward 16.8/7.0, min/max reward 0.0/150.0


 21%|██        | 25699/125000 [18:14<1:10:55, 23.34it/s]

finished frames 2056000, mean/median reward 13.0/8.0, min/max reward 2.0/34.0


 21%|██        | 25798/125000 [18:19<1:11:10, 23.23it/s]

finished frames 2064000, mean/median reward 21.5/9.0, min/max reward 2.0/161.0


 21%|██        | 25900/125000 [18:23<1:08:37, 24.07it/s]

finished frames 2072000, mean/median reward 10.0/7.0, min/max reward 2.0/31.0


 21%|██        | 25999/125000 [18:27<1:09:56, 23.59it/s]

finished frames 2080000, mean/median reward 17.2/4.0, min/max reward 0.0/173.0


 21%|██        | 26098/125000 [18:31<1:08:23, 24.10it/s]

finished frames 2088000, mean/median reward 8.8/7.0, min/max reward 0.0/18.0


 21%|██        | 26200/125000 [18:36<1:12:01, 22.86it/s]

finished frames 2096000, mean/median reward 13.8/13.0, min/max reward 0.0/25.0


 21%|██        | 26299/125000 [18:40<1:09:33, 23.65it/s]

finished frames 2104000, mean/median reward 17.9/16.0, min/max reward 1.0/51.0


 21%|██        | 26398/125000 [18:44<1:09:34, 23.62it/s]

finished frames 2112000, mean/median reward 16.1/9.0, min/max reward 0.0/105.0


 21%|██        | 26500/125000 [18:49<1:10:49, 23.18it/s]

finished frames 2120000, mean/median reward 0.8/1.0, min/max reward 0.0/2.0


 21%|██▏       | 26599/125000 [18:53<1:09:21, 23.65it/s]

finished frames 2128000, mean/median reward 2.4/1.0, min/max reward 0.0/21.0


 21%|██▏       | 26698/125000 [18:57<1:12:27, 22.61it/s]

finished frames 2136000, mean/median reward 16.8/12.0, min/max reward 0.0/60.0


 21%|██▏       | 26800/125000 [19:01<1:11:34, 22.87it/s]

finished frames 2144000, mean/median reward 16.2/10.0, min/max reward 0.0/67.0


 22%|██▏       | 26899/125000 [19:06<1:09:12, 23.63it/s]

finished frames 2152000, mean/median reward 10.6/6.0, min/max reward 0.0/29.0


 22%|██▏       | 26998/125000 [19:10<1:08:03, 24.00it/s]

finished frames 2160000, mean/median reward 14.4/12.0, min/max reward 0.0/33.0


 22%|██▏       | 27100/125000 [19:14<1:09:37, 23.43it/s]

finished frames 2168000, mean/median reward 18.1/12.0, min/max reward 0.0/59.0


 22%|██▏       | 27199/125000 [19:18<1:10:37, 23.08it/s]

finished frames 2176000, mean/median reward 9.4/7.0, min/max reward 0.0/27.0


 22%|██▏       | 27298/125000 [19:23<1:09:07, 23.56it/s]

finished frames 2184000, mean/median reward 10.2/7.0, min/max reward 0.0/40.0


 22%|██▏       | 27400/125000 [19:27<1:07:19, 24.16it/s]

finished frames 2192000, mean/median reward 35.9/14.0, min/max reward 0.0/329.0


 22%|██▏       | 27499/125000 [19:31<1:08:46, 23.63it/s]

finished frames 2200000, mean/median reward 14.0/9.0, min/max reward 1.0/40.0


 22%|██▏       | 27598/125000 [19:35<1:08:50, 23.58it/s]

finished frames 2208000, mean/median reward 8.3/5.0, min/max reward 0.0/30.0


 22%|██▏       | 27700/125000 [19:40<1:09:16, 23.41it/s]

finished frames 2216000, mean/median reward 24.9/4.0, min/max reward 1.0/242.0


 22%|██▏       | 27799/125000 [19:44<1:07:37, 23.96it/s]

finished frames 2224000, mean/median reward 11.0/7.0, min/max reward 0.0/53.0


 22%|██▏       | 27898/125000 [19:48<1:08:18, 23.69it/s]

finished frames 2232000, mean/median reward 13.0/7.0, min/max reward 0.0/54.0


 22%|██▏       | 28000/125000 [19:52<1:08:28, 23.61it/s]

finished frames 2240000, mean/median reward 16.4/8.0, min/max reward 0.0/61.0


 22%|██▏       | 28099/125000 [19:57<1:09:02, 23.39it/s]

finished frames 2248000, mean/median reward 19.4/14.0, min/max reward 0.0/81.0


 23%|██▎       | 28198/125000 [20:01<1:09:45, 23.13it/s]

finished frames 2256000, mean/median reward 7.4/0.0, min/max reward 0.0/63.0


 23%|██▎       | 28300/125000 [20:05<1:07:19, 23.94it/s]

finished frames 2264000, mean/median reward 7.7/5.0, min/max reward 0.0/24.0


 23%|██▎       | 28399/125000 [20:09<1:06:57, 24.04it/s]

finished frames 2272000, mean/median reward 15.8/10.0, min/max reward 5.0/52.0


 23%|██▎       | 28498/125000 [20:13<1:06:24, 24.22it/s]

finished frames 2280000, mean/median reward 14.9/12.0, min/max reward 0.0/34.0


 23%|██▎       | 28600/125000 [20:18<1:09:31, 23.11it/s]

finished frames 2288000, mean/median reward 18.9/16.0, min/max reward 2.0/45.0


 23%|██▎       | 28699/125000 [20:22<1:12:00, 22.29it/s]

finished frames 2296000, mean/median reward 16.9/10.0, min/max reward 0.0/49.0


 23%|██▎       | 28798/125000 [20:26<1:09:37, 23.03it/s]

finished frames 2304000, mean/median reward 19.5/11.0, min/max reward 0.0/165.0


 23%|██▎       | 28900/125000 [20:31<1:10:55, 22.58it/s]

finished frames 2312000, mean/median reward 23.6/6.0, min/max reward 0.0/226.0


 23%|██▎       | 28999/125000 [20:35<1:09:54, 22.89it/s]

finished frames 2320000, mean/median reward 17.3/14.0, min/max reward 0.0/58.0


 23%|██▎       | 29098/125000 [20:40<1:06:51, 23.90it/s]

finished frames 2328000, mean/median reward 14.4/9.0, min/max reward 0.0/58.0


 23%|██▎       | 29200/125000 [20:44<1:10:35, 22.62it/s]

finished frames 2336000, mean/median reward 15.4/12.0, min/max reward 3.0/43.0


 23%|██▎       | 29299/125000 [20:48<1:08:17, 23.35it/s]

finished frames 2344000, mean/median reward 18.4/12.0, min/max reward 5.0/71.0


 24%|██▎       | 29398/125000 [20:53<1:14:14, 21.46it/s]

finished frames 2352000, mean/median reward 24.2/15.0, min/max reward 0.0/138.0


 24%|██▎       | 29500/125000 [20:57<1:09:21, 22.95it/s]

finished frames 2360000, mean/median reward 9.9/7.0, min/max reward 0.0/46.0


 24%|██▎       | 29599/125000 [21:02<1:12:07, 22.05it/s]

finished frames 2368000, mean/median reward 9.9/4.0, min/max reward 0.0/45.0


 24%|██▍       | 29698/125000 [21:06<1:15:31, 21.03it/s]

finished frames 2376000, mean/median reward 4.8/4.0, min/max reward 0.0/13.0


 24%|██▍       | 29800/125000 [21:10<1:06:38, 23.81it/s]

finished frames 2384000, mean/median reward 11.1/9.0, min/max reward 3.0/23.0


 24%|██▍       | 29899/125000 [21:15<1:08:41, 23.08it/s]

finished frames 2392000, mean/median reward 17.1/15.0, min/max reward 2.0/41.0


 24%|██▍       | 29998/125000 [21:19<1:08:10, 23.23it/s]

finished frames 2400000, mean/median reward 16.3/9.0, min/max reward 0.0/63.0


 24%|██▍       | 30100/125000 [21:23<1:06:43, 23.70it/s]

finished frames 2408000, mean/median reward 8.1/5.0, min/max reward 0.0/27.0


 24%|██▍       | 30199/125000 [21:28<1:08:41, 23.00it/s]

finished frames 2416000, mean/median reward 7.4/3.0, min/max reward 0.0/33.0


 24%|██▍       | 30298/125000 [21:32<1:06:24, 23.77it/s]

finished frames 2424000, mean/median reward 3.4/1.0, min/max reward 0.0/18.0


 24%|██▍       | 30400/125000 [21:36<1:07:08, 23.48it/s]

finished frames 2432000, mean/median reward 22.5/12.0, min/max reward 2.0/73.0


 24%|██▍       | 30499/125000 [21:40<1:08:43, 22.92it/s]

finished frames 2440000, mean/median reward 7.8/4.0, min/max reward 0.0/55.0


 24%|██▍       | 30598/125000 [21:45<1:12:43, 21.63it/s]

finished frames 2448000, mean/median reward 4.0/4.0, min/max reward 0.0/9.0


 25%|██▍       | 30700/125000 [21:49<1:07:28, 23.29it/s]

finished frames 2456000, mean/median reward 10.7/9.0, min/max reward 0.0/38.0


 25%|██▍       | 30799/125000 [21:54<1:06:29, 23.61it/s]

finished frames 2464000, mean/median reward 25.8/18.0, min/max reward 5.0/96.0


 25%|██▍       | 30898/125000 [21:58<1:06:35, 23.55it/s]

finished frames 2472000, mean/median reward 33.2/15.0, min/max reward 0.0/275.0


 25%|██▍       | 31000/125000 [22:02<1:05:25, 23.95it/s]

finished frames 2480000, mean/median reward 10.9/8.0, min/max reward 1.0/36.0


 25%|██▍       | 31099/125000 [22:06<1:08:17, 22.92it/s]

finished frames 2488000, mean/median reward 33.9/12.0, min/max reward 0.0/201.0


 25%|██▍       | 31198/125000 [22:11<1:07:10, 23.27it/s]

finished frames 2496000, mean/median reward 26.6/8.0, min/max reward 1.0/201.0


 25%|██▌       | 31300/125000 [22:15<1:06:22, 23.53it/s]

finished frames 2504000, mean/median reward 16.9/12.0, min/max reward 0.0/67.0


 25%|██▌       | 31399/125000 [22:19<1:08:27, 22.79it/s]

finished frames 2512000, mean/median reward 9.2/8.0, min/max reward 0.0/39.0


 25%|██▌       | 31498/125000 [22:23<1:05:53, 23.65it/s]

finished frames 2520000, mean/median reward 18.2/15.0, min/max reward 1.0/47.0


 25%|██▌       | 31600/125000 [22:28<1:05:32, 23.75it/s]

finished frames 2528000, mean/median reward 21.0/16.0, min/max reward 0.0/59.0


 25%|██▌       | 31699/125000 [22:32<1:06:17, 23.46it/s]

finished frames 2536000, mean/median reward 36.6/31.0, min/max reward 0.0/104.0


 25%|██▌       | 31798/125000 [22:36<1:05:46, 23.62it/s]

finished frames 2544000, mean/median reward 32.9/29.0, min/max reward 2.0/95.0


 26%|██▌       | 31900/125000 [22:41<1:06:13, 23.43it/s]

finished frames 2552000, mean/median reward 14.6/6.0, min/max reward 0.0/59.0


 26%|██▌       | 31999/125000 [22:45<1:05:51, 23.53it/s]

finished frames 2560000, mean/median reward 14.0/10.0, min/max reward 0.0/53.0


 26%|██▌       | 32098/125000 [22:49<1:05:13, 23.74it/s]

finished frames 2568000, mean/median reward 17.6/9.0, min/max reward 5.0/62.0


 26%|██▌       | 32200/125000 [22:54<1:09:34, 22.23it/s]

finished frames 2576000, mean/median reward 33.1/10.0, min/max reward 1.0/283.0


 26%|██▌       | 32299/125000 [22:58<1:05:22, 23.63it/s]

finished frames 2584000, mean/median reward 22.6/9.0, min/max reward 0.0/137.0


 26%|██▌       | 32398/125000 [23:02<1:04:01, 24.10it/s]

finished frames 2592000, mean/median reward 16.1/6.0, min/max reward 0.0/58.0


 26%|██▌       | 32500/125000 [23:06<1:04:34, 23.87it/s]

finished frames 2600000, mean/median reward 16.8/13.0, min/max reward 2.0/52.0


 26%|██▌       | 32599/125000 [23:10<1:03:25, 24.28it/s]

finished frames 2608000, mean/median reward 7.5/5.0, min/max reward 0.0/25.0


 26%|██▌       | 32698/125000 [23:15<1:05:00, 23.66it/s]

finished frames 2616000, mean/median reward 17.4/7.0, min/max reward 0.0/56.0


 26%|██▌       | 32800/125000 [23:19<1:03:54, 24.05it/s]

finished frames 2624000, mean/median reward 22.5/9.0, min/max reward 4.0/102.0


 26%|██▋       | 32899/125000 [23:23<1:06:01, 23.25it/s]

finished frames 2632000, mean/median reward 14.2/6.0, min/max reward 0.0/46.0


 26%|██▋       | 32998/125000 [23:27<1:03:33, 24.13it/s]

finished frames 2640000, mean/median reward 13.6/7.0, min/max reward 0.0/51.0


 26%|██▋       | 33100/125000 [23:31<1:03:35, 24.08it/s]

finished frames 2648000, mean/median reward 32.7/12.0, min/max reward 0.0/201.0


 27%|██▋       | 33199/125000 [23:36<1:03:32, 24.08it/s]

finished frames 2656000, mean/median reward 32.4/12.0, min/max reward 6.0/201.0


 27%|██▋       | 33298/125000 [23:40<1:05:17, 23.41it/s]

finished frames 2664000, mean/median reward 21.7/13.0, min/max reward 4.0/114.0


 27%|██▋       | 33400/125000 [23:44<1:04:38, 23.62it/s]

finished frames 2672000, mean/median reward 22.4/5.0, min/max reward 0.0/170.0


 27%|██▋       | 33499/125000 [23:48<1:04:15, 23.73it/s]

finished frames 2680000, mean/median reward 27.8/9.0, min/max reward 0.0/170.0


 27%|██▋       | 33598/125000 [23:52<1:02:44, 24.28it/s]

finished frames 2688000, mean/median reward 20.2/13.0, min/max reward 0.0/127.0


 27%|██▋       | 33700/125000 [23:57<1:02:24, 24.38it/s]

finished frames 2696000, mean/median reward 14.7/9.0, min/max reward 1.0/87.0


 27%|██▋       | 33799/125000 [24:01<1:03:24, 23.97it/s]

finished frames 2704000, mean/median reward 9.7/5.0, min/max reward 0.0/35.0


 27%|██▋       | 33898/125000 [24:05<1:03:30, 23.91it/s]

finished frames 2712000, mean/median reward 10.7/7.0, min/max reward 0.0/28.0


 27%|██▋       | 34000/125000 [24:09<1:02:57, 24.09it/s]

finished frames 2720000, mean/median reward 30.6/11.0, min/max reward 3.0/156.0


 27%|██▋       | 34099/125000 [24:14<1:08:32, 22.11it/s]

finished frames 2728000, mean/median reward 0.8/1.0, min/max reward 0.0/4.0


 27%|██▋       | 34198/125000 [24:18<1:03:04, 24.00it/s]

finished frames 2736000, mean/median reward 2.9/3.0, min/max reward 0.0/7.0


 27%|██▋       | 34300/125000 [24:22<1:03:13, 23.91it/s]

finished frames 2744000, mean/median reward 10.7/7.0, min/max reward 3.0/37.0


 28%|██▊       | 34399/125000 [24:26<1:03:46, 23.68it/s]

finished frames 2752000, mean/median reward 20.4/11.0, min/max reward 0.0/64.0


 28%|██▊       | 34498/125000 [24:31<1:04:39, 23.33it/s]

finished frames 2760000, mean/median reward 12.7/7.0, min/max reward 0.0/64.0


 28%|██▊       | 34600/125000 [24:35<1:03:16, 23.81it/s]

finished frames 2768000, mean/median reward 10.6/6.0, min/max reward 0.0/40.0


 28%|██▊       | 34699/125000 [24:39<1:03:43, 23.62it/s]

finished frames 2776000, mean/median reward 17.2/10.0, min/max reward 0.0/44.0


 28%|██▊       | 34798/125000 [24:43<1:07:47, 22.18it/s]

finished frames 2784000, mean/median reward 28.1/14.0, min/max reward 1.0/105.0


 28%|██▊       | 34900/125000 [24:48<1:02:56, 23.86it/s]

finished frames 2792000, mean/median reward 6.4/4.0, min/max reward 0.0/26.0


 28%|██▊       | 34999/125000 [24:52<1:04:05, 23.41it/s]

finished frames 2800000, mean/median reward 14.7/8.0, min/max reward 0.0/43.0


 28%|██▊       | 35098/125000 [24:56<1:02:58, 23.79it/s]

finished frames 2808000, mean/median reward 20.4/14.0, min/max reward 0.0/72.0


 28%|██▊       | 35200/125000 [25:00<1:01:59, 24.14it/s]

finished frames 2816000, mean/median reward 15.5/12.0, min/max reward 0.0/49.0


 28%|██▊       | 35299/125000 [25:04<1:04:02, 23.34it/s]

finished frames 2824000, mean/median reward 22.8/9.0, min/max reward 0.0/208.0


 28%|██▊       | 35398/125000 [25:08<1:01:27, 24.30it/s]

finished frames 2832000, mean/median reward 9.9/10.0, min/max reward 2.0/23.0


 28%|██▊       | 35500/125000 [25:13<1:01:48, 24.14it/s]

finished frames 2840000, mean/median reward 28.2/14.0, min/max reward 5.0/77.0


 28%|██▊       | 35599/125000 [25:17<1:02:58, 23.66it/s]

finished frames 2848000, mean/median reward 5.2/4.0, min/max reward 0.0/14.0


 29%|██▊       | 35698/125000 [25:21<1:01:45, 24.10it/s]

finished frames 2856000, mean/median reward 8.5/6.0, min/max reward 0.0/26.0


 29%|██▊       | 35800/125000 [25:25<1:02:35, 23.75it/s]

finished frames 2864000, mean/median reward 20.6/10.0, min/max reward 1.0/75.0


 29%|██▊       | 35899/125000 [25:29<1:02:16, 23.85it/s]

finished frames 2872000, mean/median reward 9.4/4.0, min/max reward 0.0/35.0


 29%|██▉       | 35998/125000 [25:34<1:01:46, 24.02it/s]

finished frames 2880000, mean/median reward 17.4/16.0, min/max reward 0.0/35.0


 29%|██▉       | 36100/125000 [25:38<1:02:00, 23.89it/s]

finished frames 2888000, mean/median reward 17.6/12.0, min/max reward 1.0/53.0


 29%|██▉       | 36199/125000 [25:42<1:02:08, 23.82it/s]

finished frames 2896000, mean/median reward 17.4/16.0, min/max reward 4.0/40.0


 29%|██▉       | 36298/125000 [25:46<1:01:35, 24.00it/s]

finished frames 2904000, mean/median reward 15.4/14.0, min/max reward 1.0/35.0


 29%|██▉       | 36400/125000 [25:50<1:01:45, 23.91it/s]

finished frames 2912000, mean/median reward 28.6/15.0, min/max reward 2.0/151.0


 29%|██▉       | 36499/125000 [25:55<1:00:41, 24.30it/s]

finished frames 2920000, mean/median reward 14.1/13.0, min/max reward 3.0/35.0


 29%|██▉       | 36598/125000 [25:59<1:01:25, 23.99it/s]

finished frames 2928000, mean/median reward 15.8/12.0, min/max reward 0.0/47.0


 29%|██▉       | 36700/125000 [26:03<1:05:32, 22.46it/s]

finished frames 2936000, mean/median reward 13.8/10.0, min/max reward 0.0/46.0


 29%|██▉       | 36799/125000 [26:07<1:00:44, 24.20it/s]

finished frames 2944000, mean/median reward 8.8/6.0, min/max reward 0.0/26.0


 30%|██▉       | 36898/125000 [26:11<1:01:52, 23.73it/s]

finished frames 2952000, mean/median reward 17.9/15.0, min/max reward 1.0/46.0


 30%|██▉       | 37000/125000 [26:16<1:02:31, 23.46it/s]

finished frames 2960000, mean/median reward 31.8/19.0, min/max reward 1.0/165.0


 30%|██▉       | 37099/125000 [26:20<1:01:46, 23.72it/s]

finished frames 2968000, mean/median reward 25.5/13.0, min/max reward 0.0/119.0


 30%|██▉       | 37198/125000 [26:24<1:00:26, 24.21it/s]

finished frames 2976000, mean/median reward 22.4/8.0, min/max reward 0.0/119.0


 30%|██▉       | 37300/125000 [26:28<1:00:56, 23.99it/s]

finished frames 2984000, mean/median reward 30.1/25.0, min/max reward 0.0/92.0


 30%|██▉       | 37399/125000 [26:32<1:00:20, 24.19it/s]

finished frames 2992000, mean/median reward 31.6/18.0, min/max reward 0.0/71.0


 30%|██▉       | 37498/125000 [26:36<1:00:07, 24.25it/s]

finished frames 3000000, mean/median reward 30.8/9.0, min/max reward 0.0/195.0


 30%|███       | 37600/125000 [26:41<1:01:03, 23.86it/s]

finished frames 3008000, mean/median reward 19.1/16.0, min/max reward 0.0/65.0


 30%|███       | 37699/125000 [26:45<1:02:26, 23.30it/s]

finished frames 3016000, mean/median reward 16.9/8.0, min/max reward 0.0/65.0


 30%|███       | 37798/125000 [26:49<59:46, 24.32it/s]  

finished frames 3024000, mean/median reward 35.4/12.0, min/max reward 0.0/268.0


 30%|███       | 37900/125000 [26:53<1:01:05, 23.76it/s]

finished frames 3032000, mean/median reward 25.9/20.0, min/max reward 0.0/91.0


 30%|███       | 37999/125000 [26:57<1:01:04, 23.74it/s]

finished frames 3040000, mean/median reward 8.7/5.0, min/max reward 0.0/45.0


 30%|███       | 38098/125000 [27:02<1:00:44, 23.84it/s]

finished frames 3048000, mean/median reward 39.9/15.0, min/max reward 4.0/259.0


 31%|███       | 38200/125000 [27:06<1:01:45, 23.43it/s]

finished frames 3056000, mean/median reward 31.7/18.0, min/max reward 6.0/119.0


 31%|███       | 38299/125000 [27:10<1:00:07, 24.03it/s]

finished frames 3064000, mean/median reward 29.5/14.0, min/max reward 0.0/170.0


 31%|███       | 38398/125000 [27:14<1:00:38, 23.80it/s]

finished frames 3072000, mean/median reward 20.3/13.0, min/max reward 3.0/55.0


 31%|███       | 38500/125000 [27:18<1:00:00, 24.02it/s]

finished frames 3080000, mean/median reward 34.0/15.0, min/max reward 0.0/177.0


 31%|███       | 38599/125000 [27:23<1:00:49, 23.68it/s]

finished frames 3088000, mean/median reward 33.1/15.0, min/max reward 0.0/110.0


 31%|███       | 38698/125000 [27:27<59:49, 24.04it/s]  

finished frames 3096000, mean/median reward 45.3/15.0, min/max reward 7.0/212.0


 31%|███       | 38800/125000 [27:31<1:00:06, 23.90it/s]

finished frames 3104000, mean/median reward 22.1/14.0, min/max reward 0.0/147.0


 31%|███       | 38899/125000 [27:35<1:02:08, 23.09it/s]

finished frames 3112000, mean/median reward 18.2/15.0, min/max reward 2.0/51.0


 31%|███       | 38998/125000 [27:39<1:00:37, 23.64it/s]

finished frames 3120000, mean/median reward 7.1/5.0, min/max reward 0.0/21.0


 31%|███▏      | 39100/125000 [27:44<1:00:10, 23.79it/s]

finished frames 3128000, mean/median reward 23.8/12.0, min/max reward 0.0/123.0


 31%|███▏      | 39199/125000 [27:48<1:00:11, 23.76it/s]

finished frames 3136000, mean/median reward 10.8/5.0, min/max reward 1.0/31.0


 31%|███▏      | 39298/125000 [27:52<59:21, 24.06it/s]  

finished frames 3144000, mean/median reward 18.2/10.0, min/max reward 4.0/56.0


 32%|███▏      | 39400/125000 [27:56<58:29, 24.39it/s]  

finished frames 3152000, mean/median reward 47.7/18.0, min/max reward 4.0/226.0


 32%|███▏      | 39499/125000 [28:01<59:54, 23.79it/s]  

finished frames 3160000, mean/median reward 25.0/7.0, min/max reward 3.0/228.0


 32%|███▏      | 39598/125000 [28:05<58:31, 24.32it/s]  

finished frames 3168000, mean/median reward 23.7/16.0, min/max reward 0.0/72.0


 32%|███▏      | 39700/125000 [28:09<59:23, 23.94it/s]  

finished frames 3176000, mean/median reward 14.1/11.0, min/max reward 0.0/44.0


 32%|███▏      | 39799/125000 [28:13<59:03, 24.05it/s]  

finished frames 3184000, mean/median reward 22.3/11.0, min/max reward 0.0/87.0


 32%|███▏      | 39898/125000 [28:17<58:46, 24.13it/s]  

finished frames 3192000, mean/median reward 28.0/11.0, min/max reward 0.0/160.0


 32%|███▏      | 40000/125000 [28:21<59:12, 23.93it/s]  

finished frames 3200000, mean/median reward 16.4/7.0, min/max reward 0.0/53.0


 32%|███▏      | 40099/125000 [28:26<59:59, 23.59it/s]  

finished frames 3208000, mean/median reward 16.5/5.0, min/max reward 0.0/55.0


 32%|███▏      | 40198/125000 [28:30<1:00:16, 23.45it/s]

finished frames 3216000, mean/median reward 17.2/7.0, min/max reward 1.0/56.0


 32%|███▏      | 40300/125000 [28:34<59:27, 23.74it/s]  

finished frames 3224000, mean/median reward 16.9/5.0, min/max reward 0.0/52.0


 32%|███▏      | 40399/125000 [28:38<57:39, 24.46it/s]

finished frames 3232000, mean/median reward 26.9/4.0, min/max reward 1.0/196.0


 32%|███▏      | 40498/125000 [28:42<59:05, 23.83it/s]  

finished frames 3240000, mean/median reward 9.4/5.0, min/max reward 0.0/32.0


 32%|███▏      | 40600/125000 [28:47<58:30, 24.04it/s]  

finished frames 3248000, mean/median reward 6.8/5.0, min/max reward 0.0/22.0


 33%|███▎      | 40699/125000 [28:51<59:59, 23.42it/s]  

finished frames 3256000, mean/median reward 28.9/12.0, min/max reward 1.0/126.0


 33%|███▎      | 40798/125000 [28:55<58:38, 23.93it/s]

finished frames 3264000, mean/median reward 13.8/7.0, min/max reward 0.0/47.0


 33%|███▎      | 40900/125000 [28:59<59:59, 23.37it/s]  

finished frames 3272000, mean/median reward 27.5/24.0, min/max reward 0.0/89.0


 33%|███▎      | 40999/125000 [29:03<58:58, 23.74it/s]  

finished frames 3280000, mean/median reward 41.2/19.0, min/max reward 3.0/191.0


 33%|███▎      | 41098/125000 [29:07<57:41, 24.24it/s]

finished frames 3288000, mean/median reward 20.4/9.0, min/max reward 0.0/89.0


 33%|███▎      | 41200/125000 [29:12<58:03, 24.06it/s]

finished frames 3296000, mean/median reward 25.7/11.0, min/max reward 0.0/196.0


 33%|███▎      | 41299/125000 [29:16<59:08, 23.59it/s]

finished frames 3304000, mean/median reward 21.9/13.0, min/max reward 1.0/152.0


 33%|███▎      | 41398/125000 [29:20<58:30, 23.81it/s]

finished frames 3312000, mean/median reward 58.0/25.0, min/max reward 4.0/363.0


 33%|███▎      | 41500/125000 [29:24<57:59, 23.99it/s]

finished frames 3320000, mean/median reward 42.9/14.0, min/max reward 0.0/138.0


 33%|███▎      | 41599/125000 [29:28<58:35, 23.72it/s]  

finished frames 3328000, mean/median reward 33.2/21.0, min/max reward 0.0/138.0


 33%|███▎      | 41698/125000 [29:33<58:40, 23.66it/s]  

finished frames 3336000, mean/median reward 24.4/12.0, min/max reward 0.0/152.0


 33%|███▎      | 41800/125000 [29:37<57:23, 24.16it/s]

finished frames 3344000, mean/median reward 20.9/12.0, min/max reward 0.0/67.0


 34%|███▎      | 41899/125000 [29:41<58:12, 23.80it/s]

finished frames 3352000, mean/median reward 34.3/27.0, min/max reward 0.0/106.0


 34%|███▎      | 41998/125000 [29:45<59:05, 23.41it/s]

finished frames 3360000, mean/median reward 47.1/22.0, min/max reward 0.0/202.0


 34%|███▎      | 42100/125000 [29:49<57:15, 24.13it/s]  

finished frames 3368000, mean/median reward 47.9/22.0, min/max reward 6.0/211.0


 34%|███▍      | 42199/125000 [29:54<57:39, 23.94it/s]  

finished frames 3376000, mean/median reward 28.6/23.0, min/max reward 1.0/106.0


 34%|███▍      | 42298/125000 [29:58<57:30, 23.97it/s]  

finished frames 3384000, mean/median reward 18.1/5.0, min/max reward 1.0/106.0


 34%|███▍      | 42400/125000 [30:02<57:04, 24.12it/s]

finished frames 3392000, mean/median reward 21.3/7.0, min/max reward 0.0/156.0


 34%|███▍      | 42499/125000 [30:06<56:54, 24.16it/s]  

finished frames 3400000, mean/median reward 20.9/18.0, min/max reward 2.0/58.0


 34%|███▍      | 42598/125000 [30:10<57:05, 24.05it/s]

finished frames 3408000, mean/median reward 18.1/12.0, min/max reward 0.0/50.0


 34%|███▍      | 42700/125000 [30:15<56:53, 24.11it/s]

finished frames 3416000, mean/median reward 15.1/8.0, min/max reward 0.0/84.0


 34%|███▍      | 42799/125000 [30:19<58:53, 23.26it/s]

finished frames 3424000, mean/median reward 9.7/5.0, min/max reward 0.0/50.0


 34%|███▍      | 42898/125000 [30:23<57:13, 23.91it/s]  

finished frames 3432000, mean/median reward 16.2/9.0, min/max reward 0.0/65.0


 34%|███▍      | 43000/125000 [30:27<56:25, 24.22it/s]

finished frames 3440000, mean/median reward 21.6/9.0, min/max reward 0.0/88.0


 34%|███▍      | 43099/125000 [30:31<56:03, 24.35it/s]

finished frames 3448000, mean/median reward 21.9/15.0, min/max reward 0.0/95.0


 35%|███▍      | 43198/125000 [30:35<57:58, 23.52it/s]

finished frames 3456000, mean/median reward 25.4/7.0, min/max reward 0.0/161.0


 35%|███▍      | 43300/125000 [30:40<56:22, 24.15it/s]

finished frames 3464000, mean/median reward 45.2/18.0, min/max reward 4.0/319.0


 35%|███▍      | 43399/125000 [30:44<56:06, 24.24it/s]

finished frames 3472000, mean/median reward 56.4/15.0, min/max reward 4.0/341.0


 35%|███▍      | 43498/125000 [30:48<57:34, 23.59it/s]

finished frames 3480000, mean/median reward 41.2/13.0, min/max reward 0.0/281.0


 35%|███▍      | 43600/125000 [30:52<56:53, 23.85it/s]

finished frames 3488000, mean/median reward 49.2/10.0, min/max reward 1.0/281.0


 35%|███▍      | 43699/125000 [30:56<55:03, 24.61it/s]

finished frames 3496000, mean/median reward 43.2/17.0, min/max reward 0.0/336.0


 35%|███▌      | 43798/125000 [31:00<57:12, 23.66it/s]

finished frames 3504000, mean/median reward 37.2/17.0, min/max reward 1.0/206.0


 35%|███▌      | 43900/125000 [31:05<56:00, 24.14it/s]

finished frames 3512000, mean/median reward 49.2/12.0, min/max reward 1.0/285.0


 35%|███▌      | 43999/125000 [31:09<56:28, 23.91it/s]

finished frames 3520000, mean/median reward 35.1/12.0, min/max reward 5.0/159.0


 35%|███▌      | 44098/125000 [31:13<56:07, 24.02it/s]

finished frames 3528000, mean/median reward 15.4/7.0, min/max reward 0.0/91.0


 35%|███▌      | 44200/125000 [31:17<56:13, 23.95it/s]

finished frames 3536000, mean/median reward 22.0/10.0, min/max reward 1.0/64.0


 35%|███▌      | 44299/125000 [31:21<56:25, 23.84it/s]

finished frames 3544000, mean/median reward 44.2/19.0, min/max reward 0.0/270.0


 36%|███▌      | 44398/125000 [31:26<56:27, 23.80it/s]

finished frames 3552000, mean/median reward 33.3/15.0, min/max reward 3.0/211.0


 36%|███▌      | 44500/125000 [31:30<57:50, 23.20it/s]

finished frames 3560000, mean/median reward 15.1/9.0, min/max reward 3.0/38.0


 36%|███▌      | 44599/125000 [31:34<55:36, 24.10it/s]

finished frames 3568000, mean/median reward 30.7/13.0, min/max reward 4.0/238.0


 36%|███▌      | 44698/125000 [31:38<56:01, 23.89it/s]

finished frames 3576000, mean/median reward 24.6/8.0, min/max reward 0.0/218.0


 36%|███▌      | 44800/125000 [31:43<54:43, 24.42it/s]  

finished frames 3584000, mean/median reward 15.4/6.0, min/max reward 0.0/88.0


 36%|███▌      | 44899/125000 [31:47<55:12, 24.18it/s]

finished frames 3592000, mean/median reward 38.2/29.0, min/max reward 4.0/183.0


 36%|███▌      | 44998/125000 [31:51<55:12, 24.15it/s]

finished frames 3600000, mean/median reward 27.4/14.0, min/max reward 5.0/131.0


 36%|███▌      | 45100/125000 [31:55<58:26, 22.79it/s]

finished frames 3608000, mean/median reward 29.5/16.0, min/max reward 2.0/147.0


 36%|███▌      | 45199/125000 [31:59<55:15, 24.07it/s]

finished frames 3616000, mean/median reward 41.1/23.0, min/max reward 1.0/147.0


 36%|███▌      | 45298/125000 [32:03<55:05, 24.11it/s]

finished frames 3624000, mean/median reward 33.2/16.0, min/max reward 3.0/191.0


 36%|███▋      | 45400/125000 [32:08<55:11, 24.04it/s]

finished frames 3632000, mean/median reward 22.1/10.0, min/max reward 0.0/105.0


 36%|███▋      | 45499/125000 [32:12<55:42, 23.78it/s]

finished frames 3640000, mean/median reward 35.3/10.0, min/max reward 1.0/351.0


 36%|███▋      | 45598/125000 [32:16<55:33, 23.82it/s]

finished frames 3648000, mean/median reward 28.3/12.0, min/max reward 6.0/138.0


 37%|███▋      | 45700/125000 [32:20<54:58, 24.04it/s]

finished frames 3656000, mean/median reward 19.4/12.0, min/max reward 6.0/47.0


 37%|███▋      | 45799/125000 [32:24<54:04, 24.41it/s]

finished frames 3664000, mean/median reward 37.7/9.0, min/max reward 2.0/217.0


 37%|███▋      | 45898/125000 [32:28<54:24, 24.23it/s]

finished frames 3672000, mean/median reward 37.1/13.0, min/max reward 4.0/230.0


 37%|███▋      | 46000/125000 [32:33<54:00, 24.38it/s]

finished frames 3680000, mean/median reward 46.4/20.0, min/max reward 0.0/230.0


 37%|███▋      | 46099/125000 [32:37<56:00, 23.48it/s]

finished frames 3688000, mean/median reward 33.4/11.0, min/max reward 0.0/145.0


 37%|███▋      | 46198/125000 [32:41<54:48, 23.96it/s]

finished frames 3696000, mean/median reward 34.1/19.0, min/max reward 0.0/193.0


 37%|███▋      | 46300/125000 [32:45<54:35, 24.02it/s]

finished frames 3704000, mean/median reward 45.8/22.0, min/max reward 1.0/263.0


 37%|███▋      | 46399/125000 [32:49<54:28, 24.05it/s]

finished frames 3712000, mean/median reward 23.7/12.0, min/max reward 0.0/79.0


 37%|███▋      | 46498/125000 [32:54<54:06, 24.18it/s]

finished frames 3720000, mean/median reward 23.4/6.0, min/max reward 2.0/201.0


 37%|███▋      | 46600/125000 [32:58<54:50, 23.83it/s]

finished frames 3728000, mean/median reward 19.6/8.0, min/max reward 0.0/73.0


 37%|███▋      | 46699/125000 [33:02<55:39, 23.45it/s]

finished frames 3736000, mean/median reward 24.0/13.0, min/max reward 0.0/94.0


 37%|███▋      | 46798/125000 [33:06<54:50, 23.77it/s]

finished frames 3744000, mean/median reward 39.8/9.0, min/max reward 0.0/264.0


 38%|███▊      | 46900/125000 [33:10<57:37, 22.59it/s]

finished frames 3752000, mean/median reward 32.8/22.0, min/max reward 1.0/101.0


 38%|███▊      | 46999/125000 [33:15<57:20, 22.67it/s]

finished frames 3760000, mean/median reward 32.8/19.0, min/max reward 0.0/137.0


 38%|███▊      | 47098/125000 [33:19<57:29, 22.58it/s]

finished frames 3768000, mean/median reward 18.9/11.0, min/max reward 0.0/77.0


 38%|███▊      | 47200/125000 [33:24<56:13, 23.06it/s]

finished frames 3776000, mean/median reward 22.2/19.0, min/max reward 0.0/60.0


 38%|███▊      | 47299/125000 [33:28<57:54, 22.36it/s]

finished frames 3784000, mean/median reward 15.2/11.0, min/max reward 0.0/44.0


 38%|███▊      | 47398/125000 [33:32<56:08, 23.04it/s]

finished frames 3792000, mean/median reward 23.9/19.0, min/max reward 5.0/77.0


 38%|███▊      | 47500/125000 [33:37<55:38, 23.21it/s]

finished frames 3800000, mean/median reward 29.2/14.0, min/max reward 2.0/120.0


 38%|███▊      | 47599/125000 [33:41<55:37, 23.19it/s]

finished frames 3808000, mean/median reward 12.0/8.0, min/max reward 2.0/39.0


 38%|███▊      | 47698/125000 [33:45<56:54, 22.64it/s]

finished frames 3816000, mean/median reward 52.6/23.0, min/max reward 4.0/199.0


 38%|███▊      | 47800/125000 [33:50<56:15, 22.87it/s]

finished frames 3824000, mean/median reward 90.3/31.0, min/max reward 4.0/342.0


 38%|███▊      | 47899/125000 [33:54<57:47, 22.24it/s]

finished frames 3832000, mean/median reward 40.0/22.0, min/max reward 1.0/140.0


 38%|███▊      | 47998/125000 [33:58<53:03, 24.19it/s]

finished frames 3840000, mean/median reward 24.8/4.0, min/max reward 0.0/129.0


 38%|███▊      | 48100/125000 [34:03<54:30, 23.51it/s]

finished frames 3848000, mean/median reward 11.9/5.0, min/max reward 0.0/45.0


 39%|███▊      | 48199/125000 [34:07<52:59, 24.16it/s]

finished frames 3856000, mean/median reward 27.9/15.0, min/max reward 0.0/207.0


 39%|███▊      | 48298/125000 [34:11<54:03, 23.65it/s]

finished frames 3864000, mean/median reward 31.9/8.0, min/max reward 0.0/279.0


 39%|███▊      | 48400/125000 [34:15<53:23, 23.91it/s]

finished frames 3872000, mean/median reward 29.1/10.0, min/max reward 0.0/153.0


 39%|███▉      | 48499/125000 [34:19<55:32, 22.96it/s]

finished frames 3880000, mean/median reward 19.9/5.0, min/max reward 0.0/138.0


 39%|███▉      | 48598/125000 [34:24<52:40, 24.17it/s]

finished frames 3888000, mean/median reward 24.6/3.0, min/max reward 0.0/138.0


 39%|███▉      | 48700/125000 [34:28<53:20, 23.84it/s]

finished frames 3896000, mean/median reward 20.9/12.0, min/max reward 0.0/95.0


 39%|███▉      | 48799/125000 [34:32<52:44, 24.08it/s]  

finished frames 3904000, mean/median reward 36.1/18.0, min/max reward 1.0/112.0


 39%|███▉      | 48898/125000 [34:36<52:59, 23.94it/s]

finished frames 3912000, mean/median reward 44.1/23.0, min/max reward 1.0/151.0


 39%|███▉      | 49000/125000 [34:41<52:23, 24.17it/s]

finished frames 3920000, mean/median reward 42.2/23.0, min/max reward 0.0/305.0


 39%|███▉      | 49099/125000 [34:45<51:56, 24.35it/s]

finished frames 3928000, mean/median reward 29.3/10.0, min/max reward 0.0/158.0


 39%|███▉      | 49198/125000 [34:49<52:14, 24.19it/s]

finished frames 3936000, mean/median reward 32.5/14.0, min/max reward 1.0/103.0


 39%|███▉      | 49300/125000 [34:53<52:14, 24.15it/s]

finished frames 3944000, mean/median reward 22.7/8.0, min/max reward 0.0/103.0


 40%|███▉      | 49399/125000 [34:57<52:46, 23.88it/s]

finished frames 3952000, mean/median reward 19.9/13.0, min/max reward 0.0/103.0


 40%|███▉      | 49498/125000 [35:01<52:24, 24.01it/s]

finished frames 3960000, mean/median reward 60.9/23.0, min/max reward 0.0/217.0


 40%|███▉      | 49600/125000 [35:06<52:09, 24.09it/s]

finished frames 3968000, mean/median reward 27.9/7.0, min/max reward 0.0/191.0


 40%|███▉      | 49699/125000 [35:10<52:19, 23.98it/s]

finished frames 3976000, mean/median reward 36.4/29.0, min/max reward 0.0/91.0


 40%|███▉      | 49798/125000 [35:14<53:03, 23.62it/s]

finished frames 3984000, mean/median reward 57.7/25.0, min/max reward 0.0/288.0


 40%|███▉      | 49900/125000 [35:18<51:13, 24.43it/s]

finished frames 3992000, mean/median reward 72.2/50.0, min/max reward 2.0/299.0


 40%|███▉      | 49999/125000 [35:22<51:54, 24.08it/s]

finished frames 4000000, mean/median reward 26.4/7.0, min/max reward 1.0/97.0


 40%|████      | 50098/125000 [35:26<51:47, 24.10it/s]

finished frames 4008000, mean/median reward 39.5/11.0, min/max reward 0.0/261.0


 40%|████      | 50200/125000 [35:31<51:37, 24.15it/s]

finished frames 4016000, mean/median reward 27.4/11.0, min/max reward 0.0/148.0


 40%|████      | 50299/125000 [35:35<55:05, 22.60it/s]  

finished frames 4024000, mean/median reward 41.4/14.0, min/max reward 0.0/148.0


 40%|████      | 50398/125000 [35:39<53:49, 23.10it/s]

finished frames 4032000, mean/median reward 41.2/32.0, min/max reward 6.0/105.0


 40%|████      | 50500/125000 [35:44<51:24, 24.15it/s]

finished frames 4040000, mean/median reward 53.7/35.0, min/max reward 5.0/202.0


 40%|████      | 50599/125000 [35:48<49:44, 24.93it/s]

finished frames 4048000, mean/median reward 71.6/54.0, min/max reward 0.0/269.0


 41%|████      | 50698/125000 [35:52<49:44, 24.90it/s]

finished frames 4056000, mean/median reward 55.8/16.0, min/max reward 3.0/288.0


 41%|████      | 50800/125000 [35:56<50:14, 24.62it/s]

finished frames 4064000, mean/median reward 49.1/10.0, min/max reward 3.0/273.0


 41%|████      | 50899/125000 [36:00<51:42, 23.89it/s]

finished frames 4072000, mean/median reward 47.1/12.0, min/max reward 0.0/305.0


 41%|████      | 50998/125000 [36:04<51:09, 24.11it/s]

finished frames 4080000, mean/median reward 36.3/12.0, min/max reward 0.0/253.0


 41%|████      | 51100/125000 [36:08<52:15, 23.57it/s]

finished frames 4088000, mean/median reward 25.8/15.0, min/max reward 4.0/77.0


 41%|████      | 51199/125000 [36:12<49:49, 24.69it/s]

finished frames 4096000, mean/median reward 18.3/13.0, min/max reward 0.0/70.0


 41%|████      | 51298/125000 [36:17<51:30, 23.85it/s]

finished frames 4104000, mean/median reward 15.1/4.0, min/max reward 0.0/54.0


 41%|████      | 51400/125000 [36:21<50:42, 24.19it/s]

finished frames 4112000, mean/median reward 27.6/8.0, min/max reward 0.0/112.0


 41%|████      | 51499/125000 [36:25<49:57, 24.52it/s]

finished frames 4120000, mean/median reward 35.9/17.0, min/max reward 0.0/119.0


 41%|████▏     | 51598/125000 [36:29<50:25, 24.26it/s]

finished frames 4128000, mean/median reward 40.2/28.0, min/max reward 4.0/119.0


 41%|████▏     | 51700/125000 [36:33<52:08, 23.43it/s]

finished frames 4136000, mean/median reward 56.1/24.0, min/max reward 1.0/284.0


 41%|████▏     | 51799/125000 [36:38<51:03, 23.89it/s]

finished frames 4144000, mean/median reward 53.4/23.0, min/max reward 1.0/276.0


 42%|████▏     | 51898/125000 [36:42<50:16, 24.23it/s]

finished frames 4152000, mean/median reward 41.6/17.0, min/max reward 0.0/276.0


 42%|████▏     | 52000/125000 [36:46<49:56, 24.36it/s]

finished frames 4160000, mean/median reward 28.4/11.0, min/max reward 0.0/129.0


 42%|████▏     | 52099/125000 [36:50<52:03, 23.34it/s]

finished frames 4168000, mean/median reward 41.4/20.0, min/max reward 0.0/169.0


 42%|████▏     | 52198/125000 [36:54<50:43, 23.92it/s]

finished frames 4176000, mean/median reward 47.0/21.0, min/max reward 0.0/165.0


 42%|████▏     | 52300/125000 [36:58<50:15, 24.11it/s]

finished frames 4184000, mean/median reward 27.3/14.0, min/max reward 0.0/105.0


 42%|████▏     | 52399/125000 [37:03<49:39, 24.37it/s]

finished frames 4192000, mean/median reward 26.8/11.0, min/max reward 0.0/187.0


 42%|████▏     | 52498/125000 [37:07<50:21, 24.00it/s]

finished frames 4200000, mean/median reward 24.0/14.0, min/max reward 2.0/98.0


 42%|████▏     | 52600/125000 [37:11<50:05, 24.09it/s]

finished frames 4208000, mean/median reward 29.2/22.0, min/max reward 0.0/93.0


 42%|████▏     | 52699/125000 [37:15<52:08, 23.11it/s]

finished frames 4216000, mean/median reward 44.6/15.0, min/max reward 0.0/286.0


 42%|████▏     | 52798/125000 [37:19<50:38, 23.76it/s]

finished frames 4224000, mean/median reward 32.5/13.0, min/max reward 0.0/179.0


 42%|████▏     | 52900/125000 [37:23<50:16, 23.90it/s]

finished frames 4232000, mean/median reward 55.9/22.0, min/max reward 0.0/290.0


 42%|████▏     | 52999/125000 [37:27<50:27, 23.78it/s]

finished frames 4240000, mean/median reward 46.3/34.0, min/max reward 3.0/194.0


 42%|████▏     | 53098/125000 [37:32<50:11, 23.88it/s]

finished frames 4248000, mean/median reward 72.9/21.0, min/max reward 2.0/333.0


 43%|████▎     | 53200/125000 [37:36<49:58, 23.95it/s]

finished frames 4256000, mean/median reward 67.4/21.0, min/max reward 2.0/304.0


 43%|████▎     | 53299/125000 [37:40<50:10, 23.81it/s]

finished frames 4264000, mean/median reward 20.9/14.0, min/max reward 0.0/70.0


 43%|████▎     | 53398/125000 [37:44<50:10, 23.79it/s]

finished frames 4272000, mean/median reward 78.1/28.0, min/max reward 0.0/221.0


 43%|████▎     | 53500/125000 [37:49<49:20, 24.15it/s]

finished frames 4280000, mean/median reward 67.2/36.0, min/max reward 0.0/221.0


 43%|████▎     | 53599/125000 [37:53<50:10, 23.72it/s]

finished frames 4288000, mean/median reward 25.6/17.0, min/max reward 0.0/79.0


 43%|████▎     | 53698/125000 [37:57<50:13, 23.66it/s]

finished frames 4296000, mean/median reward 23.7/8.0, min/max reward 0.0/113.0


 43%|████▎     | 53800/125000 [38:01<49:01, 24.21it/s]

finished frames 4304000, mean/median reward 41.0/8.0, min/max reward 1.0/177.0


 43%|████▎     | 53899/125000 [38:05<48:35, 24.38it/s]

finished frames 4312000, mean/median reward 35.8/14.0, min/max reward 0.0/164.0


 43%|████▎     | 53998/125000 [38:09<49:42, 23.81it/s]

finished frames 4320000, mean/median reward 49.0/21.0, min/max reward 1.0/265.0


 43%|████▎     | 54100/125000 [38:14<48:40, 24.28it/s]

finished frames 4328000, mean/median reward 53.2/9.0, min/max reward 1.0/359.0


 43%|████▎     | 54199/125000 [38:18<49:09, 24.00it/s]

finished frames 4336000, mean/median reward 11.1/4.0, min/max reward 0.0/55.0


 43%|████▎     | 54298/125000 [38:22<49:03, 24.02it/s]

finished frames 4344000, mean/median reward 23.4/18.0, min/max reward 0.0/70.0


 44%|████▎     | 54400/125000 [38:26<48:04, 24.47it/s]

finished frames 4352000, mean/median reward 43.1/16.0, min/max reward 0.0/358.0


 44%|████▎     | 54499/125000 [38:30<47:46, 24.60it/s]

finished frames 4360000, mean/median reward 47.5/14.0, min/max reward 0.0/358.0


 44%|████▎     | 54598/125000 [38:34<48:22, 24.25it/s]

finished frames 4368000, mean/median reward 52.7/33.0, min/max reward 2.0/233.0


 44%|████▍     | 54700/125000 [38:39<48:48, 24.00it/s]

finished frames 4376000, mean/median reward 44.8/9.0, min/max reward 0.0/252.0


 44%|████▍     | 54799/125000 [38:43<49:16, 23.74it/s]

finished frames 4384000, mean/median reward 31.8/9.0, min/max reward 0.0/242.0


 44%|████▍     | 54898/125000 [38:47<50:31, 23.12it/s]

finished frames 4392000, mean/median reward 43.7/12.0, min/max reward 0.0/247.0


 44%|████▍     | 55000/125000 [38:51<50:58, 22.88it/s]

finished frames 4400000, mean/median reward 17.5/4.0, min/max reward 0.0/115.0


 44%|████▍     | 55099/125000 [38:55<49:19, 23.62it/s]

finished frames 4408000, mean/median reward 18.1/9.0, min/max reward 1.0/115.0


 44%|████▍     | 55198/125000 [39:00<48:44, 23.87it/s]

finished frames 4416000, mean/median reward 34.0/18.0, min/max reward 3.0/123.0


 44%|████▍     | 55300/125000 [39:04<48:15, 24.08it/s]

finished frames 4424000, mean/median reward 25.1/12.0, min/max reward 1.0/72.0


 44%|████▍     | 55399/125000 [39:08<48:36, 23.86it/s]

finished frames 4432000, mean/median reward 37.5/20.0, min/max reward 0.0/178.0


 44%|████▍     | 55498/125000 [39:12<48:51, 23.71it/s]

finished frames 4440000, mean/median reward 31.5/7.0, min/max reward 0.0/214.0


 44%|████▍     | 55600/125000 [39:16<46:50, 24.69it/s]

finished frames 4448000, mean/median reward 37.4/7.0, min/max reward 1.0/172.0


 45%|████▍     | 55699/125000 [39:20<46:42, 24.73it/s]

finished frames 4456000, mean/median reward 49.3/20.0, min/max reward 4.0/200.0


 45%|████▍     | 55798/125000 [39:25<49:03, 23.51it/s]

finished frames 4464000, mean/median reward 46.5/24.0, min/max reward 0.0/132.0


 45%|████▍     | 55900/125000 [39:29<47:24, 24.29it/s]

finished frames 4472000, mean/median reward 22.1/4.0, min/max reward 0.0/129.0


 45%|████▍     | 55999/125000 [39:33<47:52, 24.02it/s]

finished frames 4480000, mean/median reward 34.9/12.0, min/max reward 2.0/253.0


 45%|████▍     | 56098/125000 [39:37<47:27, 24.20it/s]

finished frames 4488000, mean/median reward 47.4/11.0, min/max reward 1.0/345.0


 45%|████▍     | 56200/125000 [39:41<46:47, 24.51it/s]

finished frames 4496000, mean/median reward 38.6/13.0, min/max reward 3.0/163.0


 45%|████▌     | 56299/125000 [39:45<46:54, 24.41it/s]

finished frames 4504000, mean/median reward 29.9/10.0, min/max reward 1.0/154.0


 45%|████▌     | 56398/125000 [39:50<47:47, 23.92it/s]

finished frames 4512000, mean/median reward 28.0/10.0, min/max reward 0.0/154.0


 45%|████▌     | 56500/125000 [39:54<47:38, 23.96it/s]

finished frames 4520000, mean/median reward 44.1/27.0, min/max reward 3.0/154.0


 45%|████▌     | 56599/125000 [39:58<47:46, 23.86it/s]

finished frames 4528000, mean/median reward 57.2/27.0, min/max reward 3.0/361.0


 45%|████▌     | 56698/125000 [40:02<47:17, 24.07it/s]

finished frames 4536000, mean/median reward 40.2/13.0, min/max reward 0.0/296.0


 45%|████▌     | 56800/125000 [40:06<47:04, 24.15it/s]

finished frames 4544000, mean/median reward 26.6/20.0, min/max reward 0.0/123.0


 46%|████▌     | 56899/125000 [40:10<46:00, 24.67it/s]

finished frames 4552000, mean/median reward 21.9/10.0, min/max reward 2.0/167.0


 46%|████▌     | 56998/125000 [40:14<46:20, 24.46it/s]

finished frames 4560000, mean/median reward 33.0/10.0, min/max reward 0.0/167.0


 46%|████▌     | 57100/125000 [40:19<46:05, 24.55it/s]

finished frames 4568000, mean/median reward 23.8/7.0, min/max reward 0.0/147.0


 46%|████▌     | 57199/125000 [40:23<46:22, 24.36it/s]

finished frames 4576000, mean/median reward 22.1/19.0, min/max reward 4.0/68.0


 46%|████▌     | 57298/125000 [40:27<48:31, 23.25it/s]

finished frames 4584000, mean/median reward 27.1/12.0, min/max reward 0.0/148.0


 46%|████▌     | 57400/125000 [40:31<47:19, 23.81it/s]

finished frames 4592000, mean/median reward 48.9/14.0, min/max reward 0.0/226.0


 46%|████▌     | 57499/125000 [40:35<46:31, 24.18it/s]

finished frames 4600000, mean/median reward 53.2/23.0, min/max reward 0.0/226.0


 46%|████▌     | 57598/125000 [40:39<46:35, 24.11it/s]

finished frames 4608000, mean/median reward 42.7/12.0, min/max reward 0.0/257.0


 46%|████▌     | 57700/125000 [40:44<46:50, 23.94it/s]

finished frames 4616000, mean/median reward 48.1/12.0, min/max reward 2.0/257.0


 46%|████▌     | 57799/125000 [40:48<46:14, 24.22it/s]

finished frames 4624000, mean/median reward 99.7/48.0, min/max reward 1.0/332.0


 46%|████▋     | 57898/125000 [40:52<46:16, 24.17it/s]

finished frames 4632000, mean/median reward 27.1/6.0, min/max reward 2.0/177.0


 46%|████▋     | 58000/125000 [40:56<46:39, 23.93it/s]

finished frames 4640000, mean/median reward 20.2/9.0, min/max reward 2.0/80.0


 46%|████▋     | 58099/125000 [41:00<47:34, 23.44it/s]

finished frames 4648000, mean/median reward 23.1/7.0, min/max reward 0.0/122.0


 47%|████▋     | 58198/125000 [41:04<45:29, 24.47it/s]

finished frames 4656000, mean/median reward 38.6/9.0, min/max reward 0.0/145.0


 47%|████▋     | 58300/125000 [41:09<45:50, 24.25it/s]

finished frames 4664000, mean/median reward 36.2/19.0, min/max reward 2.0/240.0


 47%|████▋     | 58399/125000 [41:13<45:48, 24.24it/s]

finished frames 4672000, mean/median reward 35.6/19.0, min/max reward 0.0/259.0


 47%|████▋     | 58498/125000 [41:17<45:22, 24.43it/s]

finished frames 4680000, mean/median reward 80.1/27.0, min/max reward 0.0/335.0


 47%|████▋     | 58600/125000 [41:21<46:39, 23.72it/s]

finished frames 4688000, mean/median reward 36.9/20.0, min/max reward 0.0/138.0


 47%|████▋     | 58699/125000 [41:25<45:38, 24.21it/s]

finished frames 4696000, mean/median reward 45.9/13.0, min/max reward 0.0/249.0


 47%|████▋     | 58798/125000 [41:29<44:34, 24.75it/s]

finished frames 4704000, mean/median reward 54.2/17.0, min/max reward 0.0/208.0


 47%|████▋     | 58900/125000 [41:34<49:54, 22.07it/s]

finished frames 4712000, mean/median reward 36.9/19.0, min/max reward 0.0/142.0


 47%|████▋     | 58999/125000 [41:38<45:17, 24.28it/s]

finished frames 4720000, mean/median reward 31.9/18.0, min/max reward 0.0/142.0


 47%|████▋     | 59098/125000 [41:42<44:36, 24.62it/s]

finished frames 4728000, mean/median reward 30.9/19.0, min/max reward 0.0/115.0


 47%|████▋     | 59200/125000 [41:46<44:33, 24.61it/s]

finished frames 4736000, mean/median reward 38.9/18.0, min/max reward 0.0/105.0


 47%|████▋     | 59299/125000 [41:50<43:39, 25.08it/s]

finished frames 4744000, mean/median reward 80.1/34.0, min/max reward 6.0/307.0


 48%|████▊     | 59398/125000 [41:54<45:10, 24.20it/s]

finished frames 4752000, mean/median reward 53.5/26.0, min/max reward 0.0/246.0


 48%|████▊     | 59500/125000 [41:58<45:06, 24.20it/s]

finished frames 4760000, mean/median reward 30.8/10.0, min/max reward 1.0/156.0


 48%|████▊     | 59599/125000 [42:02<44:30, 24.49it/s]

finished frames 4768000, mean/median reward 53.5/19.0, min/max reward 0.0/322.0


 48%|████▊     | 59698/125000 [42:07<45:43, 23.80it/s]

finished frames 4776000, mean/median reward 35.1/6.0, min/max reward 0.0/322.0


 48%|████▊     | 59800/125000 [42:11<45:55, 23.66it/s]

finished frames 4784000, mean/median reward 23.8/7.0, min/max reward 0.0/199.0


 48%|████▊     | 59899/125000 [42:15<43:25, 24.99it/s]

finished frames 4792000, mean/median reward 32.2/15.0, min/max reward 0.0/131.0


 48%|████▊     | 59998/125000 [42:19<46:46, 23.16it/s]

finished frames 4800000, mean/median reward 30.0/3.0, min/max reward 0.0/277.0


 48%|████▊     | 60100/125000 [42:23<46:01, 23.50it/s]

finished frames 4808000, mean/median reward 5.3/4.0, min/max reward 0.0/16.0


 48%|████▊     | 60199/125000 [42:27<44:48, 24.10it/s]

finished frames 4816000, mean/median reward 23.5/10.0, min/max reward 1.0/81.0


 48%|████▊     | 60298/125000 [42:31<44:44, 24.10it/s]

finished frames 4824000, mean/median reward 40.6/29.0, min/max reward 0.0/133.0


 48%|████▊     | 60400/125000 [42:36<45:37, 23.60it/s]

finished frames 4832000, mean/median reward 74.4/29.0, min/max reward 0.0/294.0


 48%|████▊     | 60499/125000 [42:40<44:38, 24.08it/s]

finished frames 4840000, mean/median reward 32.0/16.0, min/max reward 0.0/105.0


 48%|████▊     | 60598/125000 [42:44<44:44, 23.99it/s]

finished frames 4848000, mean/median reward 31.6/12.0, min/max reward 0.0/247.0


 49%|████▊     | 60700/125000 [42:48<45:32, 23.53it/s]

finished frames 4856000, mean/median reward 31.9/11.0, min/max reward 0.0/193.0


 49%|████▊     | 60799/125000 [42:52<43:16, 24.73it/s]

finished frames 4864000, mean/median reward 62.1/22.0, min/max reward 0.0/288.0


 49%|████▊     | 60898/125000 [42:56<44:15, 24.14it/s]

finished frames 4872000, mean/median reward 77.6/27.0, min/max reward 6.0/312.0


 49%|████▉     | 61000/125000 [43:01<45:11, 23.61it/s]

finished frames 4880000, mean/median reward 60.9/24.0, min/max reward 3.0/312.0


 49%|████▉     | 61099/125000 [43:05<45:57, 23.18it/s]

finished frames 4888000, mean/median reward 48.1/10.0, min/max reward 0.0/306.0


 49%|████▉     | 61198/125000 [43:09<44:35, 23.85it/s]

finished frames 4896000, mean/median reward 53.6/18.0, min/max reward 2.0/243.0


 49%|████▉     | 61300/125000 [43:14<45:36, 23.28it/s]

finished frames 4904000, mean/median reward 41.9/11.0, min/max reward 2.0/284.0


 49%|████▉     | 61399/125000 [43:18<46:40, 22.71it/s]

finished frames 4912000, mean/median reward 79.8/13.0, min/max reward 3.0/295.0


 49%|████▉     | 61498/125000 [43:22<44:09, 23.96it/s]

finished frames 4920000, mean/median reward 23.1/15.0, min/max reward 1.0/88.0


 49%|████▉     | 61600/125000 [43:26<45:12, 23.37it/s]

finished frames 4928000, mean/median reward 22.1/13.0, min/max reward 1.0/85.0


 49%|████▉     | 61699/125000 [43:31<45:13, 23.33it/s]

finished frames 4936000, mean/median reward 13.9/7.0, min/max reward 2.0/65.0


 49%|████▉     | 61798/125000 [43:35<45:28, 23.17it/s]

finished frames 4944000, mean/median reward 34.8/7.0, min/max reward 1.0/242.0


 50%|████▉     | 61900/125000 [43:39<45:55, 22.90it/s]

finished frames 4952000, mean/median reward 38.6/16.0, min/max reward 2.0/165.0


 50%|████▉     | 61999/125000 [43:43<44:11, 23.76it/s]

finished frames 4960000, mean/median reward 51.7/16.0, min/max reward 2.0/252.0


 50%|████▉     | 62098/125000 [43:48<44:14, 23.70it/s]

finished frames 4968000, mean/median reward 13.7/12.0, min/max reward 1.0/35.0


 50%|████▉     | 62200/125000 [43:52<44:32, 23.49it/s]

finished frames 4976000, mean/median reward 38.2/13.0, min/max reward 0.0/250.0


 50%|████▉     | 62299/125000 [43:56<43:12, 24.18it/s]

finished frames 4984000, mean/median reward 36.8/11.0, min/max reward 2.0/174.0


 50%|████▉     | 62398/125000 [44:00<42:58, 24.28it/s]

finished frames 4992000, mean/median reward 54.8/14.0, min/max reward 3.0/327.0


 50%|█████     | 62500/125000 [44:05<42:51, 24.30it/s]

finished frames 5000000, mean/median reward 26.4/11.0, min/max reward 1.0/108.0


 50%|█████     | 62599/125000 [44:09<43:09, 24.10it/s]

finished frames 5008000, mean/median reward 64.6/18.0, min/max reward 2.0/368.0


 50%|█████     | 62698/125000 [44:13<43:43, 23.75it/s]

finished frames 5016000, mean/median reward 31.7/19.0, min/max reward 0.0/112.0


 50%|█████     | 62800/125000 [44:17<42:57, 24.14it/s]

finished frames 5024000, mean/median reward 34.7/10.0, min/max reward 3.0/186.0


 50%|█████     | 62899/125000 [44:21<42:49, 24.17it/s]

finished frames 5032000, mean/median reward 28.6/13.0, min/max reward 2.0/121.0


 50%|█████     | 62998/125000 [44:25<44:11, 23.39it/s]

finished frames 5040000, mean/median reward 43.1/13.0, min/max reward 2.0/260.0


 50%|█████     | 63100/125000 [44:30<42:45, 24.13it/s]

finished frames 5048000, mean/median reward 25.1/5.0, min/max reward 0.0/174.0


 51%|█████     | 63199/125000 [44:34<44:34, 23.11it/s]

finished frames 5056000, mean/median reward 34.8/10.0, min/max reward 0.0/191.0


 51%|█████     | 63298/125000 [44:38<42:44, 24.06it/s]

finished frames 5064000, mean/median reward 20.6/15.0, min/max reward 0.0/56.0


 51%|█████     | 63400/125000 [44:42<42:09, 24.36it/s]

finished frames 5072000, mean/median reward 31.5/5.0, min/max reward 0.0/287.0


 51%|█████     | 63499/125000 [44:46<42:12, 24.28it/s]

finished frames 5080000, mean/median reward 36.8/12.0, min/max reward 0.0/287.0


 51%|█████     | 63598/125000 [44:51<42:35, 24.03it/s]

finished frames 5088000, mean/median reward 37.9/14.0, min/max reward 0.0/287.0


 51%|█████     | 63700/125000 [44:55<42:33, 24.00it/s]

finished frames 5096000, mean/median reward 22.7/12.0, min/max reward 0.0/102.0


 51%|█████     | 63799/125000 [44:59<43:40, 23.35it/s]

finished frames 5104000, mean/median reward 39.4/9.0, min/max reward 1.0/237.0


 51%|█████     | 63898/125000 [45:03<42:31, 23.95it/s]

finished frames 5112000, mean/median reward 47.2/16.0, min/max reward 2.0/281.0


 51%|█████     | 64000/125000 [45:07<42:09, 24.11it/s]

finished frames 5120000, mean/median reward 60.6/24.0, min/max reward 2.0/207.0


 51%|█████▏    | 64099/125000 [45:12<42:34, 23.84it/s]

finished frames 5128000, mean/median reward 47.6/29.0, min/max reward 0.0/213.0


 51%|█████▏    | 64198/125000 [45:16<43:04, 23.53it/s]

finished frames 5136000, mean/median reward 28.1/8.0, min/max reward 0.0/119.0


 51%|█████▏    | 64300/125000 [45:20<42:16, 23.93it/s]

finished frames 5144000, mean/median reward 35.2/15.0, min/max reward 0.0/228.0


 52%|█████▏    | 64399/125000 [45:24<42:07, 23.98it/s]

finished frames 5152000, mean/median reward 22.1/6.0, min/max reward 0.0/150.0


 52%|█████▏    | 64498/125000 [45:28<41:51, 24.09it/s]

finished frames 5160000, mean/median reward 11.9/3.0, min/max reward 0.0/92.0


 52%|█████▏    | 64600/125000 [45:33<42:17, 23.81it/s]

finished frames 5168000, mean/median reward 50.2/14.0, min/max reward 1.0/317.0


 52%|█████▏    | 64699/125000 [45:37<42:36, 23.58it/s]

finished frames 5176000, mean/median reward 43.9/10.0, min/max reward 0.0/291.0


 52%|█████▏    | 64798/125000 [45:41<41:17, 24.30it/s]

finished frames 5184000, mean/median reward 46.6/10.0, min/max reward 1.0/291.0


 52%|█████▏    | 64900/125000 [45:45<42:07, 23.78it/s]

finished frames 5192000, mean/median reward 33.6/9.0, min/max reward 0.0/166.0


 52%|█████▏    | 64999/125000 [45:49<41:00, 24.39it/s]

finished frames 5200000, mean/median reward 32.8/3.0, min/max reward 0.0/237.0


 52%|█████▏    | 65098/125000 [45:54<42:01, 23.76it/s]

finished frames 5208000, mean/median reward 54.7/26.0, min/max reward 1.0/237.0


 52%|█████▏    | 65200/125000 [45:58<41:25, 24.06it/s]

finished frames 5216000, mean/median reward 49.1/26.0, min/max reward 3.0/178.0


 52%|█████▏    | 65299/125000 [46:02<42:19, 23.51it/s]

finished frames 5224000, mean/median reward 52.1/28.0, min/max reward 1.0/250.0


 52%|█████▏    | 65398/125000 [46:06<41:00, 24.23it/s]

finished frames 5232000, mean/median reward 32.9/15.0, min/max reward 1.0/144.0


 52%|█████▏    | 65500/125000 [46:10<40:48, 24.30it/s]

finished frames 5240000, mean/median reward 40.9/21.0, min/max reward 6.0/187.0


 52%|█████▏    | 65599/125000 [46:14<40:49, 24.25it/s]

finished frames 5248000, mean/median reward 40.9/9.0, min/max reward 0.0/305.0


 53%|█████▎    | 65698/125000 [46:19<41:39, 23.73it/s]

finished frames 5256000, mean/median reward 18.6/5.0, min/max reward 0.0/101.0


 53%|█████▎    | 65800/125000 [46:23<41:00, 24.06it/s]

finished frames 5264000, mean/median reward 15.1/11.0, min/max reward 0.0/48.0


 53%|█████▎    | 65899/125000 [46:27<41:24, 23.79it/s]

finished frames 5272000, mean/median reward 22.1/8.0, min/max reward 1.0/76.0


 53%|█████▎    | 65998/125000 [46:31<41:02, 23.96it/s]

finished frames 5280000, mean/median reward 31.1/15.0, min/max reward 1.0/182.0


 53%|█████▎    | 66100/125000 [46:35<41:15, 23.79it/s]

finished frames 5288000, mean/median reward 69.8/15.0, min/max reward 1.0/280.0


 53%|█████▎    | 66199/125000 [46:40<40:21, 24.29it/s]

finished frames 5296000, mean/median reward 74.7/20.0, min/max reward 4.0/371.0


 53%|█████▎    | 66298/125000 [46:44<40:55, 23.91it/s]

finished frames 5304000, mean/median reward 62.2/24.0, min/max reward 0.0/371.0


 53%|█████▎    | 66400/125000 [46:48<42:15, 23.11it/s]

finished frames 5312000, mean/median reward 64.1/23.0, min/max reward 2.0/371.0


 53%|█████▎    | 66499/125000 [46:52<40:42, 23.95it/s]

finished frames 5320000, mean/median reward 51.6/18.0, min/max reward 3.0/371.0


 53%|█████▎    | 66598/125000 [46:56<40:33, 24.00it/s]

finished frames 5328000, mean/median reward 28.2/20.0, min/max reward 7.0/78.0


 53%|█████▎    | 66700/125000 [47:01<40:27, 24.02it/s]

finished frames 5336000, mean/median reward 40.3/18.0, min/max reward 3.0/172.0


 53%|█████▎    | 66799/125000 [47:05<39:49, 24.35it/s]

finished frames 5344000, mean/median reward 22.3/11.0, min/max reward 0.0/129.0


 54%|█████▎    | 66898/125000 [47:09<40:27, 23.93it/s]

finished frames 5352000, mean/median reward 32.8/16.0, min/max reward 0.0/132.0


 54%|█████▎    | 67000/125000 [47:13<40:13, 24.04it/s]

finished frames 5360000, mean/median reward 71.1/14.0, min/max reward 0.0/303.0


 54%|█████▎    | 67099/125000 [47:17<39:59, 24.13it/s]

finished frames 5368000, mean/median reward 70.1/14.0, min/max reward 0.0/391.0


 54%|█████▍    | 67198/125000 [47:21<39:36, 24.32it/s]

finished frames 5376000, mean/median reward 18.8/6.0, min/max reward 0.0/72.0


 54%|█████▍    | 67300/125000 [47:26<39:59, 24.05it/s]

finished frames 5384000, mean/median reward 47.1/20.0, min/max reward 5.0/206.0


 54%|█████▍    | 67399/125000 [47:30<39:25, 24.35it/s]

finished frames 5392000, mean/median reward 33.1/16.0, min/max reward 0.0/156.0


 54%|█████▍    | 67498/125000 [47:34<40:00, 23.95it/s]

finished frames 5400000, mean/median reward 27.1/6.0, min/max reward 0.0/129.0


 54%|█████▍    | 67600/125000 [47:38<40:07, 23.84it/s]

finished frames 5408000, mean/median reward 43.5/8.0, min/max reward 0.0/182.0


 54%|█████▍    | 67699/125000 [47:42<39:19, 24.29it/s]

finished frames 5416000, mean/median reward 13.1/4.0, min/max reward 0.0/65.0


 54%|█████▍    | 67798/125000 [47:46<39:04, 24.40it/s]

finished frames 5424000, mean/median reward 54.8/7.0, min/max reward 0.0/335.0


 54%|█████▍    | 67900/125000 [47:51<40:16, 23.63it/s]

finished frames 5432000, mean/median reward 28.1/11.0, min/max reward 0.0/131.0


 54%|█████▍    | 67999/125000 [47:55<41:03, 23.14it/s]

finished frames 5440000, mean/median reward 44.1/13.0, min/max reward 3.0/300.0


 54%|█████▍    | 68098/125000 [47:59<39:35, 23.96it/s]

finished frames 5448000, mean/median reward 38.4/11.0, min/max reward 0.0/219.0


 55%|█████▍    | 68200/125000 [48:03<39:10, 24.17it/s]

finished frames 5456000, mean/median reward 14.5/7.0, min/max reward 0.0/44.0


 55%|█████▍    | 68299/125000 [48:07<39:41, 23.81it/s]

finished frames 5464000, mean/median reward 11.6/5.0, min/max reward 0.0/49.0


 55%|█████▍    | 68398/125000 [48:12<39:28, 23.90it/s]

finished frames 5472000, mean/median reward 35.6/9.0, min/max reward 0.0/297.0


 55%|█████▍    | 68500/125000 [48:16<39:28, 23.86it/s]

finished frames 5480000, mean/median reward 31.9/5.0, min/max reward 0.0/260.0


 55%|█████▍    | 68599/125000 [48:20<39:21, 23.88it/s]

finished frames 5488000, mean/median reward 79.4/13.0, min/max reward 0.0/266.0


 55%|█████▍    | 68698/125000 [48:24<39:40, 23.65it/s]

finished frames 5496000, mean/median reward 47.8/13.0, min/max reward 1.0/222.0


 55%|█████▌    | 68800/125000 [48:28<39:48, 23.53it/s]

finished frames 5504000, mean/median reward 22.1/18.0, min/max reward 1.0/75.0


 55%|█████▌    | 68899/125000 [48:33<39:19, 23.78it/s]

finished frames 5512000, mean/median reward 36.3/13.0, min/max reward 1.0/196.0


 55%|█████▌    | 68998/125000 [48:37<38:39, 24.15it/s]

finished frames 5520000, mean/median reward 60.5/33.0, min/max reward 6.0/211.0


 55%|█████▌    | 69100/125000 [48:41<38:43, 24.06it/s]

finished frames 5528000, mean/median reward 38.6/33.0, min/max reward 0.0/119.0


 55%|█████▌    | 69199/125000 [48:45<38:34, 24.11it/s]

finished frames 5536000, mean/median reward 23.8/7.0, min/max reward 0.0/126.0


 55%|█████▌    | 69298/125000 [48:49<38:50, 23.90it/s]

finished frames 5544000, mean/median reward 39.5/9.0, min/max reward 2.0/206.0


 56%|█████▌    | 69400/125000 [48:54<39:06, 23.70it/s]

finished frames 5552000, mean/median reward 56.5/17.0, min/max reward 1.0/220.0


 56%|█████▌    | 69499/125000 [48:58<39:16, 23.56it/s]

finished frames 5560000, mean/median reward 22.8/8.0, min/max reward 1.0/85.0


 56%|█████▌    | 69598/125000 [49:02<39:06, 23.61it/s]

finished frames 5568000, mean/median reward 38.9/17.0, min/max reward 0.0/208.0


 56%|█████▌    | 69700/125000 [49:06<37:50, 24.36it/s]

finished frames 5576000, mean/median reward 41.2/12.0, min/max reward 0.0/208.0


 56%|█████▌    | 69799/125000 [49:10<38:29, 23.90it/s]

finished frames 5584000, mean/median reward 56.9/17.0, min/max reward 0.0/309.0


 56%|█████▌    | 69898/125000 [49:14<37:45, 24.32it/s]

finished frames 5592000, mean/median reward 41.9/9.0, min/max reward 0.0/309.0


 56%|█████▌    | 70000/125000 [49:19<38:08, 24.03it/s]

finished frames 5600000, mean/median reward 22.6/7.0, min/max reward 0.0/156.0


 56%|█████▌    | 70099/125000 [49:23<37:40, 24.28it/s]

finished frames 5608000, mean/median reward 32.3/16.0, min/max reward 0.0/238.0


 56%|█████▌    | 70198/125000 [49:27<38:50, 23.52it/s]

finished frames 5616000, mean/median reward 28.9/5.0, min/max reward 0.0/261.0


 56%|█████▌    | 70300/125000 [49:31<39:09, 23.28it/s]

finished frames 5624000, mean/median reward 27.3/11.0, min/max reward 0.0/264.0


 56%|█████▋    | 70399/125000 [49:35<37:53, 24.02it/s]

finished frames 5632000, mean/median reward 30.6/10.0, min/max reward 0.0/230.0


 56%|█████▋    | 70498/125000 [49:40<37:55, 23.95it/s]

finished frames 5640000, mean/median reward 18.1/9.0, min/max reward 0.0/71.0


 56%|█████▋    | 70600/125000 [49:44<37:54, 23.91it/s]

finished frames 5648000, mean/median reward 50.7/22.0, min/max reward 0.0/246.0


 57%|█████▋    | 70699/125000 [49:48<37:29, 24.14it/s]

finished frames 5656000, mean/median reward 24.0/13.0, min/max reward 2.0/105.0


 57%|█████▋    | 70798/125000 [49:52<37:25, 24.14it/s]

finished frames 5664000, mean/median reward 40.8/13.0, min/max reward 1.0/358.0


 57%|█████▋    | 70900/125000 [49:56<37:05, 24.31it/s]

finished frames 5672000, mean/median reward 35.0/13.0, min/max reward 1.0/358.0


 57%|█████▋    | 70999/125000 [50:00<38:04, 23.63it/s]

finished frames 5680000, mean/median reward 78.6/15.0, min/max reward 1.0/358.0


 57%|█████▋    | 71098/125000 [50:05<36:56, 24.32it/s]

finished frames 5688000, mean/median reward 35.8/10.0, min/max reward 0.0/307.0


 57%|█████▋    | 71200/125000 [50:09<36:36, 24.50it/s]

finished frames 5696000, mean/median reward 62.4/14.0, min/max reward 1.0/307.0


 57%|█████▋    | 71299/125000 [50:13<38:23, 23.32it/s]

finished frames 5704000, mean/median reward 20.9/14.0, min/max reward 0.0/53.0


 57%|█████▋    | 71398/125000 [50:17<36:54, 24.20it/s]

finished frames 5712000, mean/median reward 52.1/11.0, min/max reward 0.0/336.0


 57%|█████▋    | 71500/125000 [50:21<37:18, 23.90it/s]

finished frames 5720000, mean/median reward 22.9/7.0, min/max reward 0.0/245.0


 57%|█████▋    | 71599/125000 [50:25<37:14, 23.90it/s]

finished frames 5728000, mean/median reward 14.3/7.0, min/max reward 0.0/59.0


 57%|█████▋    | 71698/125000 [50:30<36:39, 24.23it/s]

finished frames 5736000, mean/median reward 19.3/13.0, min/max reward 0.0/59.0


 57%|█████▋    | 71800/125000 [50:34<36:48, 24.08it/s]

finished frames 5744000, mean/median reward 38.6/7.0, min/max reward 0.0/323.0


 58%|█████▊    | 71899/125000 [50:38<36:46, 24.07it/s]

finished frames 5752000, mean/median reward 57.2/20.0, min/max reward 1.0/312.0


 58%|█████▊    | 71998/125000 [50:42<36:32, 24.18it/s]

finished frames 5760000, mean/median reward 35.4/22.0, min/max reward 0.0/196.0


 58%|█████▊    | 72100/125000 [50:46<36:56, 23.87it/s]

finished frames 5768000, mean/median reward 67.9/18.0, min/max reward 0.0/373.0


 58%|█████▊    | 72199/125000 [50:51<36:33, 24.07it/s]

finished frames 5776000, mean/median reward 42.6/15.0, min/max reward 0.0/312.0


 58%|█████▊    | 72298/125000 [50:55<37:00, 23.73it/s]

finished frames 5784000, mean/median reward 37.4/5.0, min/max reward 0.0/225.0


 58%|█████▊    | 72400/125000 [50:59<37:09, 23.59it/s]

finished frames 5792000, mean/median reward 14.2/4.0, min/max reward 0.0/74.0


 58%|█████▊    | 72499/125000 [51:03<36:39, 23.87it/s]

finished frames 5800000, mean/median reward 14.4/6.0, min/max reward 1.0/74.0


 58%|█████▊    | 72598/125000 [51:07<37:04, 23.56it/s]

finished frames 5808000, mean/median reward 13.9/4.0, min/max reward 0.0/110.0


 58%|█████▊    | 72700/125000 [51:12<36:36, 23.81it/s]

finished frames 5816000, mean/median reward 43.8/8.0, min/max reward 2.0/256.0


 58%|█████▊    | 72799/125000 [51:16<36:14, 24.01it/s]

finished frames 5824000, mean/median reward 23.6/7.0, min/max reward 1.0/186.0


 58%|█████▊    | 72898/125000 [51:20<36:44, 23.63it/s]

finished frames 5832000, mean/median reward 66.7/12.0, min/max reward 1.0/268.0


 58%|█████▊    | 73000/125000 [51:24<36:12, 23.93it/s]

finished frames 5840000, mean/median reward 11.1/6.0, min/max reward 0.0/59.0


 58%|█████▊    | 73099/125000 [51:28<35:45, 24.19it/s]

finished frames 5848000, mean/median reward 24.1/16.0, min/max reward 0.0/124.0


 59%|█████▊    | 73198/125000 [51:33<36:16, 23.81it/s]

finished frames 5856000, mean/median reward 81.0/42.0, min/max reward 3.0/288.0


 59%|█████▊    | 73300/125000 [51:37<35:17, 24.41it/s]

finished frames 5864000, mean/median reward 51.7/21.0, min/max reward 1.0/264.0


 59%|█████▊    | 73399/125000 [51:41<35:38, 24.13it/s]

finished frames 5872000, mean/median reward 34.7/7.0, min/max reward 0.0/252.0


 59%|█████▉    | 73498/125000 [51:45<35:52, 23.93it/s]

finished frames 5880000, mean/median reward 72.7/21.0, min/max reward 2.0/308.0


 59%|█████▉    | 73600/125000 [51:49<35:54, 23.86it/s]

finished frames 5888000, mean/median reward 14.8/4.0, min/max reward 0.0/60.0


 59%|█████▉    | 73699/125000 [51:53<35:19, 24.21it/s]

finished frames 5896000, mean/median reward 11.4/5.0, min/max reward 0.0/62.0


 59%|█████▉    | 73798/125000 [51:58<35:24, 24.10it/s]

finished frames 5904000, mean/median reward 33.5/20.0, min/max reward 3.0/181.0


 59%|█████▉    | 73900/125000 [52:02<35:38, 23.89it/s]

finished frames 5912000, mean/median reward 38.7/8.0, min/max reward 4.0/204.0


 59%|█████▉    | 73999/125000 [52:06<35:35, 23.88it/s]

finished frames 5920000, mean/median reward 38.9/11.0, min/max reward 4.0/234.0


 59%|█████▉    | 74098/125000 [52:10<35:30, 23.90it/s]

finished frames 5928000, mean/median reward 30.0/9.0, min/max reward 1.0/253.0


 59%|█████▉    | 74200/125000 [52:14<35:05, 24.13it/s]

finished frames 5936000, mean/median reward 24.0/5.0, min/max reward 0.0/197.0


 59%|█████▉    | 74299/125000 [52:19<35:01, 24.13it/s]

finished frames 5944000, mean/median reward 36.8/7.0, min/max reward 0.0/197.0


 60%|█████▉    | 74398/125000 [52:23<35:17, 23.90it/s]

finished frames 5952000, mean/median reward 57.2/47.0, min/max reward 4.0/255.0


 60%|█████▉    | 74500/125000 [52:27<36:18, 23.18it/s]

finished frames 5960000, mean/median reward 62.2/20.0, min/max reward 4.0/293.0


 60%|█████▉    | 74599/125000 [52:31<34:58, 24.01it/s]

finished frames 5968000, mean/median reward 51.7/24.0, min/max reward 0.0/260.0


 60%|█████▉    | 74698/125000 [52:35<34:37, 24.21it/s]

finished frames 5976000, mean/median reward 37.8/17.0, min/max reward 0.0/260.0


 60%|█████▉    | 74800/125000 [52:39<35:06, 23.83it/s]

finished frames 5984000, mean/median reward 48.0/17.0, min/max reward 3.0/299.0


 60%|█████▉    | 74899/125000 [52:44<35:04, 23.80it/s]

finished frames 5992000, mean/median reward 26.6/11.0, min/max reward 0.0/86.0


 60%|█████▉    | 74998/125000 [52:48<34:48, 23.94it/s]

finished frames 6000000, mean/median reward 35.9/13.0, min/max reward 1.0/123.0


 60%|██████    | 75100/125000 [52:52<34:51, 23.86it/s]

finished frames 6008000, mean/median reward 35.4/6.0, min/max reward 1.0/216.0


 60%|██████    | 75199/125000 [52:56<35:01, 23.70it/s]

finished frames 6016000, mean/median reward 18.5/16.0, min/max reward 0.0/64.0


 60%|██████    | 75298/125000 [53:00<34:16, 24.17it/s]

finished frames 6024000, mean/median reward 30.2/21.0, min/max reward 0.0/135.0


 60%|██████    | 75400/125000 [53:05<34:32, 23.93it/s]

finished frames 6032000, mean/median reward 29.8/10.0, min/max reward 0.0/95.0


 60%|██████    | 75499/125000 [53:09<34:10, 24.14it/s]

finished frames 6040000, mean/median reward 56.2/42.0, min/max reward 10.0/134.0


 60%|██████    | 75598/125000 [53:13<34:53, 23.59it/s]

finished frames 6048000, mean/median reward 88.2/39.0, min/max reward 0.0/266.0


 61%|██████    | 75700/125000 [53:17<34:06, 24.09it/s]

finished frames 6056000, mean/median reward 76.9/16.0, min/max reward 0.0/355.0


 61%|██████    | 75799/125000 [53:21<33:45, 24.29it/s]

finished frames 6064000, mean/median reward 52.1/27.0, min/max reward 0.0/246.0


 61%|██████    | 75898/125000 [53:25<34:01, 24.05it/s]

finished frames 6072000, mean/median reward 35.2/28.0, min/max reward 0.0/203.0


 61%|██████    | 76000/125000 [53:30<33:53, 24.09it/s]

finished frames 6080000, mean/median reward 57.9/7.0, min/max reward 0.0/265.0


 61%|██████    | 76099/125000 [53:34<34:15, 23.79it/s]

finished frames 6088000, mean/median reward 31.6/12.0, min/max reward 0.0/122.0


 61%|██████    | 76198/125000 [53:38<33:47, 24.07it/s]

finished frames 6096000, mean/median reward 45.2/10.0, min/max reward 0.0/345.0


 61%|██████    | 76300/125000 [53:42<34:06, 23.80it/s]

finished frames 6104000, mean/median reward 20.3/10.0, min/max reward 0.0/82.0


 61%|██████    | 76399/125000 [53:46<33:15, 24.35it/s]

finished frames 6112000, mean/median reward 57.8/26.0, min/max reward 0.0/245.0


 61%|██████    | 76498/125000 [53:50<33:16, 24.29it/s]

finished frames 6120000, mean/median reward 72.4/30.0, min/max reward 5.0/236.0


 61%|██████▏   | 76600/125000 [53:55<33:24, 24.15it/s]

finished frames 6128000, mean/median reward 55.3/26.0, min/max reward 3.0/236.0


 61%|██████▏   | 76699/125000 [53:59<33:24, 24.10it/s]

finished frames 6136000, mean/median reward 54.4/26.0, min/max reward 1.0/272.0


 61%|██████▏   | 76798/125000 [54:03<35:18, 22.76it/s]

finished frames 6144000, mean/median reward 54.1/17.0, min/max reward 0.0/272.0


 62%|██████▏   | 76900/125000 [54:07<33:07, 24.21it/s]

finished frames 6152000, mean/median reward 75.1/19.0, min/max reward 6.0/309.0


 62%|██████▏   | 76999/125000 [54:11<33:15, 24.05it/s]

finished frames 6160000, mean/median reward 63.9/18.0, min/max reward 2.0/306.0


 62%|██████▏   | 77098/125000 [54:15<33:40, 23.71it/s]

finished frames 6168000, mean/median reward 21.6/11.0, min/max reward 0.0/121.0


 62%|██████▏   | 77200/125000 [54:20<33:24, 23.84it/s]

finished frames 6176000, mean/median reward 15.4/8.0, min/max reward 0.0/78.0


 62%|██████▏   | 77299/125000 [54:24<33:29, 23.74it/s]

finished frames 6184000, mean/median reward 36.4/8.0, min/max reward 0.0/192.0


 62%|██████▏   | 77398/125000 [54:28<32:43, 24.25it/s]

finished frames 6192000, mean/median reward 38.1/8.0, min/max reward 0.0/192.0


 62%|██████▏   | 77500/125000 [54:32<33:06, 23.91it/s]

finished frames 6200000, mean/median reward 28.6/9.0, min/max reward 0.0/112.0


 62%|██████▏   | 77599/125000 [54:36<32:57, 23.97it/s]

finished frames 6208000, mean/median reward 33.9/9.0, min/max reward 0.0/169.0


 62%|██████▏   | 77698/125000 [54:40<32:19, 24.38it/s]

finished frames 6216000, mean/median reward 19.2/5.0, min/max reward 4.0/125.0


 62%|██████▏   | 77800/125000 [54:45<33:11, 23.70it/s]

finished frames 6224000, mean/median reward 16.1/5.0, min/max reward 2.0/63.0


 62%|██████▏   | 77899/125000 [54:49<33:08, 23.68it/s]

finished frames 6232000, mean/median reward 25.2/11.0, min/max reward 4.0/89.0


 62%|██████▏   | 77998/125000 [54:53<32:13, 24.30it/s]

finished frames 6240000, mean/median reward 62.4/6.0, min/max reward 0.0/365.0


 62%|██████▏   | 78100/125000 [54:57<32:24, 24.12it/s]

finished frames 6248000, mean/median reward 45.5/15.0, min/max reward 0.0/365.0


 63%|██████▎   | 78199/125000 [55:01<32:28, 24.02it/s]

finished frames 6256000, mean/median reward 63.0/9.0, min/max reward 0.0/359.0


 63%|██████▎   | 78298/125000 [55:05<32:30, 23.95it/s]

finished frames 6264000, mean/median reward 60.9/9.0, min/max reward 0.0/359.0


 63%|██████▎   | 78400/125000 [55:10<32:32, 23.86it/s]

finished frames 6272000, mean/median reward 80.8/37.0, min/max reward 2.0/359.0


 63%|██████▎   | 78499/125000 [55:14<32:19, 23.98it/s]

finished frames 6280000, mean/median reward 82.8/18.0, min/max reward 0.0/359.0


 63%|██████▎   | 78598/125000 [55:18<32:12, 24.01it/s]

finished frames 6288000, mean/median reward 89.4/33.0, min/max reward 0.0/359.0


 63%|██████▎   | 78700/125000 [55:22<31:28, 24.51it/s]

finished frames 6296000, mean/median reward 78.6/25.0, min/max reward 0.0/359.0


 63%|██████▎   | 78799/125000 [55:26<31:40, 24.30it/s]

finished frames 6304000, mean/median reward 67.9/22.0, min/max reward 1.0/359.0


 63%|██████▎   | 78898/125000 [55:30<31:47, 24.16it/s]

finished frames 6312000, mean/median reward 64.2/15.0, min/max reward 0.0/359.0


 63%|██████▎   | 79000/125000 [55:35<32:16, 23.75it/s]

finished frames 6320000, mean/median reward 45.1/10.0, min/max reward 0.0/359.0


 63%|██████▎   | 79099/125000 [55:39<32:48, 23.32it/s]

finished frames 6328000, mean/median reward 45.8/7.0, min/max reward 0.0/359.0


 63%|██████▎   | 79198/125000 [55:43<31:51, 23.96it/s]

finished frames 6336000, mean/median reward 63.6/34.0, min/max reward 1.0/359.0


 63%|██████▎   | 79300/125000 [55:47<31:54, 23.87it/s]

finished frames 6344000, mean/median reward 75.0/34.0, min/max reward 0.0/359.0


 64%|██████▎   | 79399/125000 [55:51<31:35, 24.05it/s]

finished frames 6352000, mean/median reward 80.9/24.0, min/max reward 5.0/359.0


 64%|██████▎   | 79498/125000 [55:56<31:39, 23.95it/s]

finished frames 6360000, mean/median reward 76.6/23.0, min/max reward 9.0/359.0


 64%|██████▎   | 79600/125000 [56:00<31:28, 24.04it/s]

finished frames 6368000, mean/median reward 91.9/36.0, min/max reward 1.0/359.0


 64%|██████▍   | 79699/125000 [56:04<31:08, 24.25it/s]

finished frames 6376000, mean/median reward 30.6/6.0, min/max reward 0.0/194.0


 64%|██████▍   | 79798/125000 [56:08<31:08, 24.20it/s]

finished frames 6384000, mean/median reward 25.9/7.0, min/max reward 0.0/185.0


 64%|██████▍   | 79900/125000 [56:12<31:06, 24.16it/s]

finished frames 6392000, mean/median reward 39.7/18.0, min/max reward 1.0/185.0


 64%|██████▍   | 79999/125000 [56:17<31:33, 23.76it/s]

finished frames 6400000, mean/median reward 25.0/5.0, min/max reward 0.0/185.0


 64%|██████▍   | 80098/125000 [56:21<30:57, 24.17it/s]

finished frames 6408000, mean/median reward 8.9/6.0, min/max reward 0.0/31.0


 64%|██████▍   | 80200/125000 [56:25<31:23, 23.79it/s]

finished frames 6416000, mean/median reward 67.1/24.0, min/max reward 1.0/305.0


 64%|██████▍   | 80299/125000 [56:29<31:15, 23.83it/s]

finished frames 6424000, mean/median reward 89.9/42.0, min/max reward 1.0/314.0


 64%|██████▍   | 80398/125000 [56:33<30:55, 24.03it/s]

finished frames 6432000, mean/median reward 75.4/12.0, min/max reward 0.0/314.0


 64%|██████▍   | 80500/125000 [56:38<31:28, 23.57it/s]

finished frames 6440000, mean/median reward 4.1/3.0, min/max reward 0.0/15.0


 64%|██████▍   | 80599/125000 [56:42<31:31, 23.47it/s]

finished frames 6448000, mean/median reward 4.0/1.0, min/max reward 0.0/14.0


 65%|██████▍   | 80698/125000 [56:46<30:36, 24.12it/s]

finished frames 6456000, mean/median reward 1.9/1.0, min/max reward 0.0/6.0


 65%|██████▍   | 80800/125000 [56:50<31:10, 23.62it/s]

finished frames 6464000, mean/median reward 8.4/6.0, min/max reward 0.0/32.0


 65%|██████▍   | 80899/125000 [56:54<30:24, 24.17it/s]

finished frames 6472000, mean/median reward 25.3/13.0, min/max reward 1.0/92.0


 65%|██████▍   | 80998/125000 [56:59<31:19, 23.41it/s]

finished frames 6480000, mean/median reward 14.3/6.0, min/max reward 0.0/66.0


 65%|██████▍   | 81100/125000 [57:03<30:26, 24.03it/s]

finished frames 6488000, mean/median reward 17.7/9.0, min/max reward 1.0/51.0


 65%|██████▍   | 81199/125000 [57:07<30:34, 23.88it/s]

finished frames 6496000, mean/median reward 6.9/1.0, min/max reward 0.0/32.0


 65%|██████▌   | 81298/125000 [57:11<30:22, 23.98it/s]

finished frames 6504000, mean/median reward 4.2/2.0, min/max reward 0.0/15.0


 65%|██████▌   | 81400/125000 [57:16<30:37, 23.72it/s]

finished frames 6512000, mean/median reward 10.1/4.0, min/max reward 0.0/55.0


 65%|██████▌   | 81499/125000 [57:20<30:12, 24.00it/s]

finished frames 6520000, mean/median reward 28.8/10.0, min/max reward 4.0/176.0


 65%|██████▌   | 81598/125000 [57:24<30:46, 23.51it/s]

finished frames 6528000, mean/median reward 26.5/9.0, min/max reward 1.0/188.0


 65%|██████▌   | 81700/125000 [57:28<30:18, 23.81it/s]

finished frames 6536000, mean/median reward 8.9/5.0, min/max reward 0.0/31.0


 65%|██████▌   | 81799/125000 [57:32<29:31, 24.39it/s]

finished frames 6544000, mean/median reward 7.3/6.0, min/max reward 0.0/20.0


 66%|██████▌   | 81898/125000 [57:37<30:02, 23.91it/s]

finished frames 6552000, mean/median reward 5.3/3.0, min/max reward 0.0/28.0


 66%|██████▌   | 82000/125000 [57:41<29:53, 23.97it/s]

finished frames 6560000, mean/median reward 5.0/3.0, min/max reward 1.0/12.0


 66%|██████▌   | 82099/125000 [57:45<30:09, 23.71it/s]

finished frames 6568000, mean/median reward 12.9/9.0, min/max reward 1.0/59.0


 66%|██████▌   | 82198/125000 [57:49<30:27, 23.42it/s]

finished frames 6576000, mean/median reward 43.0/11.0, min/max reward 0.0/321.0


 66%|██████▌   | 82300/125000 [57:53<29:23, 24.22it/s]

finished frames 6584000, mean/median reward 19.4/11.0, min/max reward 0.0/56.0


 66%|██████▌   | 82399/125000 [57:58<29:30, 24.06it/s]

finished frames 6592000, mean/median reward 52.1/41.0, min/max reward 1.0/260.0


 66%|██████▌   | 82498/125000 [58:02<30:19, 23.36it/s]

finished frames 6600000, mean/median reward 45.2/41.0, min/max reward 2.0/139.0


 66%|██████▌   | 82600/125000 [58:06<29:33, 23.91it/s]

finished frames 6608000, mean/median reward 45.1/16.0, min/max reward 1.0/205.0


 66%|██████▌   | 82699/125000 [58:10<28:57, 24.35it/s]

finished frames 6616000, mean/median reward 34.6/19.0, min/max reward 2.0/163.0


 66%|██████▌   | 82798/125000 [58:14<29:40, 23.70it/s]

finished frames 6624000, mean/median reward 50.8/22.0, min/max reward 4.0/250.0


 66%|██████▋   | 82900/125000 [58:18<28:35, 24.54it/s]

finished frames 6632000, mean/median reward 77.6/25.0, min/max reward 5.0/250.0


 66%|██████▋   | 82999/125000 [58:23<28:57, 24.18it/s]

finished frames 6640000, mean/median reward 61.3/39.0, min/max reward 3.0/250.0


 66%|██████▋   | 83098/125000 [58:27<29:06, 24.00it/s]

finished frames 6648000, mean/median reward 38.8/14.0, min/max reward 2.0/234.0


 67%|██████▋   | 83200/125000 [58:31<28:41, 24.28it/s]

finished frames 6656000, mean/median reward 41.8/22.0, min/max reward 1.0/183.0


 67%|██████▋   | 83299/125000 [58:35<30:03, 23.13it/s]

finished frames 6664000, mean/median reward 96.6/38.0, min/max reward 5.0/291.0


 67%|██████▋   | 83398/125000 [58:39<28:51, 24.02it/s]

finished frames 6672000, mean/median reward 87.9/55.0, min/max reward 1.0/284.0


 67%|██████▋   | 83500/125000 [58:43<28:40, 24.12it/s]

finished frames 6680000, mean/median reward 90.1/48.0, min/max reward 5.0/284.0


 67%|██████▋   | 83599/125000 [58:48<28:59, 23.80it/s]

finished frames 6688000, mean/median reward 70.5/40.0, min/max reward 11.0/239.0


 67%|██████▋   | 83698/125000 [58:52<28:44, 23.96it/s]

finished frames 6696000, mean/median reward 59.4/23.0, min/max reward 1.0/305.0


 67%|██████▋   | 83800/125000 [58:56<28:46, 23.86it/s]

finished frames 6704000, mean/median reward 36.8/10.0, min/max reward 3.0/232.0


 67%|██████▋   | 83899/125000 [59:00<28:26, 24.08it/s]

finished frames 6712000, mean/median reward 27.5/20.0, min/max reward 4.0/59.0


 67%|██████▋   | 83998/125000 [59:04<27:55, 24.48it/s]

finished frames 6720000, mean/median reward 25.8/12.0, min/max reward 1.0/97.0


 67%|██████▋   | 84100/125000 [59:08<27:55, 24.41it/s]

finished frames 6728000, mean/median reward 73.1/28.0, min/max reward 6.0/299.0


 67%|██████▋   | 84199/125000 [59:13<28:10, 24.13it/s]

finished frames 6736000, mean/median reward 57.5/28.0, min/max reward 4.0/241.0


 67%|██████▋   | 84298/125000 [59:17<27:59, 24.24it/s]

finished frames 6744000, mean/median reward 47.1/14.0, min/max reward 0.0/300.0


 68%|██████▊   | 84400/125000 [59:21<27:45, 24.37it/s]

finished frames 6752000, mean/median reward 36.3/8.0, min/max reward 0.0/279.0


 68%|██████▊   | 84499/125000 [59:25<28:34, 23.62it/s]

finished frames 6760000, mean/median reward 34.5/13.0, min/max reward 4.0/286.0


 68%|██████▊   | 84598/125000 [59:29<28:00, 24.04it/s]

finished frames 6768000, mean/median reward 8.5/8.0, min/max reward 0.0/27.0


 68%|██████▊   | 84700/125000 [59:34<27:41, 24.26it/s]

finished frames 6776000, mean/median reward 11.6/9.0, min/max reward 0.0/40.0


 68%|██████▊   | 84799/125000 [59:38<27:26, 24.41it/s]

finished frames 6784000, mean/median reward 24.4/14.0, min/max reward 4.0/89.0


 68%|██████▊   | 84898/125000 [59:42<28:18, 23.61it/s]

finished frames 6792000, mean/median reward 45.4/13.0, min/max reward 0.0/301.0


 68%|██████▊   | 85000/125000 [59:46<27:35, 24.16it/s]

finished frames 6800000, mean/median reward 48.6/12.0, min/max reward 0.0/301.0


 68%|██████▊   | 85099/125000 [59:50<27:25, 24.25it/s]

finished frames 6808000, mean/median reward 19.7/9.0, min/max reward 0.0/98.0


 68%|██████▊   | 85198/125000 [59:54<27:30, 24.12it/s]

finished frames 6816000, mean/median reward 31.6/18.0, min/max reward 0.0/234.0


 68%|██████▊   | 85300/125000 [59:59<27:33, 24.01it/s]

finished frames 6824000, mean/median reward 24.8/11.0, min/max reward 0.0/145.0


 68%|██████▊   | 85399/125000 [1:00:03<27:24, 24.08it/s]

finished frames 6832000, mean/median reward 38.4/20.0, min/max reward 4.0/150.0


 68%|██████▊   | 85498/125000 [1:00:07<27:27, 23.98it/s]

finished frames 6840000, mean/median reward 50.8/24.0, min/max reward 3.0/197.0


 68%|██████▊   | 85600/125000 [1:00:11<28:07, 23.35it/s]

finished frames 6848000, mean/median reward 63.1/19.0, min/max reward 5.0/239.0


 69%|██████▊   | 85699/125000 [1:00:15<27:44, 23.61it/s]

finished frames 6856000, mean/median reward 57.2/24.0, min/max reward 4.0/197.0


 69%|██████▊   | 85798/125000 [1:00:19<27:09, 24.05it/s]

finished frames 6864000, mean/median reward 47.1/24.0, min/max reward 0.0/150.0


 69%|██████▊   | 85900/125000 [1:00:24<27:39, 23.56it/s]

finished frames 6872000, mean/median reward 36.7/15.0, min/max reward 0.0/168.0


 69%|██████▉   | 85999/125000 [1:00:28<28:00, 23.21it/s]

finished frames 6880000, mean/median reward 18.9/13.0, min/max reward 0.0/54.0


 69%|██████▉   | 86098/125000 [1:00:32<26:55, 24.09it/s]

finished frames 6888000, mean/median reward 14.2/11.0, min/max reward 0.0/40.0


 69%|██████▉   | 86200/125000 [1:00:36<27:19, 23.67it/s]

finished frames 6896000, mean/median reward 29.6/12.0, min/max reward 1.0/237.0


 69%|██████▉   | 86299/125000 [1:00:40<26:36, 24.24it/s]

finished frames 6904000, mean/median reward 36.5/14.0, min/max reward 1.0/237.0


 69%|██████▉   | 86398/125000 [1:00:45<27:08, 23.70it/s]

finished frames 6912000, mean/median reward 22.4/9.0, min/max reward 2.0/120.0


 69%|██████▉   | 86500/125000 [1:00:49<26:30, 24.21it/s]

finished frames 6920000, mean/median reward 12.1/7.0, min/max reward 0.0/40.0


 69%|██████▉   | 86599/125000 [1:00:53<26:46, 23.90it/s]

finished frames 6928000, mean/median reward 42.9/15.0, min/max reward 0.0/215.0


 69%|██████▉   | 86698/125000 [1:00:57<26:47, 23.83it/s]

finished frames 6936000, mean/median reward 35.1/9.0, min/max reward 0.0/163.0


 69%|██████▉   | 86800/125000 [1:01:01<26:48, 23.75it/s]

finished frames 6944000, mean/median reward 52.1/14.0, min/max reward 0.0/209.0


 70%|██████▉   | 86899/125000 [1:01:06<26:35, 23.88it/s]

finished frames 6952000, mean/median reward 28.2/11.0, min/max reward 1.0/156.0


 70%|██████▉   | 86998/125000 [1:01:10<26:33, 23.85it/s]

finished frames 6960000, mean/median reward 46.6/12.0, min/max reward 2.0/324.0


 70%|██████▉   | 87100/125000 [1:01:14<26:04, 24.23it/s]

finished frames 6968000, mean/median reward 47.9/21.0, min/max reward 2.0/307.0


 70%|██████▉   | 87199/125000 [1:01:18<26:17, 23.97it/s]

finished frames 6976000, mean/median reward 29.6/12.0, min/max reward 0.0/165.0


 70%|██████▉   | 87298/125000 [1:01:22<25:47, 24.37it/s]

finished frames 6984000, mean/median reward 31.2/13.0, min/max reward 0.0/166.0


 70%|██████▉   | 87400/125000 [1:01:26<26:24, 23.72it/s]

finished frames 6992000, mean/median reward 49.8/26.0, min/max reward 4.0/166.0


 70%|██████▉   | 87499/125000 [1:01:31<26:20, 23.73it/s]

finished frames 7000000, mean/median reward 63.8/15.0, min/max reward 1.0/316.0


 70%|███████   | 87598/125000 [1:01:35<25:53, 24.08it/s]

finished frames 7008000, mean/median reward 8.2/5.0, min/max reward 0.0/32.0


 70%|███████   | 87700/125000 [1:01:39<25:59, 23.92it/s]

finished frames 7016000, mean/median reward 17.3/8.0, min/max reward 0.0/66.0


 70%|███████   | 87799/125000 [1:01:43<25:49, 24.00it/s]

finished frames 7024000, mean/median reward 61.4/19.0, min/max reward 0.0/319.0


 70%|███████   | 87898/125000 [1:01:47<26:07, 23.67it/s]

finished frames 7032000, mean/median reward 58.6/28.0, min/max reward 4.0/202.0


 70%|███████   | 88000/125000 [1:01:52<26:32, 23.24it/s]

finished frames 7040000, mean/median reward 36.8/18.0, min/max reward 4.0/184.0


 70%|███████   | 88099/125000 [1:01:56<25:23, 24.22it/s]

finished frames 7048000, mean/median reward 26.2/8.0, min/max reward 1.0/173.0


 71%|███████   | 88198/125000 [1:02:00<25:26, 24.11it/s]

finished frames 7056000, mean/median reward 52.7/17.0, min/max reward 4.0/408.0


 71%|███████   | 88300/125000 [1:02:04<25:36, 23.88it/s]

finished frames 7064000, mean/median reward 21.6/11.0, min/max reward 1.0/90.0


 71%|███████   | 88399/125000 [1:02:08<24:57, 24.44it/s]

finished frames 7072000, mean/median reward 38.2/12.0, min/max reward 1.0/252.0


 71%|███████   | 88498/125000 [1:02:12<24:56, 24.39it/s]

finished frames 7080000, mean/median reward 39.7/9.0, min/max reward 0.0/278.0


 71%|███████   | 88600/125000 [1:02:17<26:02, 23.30it/s]

finished frames 7088000, mean/median reward 42.0/22.0, min/max reward 7.0/198.0


 71%|███████   | 88699/125000 [1:02:21<25:55, 23.33it/s]

finished frames 7096000, mean/median reward 17.8/9.0, min/max reward 0.0/116.0


 71%|███████   | 88798/125000 [1:02:25<25:09, 23.98it/s]

finished frames 7104000, mean/median reward 26.8/15.0, min/max reward 0.0/102.0


 71%|███████   | 88900/125000 [1:02:29<24:57, 24.10it/s]

finished frames 7112000, mean/median reward 55.2/39.0, min/max reward 8.0/252.0


 71%|███████   | 88999/125000 [1:02:33<25:04, 23.93it/s]

finished frames 7120000, mean/median reward 27.8/21.0, min/max reward 0.0/111.0


 71%|███████▏  | 89098/125000 [1:02:37<24:54, 24.02it/s]

finished frames 7128000, mean/median reward 61.0/24.0, min/max reward 4.0/222.0


 71%|███████▏  | 89200/125000 [1:02:42<24:54, 23.96it/s]

finished frames 7136000, mean/median reward 30.0/18.0, min/max reward 4.0/127.0


 71%|███████▏  | 89299/125000 [1:02:46<25:08, 23.66it/s]

finished frames 7144000, mean/median reward 42.1/16.0, min/max reward 0.0/152.0


 72%|███████▏  | 89398/125000 [1:02:50<25:07, 23.62it/s]

finished frames 7152000, mean/median reward 29.9/12.0, min/max reward 0.0/178.0


 72%|███████▏  | 89500/125000 [1:02:54<24:16, 24.37it/s]

finished frames 7160000, mean/median reward 43.1/17.0, min/max reward 2.0/178.0


 72%|███████▏  | 89599/125000 [1:02:58<24:15, 24.33it/s]

finished frames 7168000, mean/median reward 65.6/26.0, min/max reward 4.0/260.0


 72%|███████▏  | 89698/125000 [1:03:02<24:13, 24.29it/s]

finished frames 7176000, mean/median reward 50.8/17.0, min/max reward 0.0/183.0


 72%|███████▏  | 89800/125000 [1:03:07<24:45, 23.70it/s]

finished frames 7184000, mean/median reward 29.2/6.0, min/max reward 0.0/225.0


 72%|███████▏  | 89899/125000 [1:03:11<24:26, 23.93it/s]

finished frames 7192000, mean/median reward 43.4/17.0, min/max reward 0.0/384.0


 72%|███████▏  | 89998/125000 [1:03:15<24:23, 23.92it/s]

finished frames 7200000, mean/median reward 19.6/9.0, min/max reward 1.0/68.0


 72%|███████▏  | 90100/125000 [1:03:19<24:01, 24.21it/s]

finished frames 7208000, mean/median reward 21.4/8.0, min/max reward 0.0/78.0


 72%|███████▏  | 90199/125000 [1:03:23<24:55, 23.28it/s]

finished frames 7216000, mean/median reward 31.3/26.0, min/max reward 0.0/116.0


 72%|███████▏  | 90298/125000 [1:03:27<24:59, 23.14it/s]

finished frames 7224000, mean/median reward 43.1/8.0, min/max reward 2.0/276.0


 72%|███████▏  | 90400/125000 [1:03:32<24:21, 23.67it/s]

finished frames 7232000, mean/median reward 25.0/8.0, min/max reward 0.0/97.0


 72%|███████▏  | 90499/125000 [1:03:36<23:45, 24.20it/s]

finished frames 7240000, mean/median reward 27.5/15.0, min/max reward 0.0/92.0


 72%|███████▏  | 90598/125000 [1:03:40<23:54, 23.98it/s]

finished frames 7248000, mean/median reward 27.1/18.0, min/max reward 0.0/94.0


 73%|███████▎  | 90700/125000 [1:03:44<23:31, 24.31it/s]

finished frames 7256000, mean/median reward 47.5/14.0, min/max reward 1.0/277.0


 73%|███████▎  | 90799/125000 [1:03:48<23:27, 24.29it/s]

finished frames 7264000, mean/median reward 26.9/17.0, min/max reward 1.0/131.0


 73%|███████▎  | 90898/125000 [1:03:52<23:55, 23.76it/s]

finished frames 7272000, mean/median reward 50.6/29.0, min/max reward 5.0/236.0


 73%|███████▎  | 91000/125000 [1:03:57<23:34, 24.03it/s]

finished frames 7280000, mean/median reward 43.4/27.0, min/max reward 0.0/246.0


 73%|███████▎  | 91099/125000 [1:04:01<23:27, 24.09it/s]

finished frames 7288000, mean/median reward 64.2/41.0, min/max reward 1.0/341.0


 73%|███████▎  | 91198/125000 [1:04:05<23:04, 24.41it/s]

finished frames 7296000, mean/median reward 58.9/31.0, min/max reward 1.0/247.0


 73%|███████▎  | 91300/125000 [1:04:09<23:25, 23.98it/s]

finished frames 7304000, mean/median reward 82.5/79.0, min/max reward 4.0/212.0


 73%|███████▎  | 91399/125000 [1:04:13<23:24, 23.93it/s]

finished frames 7312000, mean/median reward 28.5/15.0, min/max reward 0.0/147.0


 73%|███████▎  | 91498/125000 [1:04:17<23:17, 23.98it/s]

finished frames 7320000, mean/median reward 43.8/15.0, min/max reward 0.0/308.0


 73%|███████▎  | 91600/125000 [1:04:22<23:06, 24.10it/s]

finished frames 7328000, mean/median reward 79.4/46.0, min/max reward 1.0/281.0


 73%|███████▎  | 91699/125000 [1:04:26<23:17, 23.84it/s]

finished frames 7336000, mean/median reward 17.4/5.0, min/max reward 0.0/147.0


 73%|███████▎  | 91798/125000 [1:04:30<23:14, 23.80it/s]

finished frames 7344000, mean/median reward 28.9/14.0, min/max reward 4.0/147.0


 74%|███████▎  | 91900/125000 [1:04:34<22:36, 24.40it/s]

finished frames 7352000, mean/median reward 41.4/16.0, min/max reward 2.0/197.0


 74%|███████▎  | 91999/125000 [1:04:38<22:30, 24.44it/s]

finished frames 7360000, mean/median reward 69.4/46.0, min/max reward 0.0/236.0


 74%|███████▎  | 92098/125000 [1:04:43<23:18, 23.52it/s]

finished frames 7368000, mean/median reward 56.9/22.0, min/max reward 5.0/236.0


 74%|███████▍  | 92200/125000 [1:04:47<22:44, 24.04it/s]

finished frames 7376000, mean/median reward 68.9/24.0, min/max reward 3.0/242.0


 74%|███████▍  | 92299/125000 [1:04:51<22:32, 24.18it/s]

finished frames 7384000, mean/median reward 96.1/64.0, min/max reward 2.0/305.0


 74%|███████▍  | 92398/125000 [1:04:55<22:41, 23.94it/s]

finished frames 7392000, mean/median reward 68.6/18.0, min/max reward 5.0/242.0


 74%|███████▍  | 92500/125000 [1:04:59<23:08, 23.41it/s]

finished frames 7400000, mean/median reward 93.4/41.0, min/max reward 5.0/339.0


 74%|███████▍  | 92599/125000 [1:05:04<22:15, 24.26it/s]

finished frames 7408000, mean/median reward 59.5/18.0, min/max reward 0.0/332.0


 74%|███████▍  | 92698/125000 [1:05:08<22:48, 23.60it/s]

finished frames 7416000, mean/median reward 21.2/9.0, min/max reward 0.0/147.0


 74%|███████▍  | 92800/125000 [1:05:12<22:33, 23.79it/s]

finished frames 7424000, mean/median reward 55.0/11.0, min/max reward 0.0/195.0


 74%|███████▍  | 92899/125000 [1:05:16<22:16, 24.01it/s]

finished frames 7432000, mean/median reward 55.1/24.0, min/max reward 0.0/165.0


 74%|███████▍  | 92998/125000 [1:05:20<22:04, 24.16it/s]

finished frames 7440000, mean/median reward 69.2/30.0, min/max reward 0.0/298.0


 74%|███████▍  | 93100/125000 [1:05:24<22:56, 23.17it/s]

finished frames 7448000, mean/median reward 50.4/13.0, min/max reward 3.0/271.0


 75%|███████▍  | 93199/125000 [1:05:29<21:48, 24.31it/s]

finished frames 7456000, mean/median reward 41.1/27.0, min/max reward 3.0/147.0


 75%|███████▍  | 93298/125000 [1:05:33<21:55, 24.11it/s]

finished frames 7464000, mean/median reward 40.1/24.0, min/max reward 0.0/150.0


 75%|███████▍  | 93400/125000 [1:05:37<22:52, 23.03it/s]

finished frames 7472000, mean/median reward 68.2/21.0, min/max reward 0.0/262.0


 75%|███████▍  | 93499/125000 [1:05:41<22:05, 23.77it/s]

finished frames 7480000, mean/median reward 30.2/17.0, min/max reward 2.0/87.0


 75%|███████▍  | 93598/125000 [1:05:45<21:57, 23.83it/s]

finished frames 7488000, mean/median reward 8.9/5.0, min/max reward 0.0/39.0


 75%|███████▍  | 93700/125000 [1:05:50<21:52, 23.84it/s]

finished frames 7496000, mean/median reward 8.2/3.0, min/max reward 0.0/47.0


 75%|███████▌  | 93799/125000 [1:05:54<22:19, 23.29it/s]

finished frames 7504000, mean/median reward 8.9/4.0, min/max reward 0.0/49.0


 75%|███████▌  | 93898/125000 [1:05:58<21:34, 24.03it/s]

finished frames 7512000, mean/median reward 9.7/6.0, min/max reward 1.0/39.0


 75%|███████▌  | 94000/125000 [1:06:02<21:29, 24.04it/s]

finished frames 7520000, mean/median reward 21.9/5.0, min/max reward 0.0/117.0


 75%|███████▌  | 94099/125000 [1:06:06<20:53, 24.64it/s]

finished frames 7528000, mean/median reward 34.8/13.0, min/max reward 4.0/170.0


 75%|███████▌  | 94198/125000 [1:06:10<21:07, 24.30it/s]

finished frames 7536000, mean/median reward 34.8/7.0, min/max reward 0.0/303.0


 75%|███████▌  | 94300/125000 [1:06:15<21:45, 23.52it/s]

finished frames 7544000, mean/median reward 27.8/9.0, min/max reward 0.0/245.0


 76%|███████▌  | 94399/125000 [1:06:19<21:58, 23.21it/s]

finished frames 7552000, mean/median reward 23.1/26.0, min/max reward 0.0/51.0


 76%|███████▌  | 94498/125000 [1:06:23<21:09, 24.03it/s]

finished frames 7560000, mean/median reward 41.9/30.0, min/max reward 0.0/202.0


 76%|███████▌  | 94600/125000 [1:06:27<21:18, 23.78it/s]

finished frames 7568000, mean/median reward 104.4/49.0, min/max reward 0.0/338.0


 76%|███████▌  | 94699/125000 [1:06:31<21:12, 23.81it/s]

finished frames 7576000, mean/median reward 62.2/32.0, min/max reward 1.0/338.0


 76%|███████▌  | 94798/125000 [1:06:35<21:57, 22.92it/s]

finished frames 7584000, mean/median reward 45.4/13.0, min/max reward 4.0/281.0


 76%|███████▌  | 94900/125000 [1:06:40<21:00, 23.87it/s]

finished frames 7592000, mean/median reward 42.9/16.0, min/max reward 0.0/348.0


 76%|███████▌  | 94999/125000 [1:06:44<21:18, 23.46it/s]

finished frames 7600000, mean/median reward 84.2/30.0, min/max reward 0.0/358.0


 76%|███████▌  | 95098/125000 [1:06:48<20:35, 24.20it/s]

finished frames 7608000, mean/median reward 50.4/11.0, min/max reward 0.0/316.0


 76%|███████▌  | 95200/125000 [1:06:52<20:37, 24.08it/s]

finished frames 7616000, mean/median reward 107.0/40.0, min/max reward 3.0/354.0


 76%|███████▌  | 95299/125000 [1:06:56<20:06, 24.61it/s]

finished frames 7624000, mean/median reward 101.7/31.0, min/max reward 4.0/354.0


 76%|███████▋  | 95398/125000 [1:07:00<20:01, 24.63it/s]

finished frames 7632000, mean/median reward 71.1/43.0, min/max reward 1.0/316.0


 76%|███████▋  | 95500/125000 [1:07:05<20:19, 24.20it/s]

finished frames 7640000, mean/median reward 85.5/19.0, min/max reward 0.0/335.0


 76%|███████▋  | 95599/125000 [1:07:09<20:06, 24.36it/s]

finished frames 7648000, mean/median reward 32.2/17.0, min/max reward 0.0/112.0


 77%|███████▋  | 95698/125000 [1:07:13<20:16, 24.09it/s]

finished frames 7656000, mean/median reward 54.3/24.0, min/max reward 4.0/203.0


 77%|███████▋  | 95800/125000 [1:07:17<20:31, 23.71it/s]

finished frames 7664000, mean/median reward 72.2/16.0, min/max reward 4.0/312.0


 77%|███████▋  | 95899/125000 [1:07:21<20:43, 23.41it/s]

finished frames 7672000, mean/median reward 37.8/16.0, min/max reward 4.0/148.0


 77%|███████▋  | 95998/125000 [1:07:25<20:01, 24.14it/s]

finished frames 7680000, mean/median reward 11.8/8.0, min/max reward 1.0/33.0


 77%|███████▋  | 96100/125000 [1:07:30<20:16, 23.75it/s]

finished frames 7688000, mean/median reward 42.0/17.0, min/max reward 0.0/275.0


 77%|███████▋  | 96199/125000 [1:07:34<20:04, 23.91it/s]

finished frames 7696000, mean/median reward 36.5/17.0, min/max reward 0.0/112.0


 77%|███████▋  | 96298/125000 [1:07:38<19:57, 23.97it/s]

finished frames 7704000, mean/median reward 31.0/17.0, min/max reward 0.0/155.0


 77%|███████▋  | 96400/125000 [1:07:42<19:42, 24.18it/s]

finished frames 7712000, mean/median reward 46.6/22.0, min/max reward 4.0/155.0


 77%|███████▋  | 96499/125000 [1:07:46<19:45, 24.05it/s]

finished frames 7720000, mean/median reward 45.4/21.0, min/max reward 2.0/185.0


 77%|███████▋  | 96598/125000 [1:07:50<19:38, 24.10it/s]

finished frames 7728000, mean/median reward 62.4/23.0, min/max reward 0.0/291.0


 77%|███████▋  | 96700/125000 [1:07:55<20:07, 23.44it/s]

finished frames 7736000, mean/median reward 96.2/50.0, min/max reward 3.0/291.0


 77%|███████▋  | 96799/125000 [1:07:59<19:52, 23.64it/s]

finished frames 7744000, mean/median reward 64.0/19.0, min/max reward 0.0/187.0


 78%|███████▊  | 96898/125000 [1:08:03<19:34, 23.93it/s]

finished frames 7752000, mean/median reward 60.1/19.0, min/max reward 1.0/207.0


 78%|███████▊  | 97000/125000 [1:08:07<19:13, 24.27it/s]

finished frames 7760000, mean/median reward 71.8/18.0, min/max reward 2.0/247.0


 78%|███████▊  | 97099/125000 [1:08:11<19:35, 23.74it/s]

finished frames 7768000, mean/median reward 55.1/13.0, min/max reward 2.0/257.0


 78%|███████▊  | 97198/125000 [1:08:16<19:20, 23.96it/s]

finished frames 7776000, mean/median reward 37.2/13.0, min/max reward 1.0/170.0


 78%|███████▊  | 97300/125000 [1:08:20<18:56, 24.36it/s]

finished frames 7784000, mean/median reward 54.6/29.0, min/max reward 1.0/203.0


 78%|███████▊  | 97399/125000 [1:08:24<19:32, 23.53it/s]

finished frames 7792000, mean/median reward 36.2/12.0, min/max reward 4.0/239.0


 78%|███████▊  | 97498/125000 [1:08:28<19:13, 23.84it/s]

finished frames 7800000, mean/median reward 23.6/11.0, min/max reward 0.0/68.0


 78%|███████▊  | 97600/125000 [1:08:32<19:00, 24.02it/s]

finished frames 7808000, mean/median reward 19.9/6.0, min/max reward 0.0/68.0


 78%|███████▊  | 97699/125000 [1:08:36<18:44, 24.27it/s]

finished frames 7816000, mean/median reward 55.6/15.0, min/max reward 0.0/296.0


 78%|███████▊  | 97798/125000 [1:08:41<19:03, 23.78it/s]

finished frames 7824000, mean/median reward 56.0/16.0, min/max reward 3.0/259.0


 78%|███████▊  | 97900/125000 [1:08:45<18:50, 23.97it/s]

finished frames 7832000, mean/median reward 41.2/7.0, min/max reward 0.0/189.0


 78%|███████▊  | 97999/125000 [1:08:49<18:43, 24.03it/s]

finished frames 7840000, mean/median reward 53.7/11.0, min/max reward 0.0/337.0


 78%|███████▊  | 98098/125000 [1:08:53<18:42, 23.97it/s]

finished frames 7848000, mean/median reward 28.5/7.0, min/max reward 0.0/189.0


 79%|███████▊  | 98200/125000 [1:08:57<18:28, 24.17it/s]

finished frames 7856000, mean/median reward 37.7/8.0, min/max reward 0.0/195.0


 79%|███████▊  | 98299/125000 [1:09:01<18:56, 23.50it/s]

finished frames 7864000, mean/median reward 24.6/10.0, min/max reward 2.0/82.0


 79%|███████▊  | 98398/125000 [1:09:06<18:32, 23.90it/s]

finished frames 7872000, mean/median reward 52.2/12.0, min/max reward 1.0/278.0


 79%|███████▉  | 98500/125000 [1:09:10<18:20, 24.08it/s]

finished frames 7880000, mean/median reward 59.8/35.0, min/max reward 2.0/278.0


 79%|███████▉  | 98599/125000 [1:09:14<18:16, 24.07it/s]

finished frames 7888000, mean/median reward 67.9/18.0, min/max reward 2.0/354.0


 79%|███████▉  | 98698/125000 [1:09:18<18:18, 23.94it/s]

finished frames 7896000, mean/median reward 32.1/15.0, min/max reward 4.0/131.0


 79%|███████▉  | 98800/125000 [1:09:22<17:54, 24.38it/s]

finished frames 7904000, mean/median reward 41.2/9.0, min/max reward 0.0/241.0


 79%|███████▉  | 98899/125000 [1:09:27<17:57, 24.22it/s]

finished frames 7912000, mean/median reward 20.3/11.0, min/max reward 6.0/58.0


 79%|███████▉  | 98998/125000 [1:09:31<18:54, 22.92it/s]

finished frames 7920000, mean/median reward 36.6/26.0, min/max reward 0.0/140.0


 79%|███████▉  | 99100/125000 [1:09:35<18:10, 23.75it/s]

finished frames 7928000, mean/median reward 44.2/7.0, min/max reward 0.0/281.0


 79%|███████▉  | 99199/125000 [1:09:39<17:48, 24.14it/s]

finished frames 7936000, mean/median reward 11.2/5.0, min/max reward 0.0/89.0


 79%|███████▉  | 99298/125000 [1:09:43<18:15, 23.47it/s]

finished frames 7944000, mean/median reward 58.1/21.0, min/max reward 2.0/188.0


 80%|███████▉  | 99400/125000 [1:09:48<18:03, 23.63it/s]

finished frames 7952000, mean/median reward 20.1/7.0, min/max reward 1.0/100.0


 80%|███████▉  | 99499/125000 [1:09:52<17:47, 23.89it/s]

finished frames 7960000, mean/median reward 21.8/13.0, min/max reward 1.0/76.0


 80%|███████▉  | 99598/125000 [1:09:56<17:46, 23.81it/s]

finished frames 7968000, mean/median reward 14.8/6.0, min/max reward 0.0/91.0


 80%|███████▉  | 99700/125000 [1:10:00<18:13, 23.14it/s]

finished frames 7976000, mean/median reward 12.3/5.0, min/max reward 0.0/54.0


 80%|███████▉  | 99799/125000 [1:10:04<17:14, 24.37it/s]

finished frames 7984000, mean/median reward 15.2/7.0, min/max reward 3.0/135.0


 80%|███████▉  | 99898/125000 [1:10:09<17:27, 23.96it/s]

finished frames 7992000, mean/median reward 18.8/10.0, min/max reward 1.0/139.0


 80%|████████  | 100000/125000 [1:10:13<17:05, 24.38it/s]

finished frames 8000000, mean/median reward 17.8/11.0, min/max reward 1.0/43.0


 80%|████████  | 100099/125000 [1:10:17<17:10, 24.16it/s]

finished frames 8008000, mean/median reward 63.6/7.0, min/max reward 2.0/279.0


 80%|████████  | 100198/125000 [1:10:21<17:02, 24.26it/s]

finished frames 8016000, mean/median reward 47.1/7.0, min/max reward 0.0/286.0


 80%|████████  | 100300/125000 [1:10:25<17:04, 24.11it/s]

finished frames 8024000, mean/median reward 41.9/18.0, min/max reward 0.0/278.0


 80%|████████  | 100399/125000 [1:10:30<17:28, 23.47it/s]

finished frames 8032000, mean/median reward 16.5/4.0, min/max reward 0.0/85.0


 80%|████████  | 100498/125000 [1:10:34<16:51, 24.21it/s]

finished frames 8040000, mean/median reward 17.5/3.0, min/max reward 0.0/138.0


 80%|████████  | 100600/125000 [1:10:38<16:52, 24.10it/s]

finished frames 8048000, mean/median reward 7.6/4.0, min/max reward 0.0/31.0


 81%|████████  | 100699/125000 [1:10:42<16:45, 24.18it/s]

finished frames 8056000, mean/median reward 93.8/40.0, min/max reward 2.0/303.0


 81%|████████  | 100798/125000 [1:10:46<16:42, 24.15it/s]

finished frames 8064000, mean/median reward 38.1/13.0, min/max reward 0.0/322.0


 81%|████████  | 100900/125000 [1:10:50<16:37, 24.17it/s]

finished frames 8072000, mean/median reward 24.0/5.0, min/max reward 0.0/259.0


 81%|████████  | 100999/125000 [1:10:55<16:21, 24.44it/s]

finished frames 8080000, mean/median reward 35.1/6.0, min/max reward 0.0/259.0


 81%|████████  | 101098/125000 [1:10:59<16:45, 23.77it/s]

finished frames 8088000, mean/median reward 36.4/9.0, min/max reward 1.0/130.0


 81%|████████  | 101200/125000 [1:11:03<16:36, 23.89it/s]

finished frames 8096000, mean/median reward 37.4/11.0, min/max reward 2.0/130.0


 81%|████████  | 101299/125000 [1:11:07<17:00, 23.22it/s]

finished frames 8104000, mean/median reward 60.6/9.0, min/max reward 2.0/282.0


 81%|████████  | 101398/125000 [1:11:11<16:19, 24.11it/s]

finished frames 8112000, mean/median reward 21.1/8.0, min/max reward 4.0/96.0


 81%|████████  | 101500/125000 [1:11:16<16:46, 23.34it/s]

finished frames 8120000, mean/median reward 95.1/43.0, min/max reward 0.0/342.0


 81%|████████▏ | 101599/125000 [1:11:20<16:40, 23.39it/s]

finished frames 8128000, mean/median reward 40.9/5.0, min/max reward 0.0/298.0


 81%|████████▏ | 101698/125000 [1:11:24<16:39, 23.31it/s]

finished frames 8136000, mean/median reward 8.3/5.0, min/max reward 0.0/39.0


 81%|████████▏ | 101800/125000 [1:11:28<16:04, 24.06it/s]

finished frames 8144000, mean/median reward 57.0/41.0, min/max reward 1.0/249.0


 82%|████████▏ | 101899/125000 [1:11:32<16:10, 23.80it/s]

finished frames 8152000, mean/median reward 52.2/28.0, min/max reward 4.0/249.0


 82%|████████▏ | 101998/125000 [1:11:36<15:36, 24.56it/s]

finished frames 8160000, mean/median reward 57.8/19.0, min/max reward 1.0/306.0


 82%|████████▏ | 102100/125000 [1:11:41<15:54, 24.00it/s]

finished frames 8168000, mean/median reward 54.7/12.0, min/max reward 0.0/306.0


 82%|████████▏ | 102199/125000 [1:11:45<16:09, 23.51it/s]

finished frames 8176000, mean/median reward 48.1/13.0, min/max reward 1.0/218.0


 82%|████████▏ | 102298/125000 [1:11:49<15:48, 23.93it/s]

finished frames 8184000, mean/median reward 21.6/6.0, min/max reward 1.0/174.0


 82%|████████▏ | 102400/125000 [1:11:53<15:34, 24.18it/s]

finished frames 8192000, mean/median reward 14.6/6.0, min/max reward 0.0/54.0


 82%|████████▏ | 102499/125000 [1:11:57<15:39, 23.95it/s]

finished frames 8200000, mean/median reward 55.7/25.0, min/max reward 1.0/287.0


 82%|████████▏ | 102598/125000 [1:12:02<15:22, 24.30it/s]

finished frames 8208000, mean/median reward 24.9/9.0, min/max reward 0.0/95.0


 82%|████████▏ | 102700/125000 [1:12:06<15:33, 23.88it/s]

finished frames 8216000, mean/median reward 36.1/7.0, min/max reward 2.0/296.0


 82%|████████▏ | 102799/125000 [1:12:10<15:20, 24.11it/s]

finished frames 8224000, mean/median reward 24.5/12.0, min/max reward 2.0/92.0


 82%|████████▏ | 102898/125000 [1:12:14<15:14, 24.18it/s]

finished frames 8232000, mean/median reward 40.4/12.0, min/max reward 0.0/190.0


 82%|████████▏ | 103000/125000 [1:12:18<15:11, 24.13it/s]

finished frames 8240000, mean/median reward 30.8/10.0, min/max reward 0.0/140.0


 82%|████████▏ | 103099/125000 [1:12:23<15:11, 24.02it/s]

finished frames 8248000, mean/median reward 59.2/23.0, min/max reward 0.0/272.0


 83%|████████▎ | 103198/125000 [1:12:27<15:39, 23.21it/s]

finished frames 8256000, mean/median reward 46.0/25.0, min/max reward 1.0/272.0


 83%|████████▎ | 103300/125000 [1:12:31<15:01, 24.07it/s]

finished frames 8264000, mean/median reward 68.1/25.0, min/max reward 1.0/272.0


 83%|████████▎ | 103399/125000 [1:12:35<15:10, 23.73it/s]

finished frames 8272000, mean/median reward 27.0/15.0, min/max reward 0.0/123.0


 83%|████████▎ | 103498/125000 [1:12:39<14:44, 24.30it/s]

finished frames 8280000, mean/median reward 44.6/16.0, min/max reward 1.0/214.0


 83%|████████▎ | 103600/125000 [1:12:43<15:06, 23.60it/s]

finished frames 8288000, mean/median reward 79.2/22.0, min/max reward 0.0/339.0


 83%|████████▎ | 103699/125000 [1:12:48<14:43, 24.11it/s]

finished frames 8296000, mean/median reward 57.4/21.0, min/max reward 0.0/226.0


 83%|████████▎ | 103798/125000 [1:12:52<14:43, 24.00it/s]

finished frames 8304000, mean/median reward 48.4/8.0, min/max reward 1.0/284.0


 83%|████████▎ | 103900/125000 [1:12:56<14:35, 24.09it/s]

finished frames 8312000, mean/median reward 19.8/8.0, min/max reward 4.0/152.0


 83%|████████▎ | 103999/125000 [1:13:00<14:39, 23.87it/s]

finished frames 8320000, mean/median reward 47.4/9.0, min/max reward 1.0/291.0


 83%|████████▎ | 104098/125000 [1:13:04<14:22, 24.24it/s]

finished frames 8328000, mean/median reward 40.1/7.0, min/max reward 0.0/343.0


 83%|████████▎ | 104200/125000 [1:13:08<14:06, 24.56it/s]

finished frames 8336000, mean/median reward 46.9/15.0, min/max reward 0.0/356.0


 83%|████████▎ | 104299/125000 [1:13:13<14:27, 23.86it/s]

finished frames 8344000, mean/median reward 34.0/18.0, min/max reward 0.0/136.0


 84%|████████▎ | 104398/125000 [1:13:17<14:11, 24.19it/s]

finished frames 8352000, mean/median reward 35.6/13.0, min/max reward 1.0/136.0


 84%|████████▎ | 104500/125000 [1:13:21<14:06, 24.21it/s]

finished frames 8360000, mean/median reward 33.8/12.0, min/max reward 1.0/215.0


 84%|████████▎ | 104599/125000 [1:13:25<14:03, 24.18it/s]

finished frames 8368000, mean/median reward 32.4/29.0, min/max reward 0.0/81.0


 84%|████████▍ | 104698/125000 [1:13:29<14:07, 23.96it/s]

finished frames 8376000, mean/median reward 79.7/22.0, min/max reward 1.0/304.0


 84%|████████▍ | 104800/125000 [1:13:34<14:08, 23.80it/s]

finished frames 8384000, mean/median reward 16.6/13.0, min/max reward 0.0/57.0


 84%|████████▍ | 104899/125000 [1:13:38<13:55, 24.06it/s]

finished frames 8392000, mean/median reward 31.2/26.0, min/max reward 2.0/150.0


 84%|████████▍ | 104998/125000 [1:13:42<13:55, 23.95it/s]

finished frames 8400000, mean/median reward 30.8/26.0, min/max reward 2.0/79.0


 84%|████████▍ | 105100/125000 [1:13:46<13:39, 24.27it/s]

finished frames 8408000, mean/median reward 91.4/25.0, min/max reward 4.0/316.0


 84%|████████▍ | 105199/125000 [1:13:50<13:35, 24.28it/s]

finished frames 8416000, mean/median reward 34.9/16.0, min/max reward 0.0/189.0


 84%|████████▍ | 105298/125000 [1:13:54<13:34, 24.20it/s]

finished frames 8424000, mean/median reward 41.2/16.0, min/max reward 0.0/178.0


 84%|████████▍ | 105400/125000 [1:13:59<13:31, 24.15it/s]

finished frames 8432000, mean/median reward 43.6/13.0, min/max reward 0.0/219.0


 84%|████████▍ | 105499/125000 [1:14:03<13:52, 23.43it/s]

finished frames 8440000, mean/median reward 57.2/5.0, min/max reward 0.0/330.0


 84%|████████▍ | 105598/125000 [1:14:07<13:41, 23.62it/s]

finished frames 8448000, mean/median reward 37.2/17.0, min/max reward 0.0/219.0


 85%|████████▍ | 105700/125000 [1:14:11<13:26, 23.94it/s]

finished frames 8456000, mean/median reward 49.8/10.0, min/max reward 0.0/328.0


 85%|████████▍ | 105799/125000 [1:14:15<13:18, 24.06it/s]

finished frames 8464000, mean/median reward 39.2/10.0, min/max reward 0.0/171.0


 85%|████████▍ | 105898/125000 [1:14:19<13:38, 23.34it/s]

finished frames 8472000, mean/median reward 38.6/17.0, min/max reward 0.0/171.0


 85%|████████▍ | 106000/125000 [1:14:24<13:18, 23.80it/s]

finished frames 8480000, mean/median reward 64.9/17.0, min/max reward 2.0/330.0


 85%|████████▍ | 106099/125000 [1:14:28<13:16, 23.74it/s]

finished frames 8488000, mean/median reward 38.0/13.0, min/max reward 1.0/330.0


 85%|████████▍ | 106198/125000 [1:14:32<13:12, 23.72it/s]

finished frames 8496000, mean/median reward 65.6/13.0, min/max reward 2.0/330.0


 85%|████████▌ | 106300/125000 [1:14:36<12:53, 24.16it/s]

finished frames 8504000, mean/median reward 82.8/16.0, min/max reward 0.0/346.0


 85%|████████▌ | 106399/125000 [1:14:40<12:49, 24.16it/s]

finished frames 8512000, mean/median reward 46.4/10.0, min/max reward 0.0/237.0


 85%|████████▌ | 106498/125000 [1:14:45<12:53, 23.92it/s]

finished frames 8520000, mean/median reward 36.4/11.0, min/max reward 1.0/262.0


 85%|████████▌ | 106600/125000 [1:14:49<13:03, 23.48it/s]

finished frames 8528000, mean/median reward 23.6/8.0, min/max reward 0.0/159.0


 85%|████████▌ | 106699/125000 [1:14:53<12:42, 24.00it/s]

finished frames 8536000, mean/median reward 28.1/9.0, min/max reward 0.0/127.0


 85%|████████▌ | 106798/125000 [1:14:57<12:35, 24.08it/s]

finished frames 8544000, mean/median reward 24.8/13.0, min/max reward 3.0/93.0


 86%|████████▌ | 106900/125000 [1:15:01<12:26, 24.25it/s]

finished frames 8552000, mean/median reward 33.0/11.0, min/max reward 2.0/235.0


 86%|████████▌ | 106999/125000 [1:15:05<12:42, 23.61it/s]

finished frames 8560000, mean/median reward 50.2/12.0, min/max reward 0.0/370.0


 86%|████████▌ | 107098/125000 [1:15:10<12:30, 23.85it/s]

finished frames 8568000, mean/median reward 13.1/4.0, min/max reward 0.0/84.0


 86%|████████▌ | 107200/125000 [1:15:14<12:20, 24.03it/s]

finished frames 8576000, mean/median reward 78.2/39.0, min/max reward 4.0/242.0


 86%|████████▌ | 107299/125000 [1:15:18<12:25, 23.75it/s]

finished frames 8584000, mean/median reward 39.3/22.0, min/max reward 2.0/242.0


 86%|████████▌ | 107398/125000 [1:15:22<12:12, 24.03it/s]

finished frames 8592000, mean/median reward 35.8/8.0, min/max reward 0.0/242.0


 86%|████████▌ | 107500/125000 [1:15:26<12:13, 23.87it/s]

finished frames 8600000, mean/median reward 67.7/19.0, min/max reward 0.0/365.0


 86%|████████▌ | 107599/125000 [1:15:31<12:15, 23.67it/s]

finished frames 8608000, mean/median reward 58.0/18.0, min/max reward 0.0/288.0


 86%|████████▌ | 107698/125000 [1:15:35<12:08, 23.75it/s]

finished frames 8616000, mean/median reward 48.4/18.0, min/max reward 2.0/242.0


 86%|████████▌ | 107800/125000 [1:15:39<12:18, 23.28it/s]

finished frames 8624000, mean/median reward 50.9/16.0, min/max reward 0.0/272.0


 86%|████████▋ | 107899/125000 [1:15:43<11:59, 23.78it/s]

finished frames 8632000, mean/median reward 34.5/14.0, min/max reward 0.0/272.0


 86%|████████▋ | 107998/125000 [1:15:47<11:53, 23.83it/s]

finished frames 8640000, mean/median reward 27.8/5.0, min/max reward 0.0/327.0


 86%|████████▋ | 108100/125000 [1:15:52<11:54, 23.64it/s]

finished frames 8648000, mean/median reward 7.8/4.0, min/max reward 0.0/23.0


 87%|████████▋ | 108199/125000 [1:15:56<11:49, 23.68it/s]

finished frames 8656000, mean/median reward 19.3/9.0, min/max reward 1.0/109.0


 87%|████████▋ | 108298/125000 [1:16:00<11:37, 23.94it/s]

finished frames 8664000, mean/median reward 53.9/10.0, min/max reward 2.0/274.0


 87%|████████▋ | 108400/125000 [1:16:04<11:25, 24.23it/s]

finished frames 8672000, mean/median reward 58.8/42.0, min/max reward 3.0/314.0


 87%|████████▋ | 108499/125000 [1:16:08<11:18, 24.31it/s]

finished frames 8680000, mean/median reward 79.6/25.0, min/max reward 4.0/315.0


 87%|████████▋ | 108598/125000 [1:16:12<11:10, 24.44it/s]

finished frames 8688000, mean/median reward 50.0/15.0, min/max reward 0.0/287.0


 87%|████████▋ | 108700/125000 [1:16:17<11:12, 24.25it/s]

finished frames 8696000, mean/median reward 75.6/36.0, min/max reward 2.0/276.0


 87%|████████▋ | 108799/125000 [1:16:21<11:09, 24.19it/s]

finished frames 8704000, mean/median reward 72.9/16.0, min/max reward 1.0/276.0


 87%|████████▋ | 108898/125000 [1:16:25<11:07, 24.11it/s]

finished frames 8712000, mean/median reward 60.5/18.0, min/max reward 0.0/356.0


 87%|████████▋ | 109000/125000 [1:16:29<11:03, 24.13it/s]

finished frames 8720000, mean/median reward 52.4/12.0, min/max reward 0.0/309.0


 87%|████████▋ | 109099/125000 [1:16:33<10:57, 24.20it/s]

finished frames 8728000, mean/median reward 74.6/36.0, min/max reward 4.0/278.0


 87%|████████▋ | 109198/125000 [1:16:37<10:53, 24.18it/s]

finished frames 8736000, mean/median reward 63.3/8.0, min/max reward 0.0/313.0


 87%|████████▋ | 109300/125000 [1:16:42<10:54, 23.98it/s]

finished frames 8744000, mean/median reward 56.0/12.0, min/max reward 3.0/206.0


 88%|████████▊ | 109399/125000 [1:16:46<10:42, 24.29it/s]

finished frames 8752000, mean/median reward 72.5/45.0, min/max reward 3.0/270.0


 88%|████████▊ | 109498/125000 [1:16:50<10:44, 24.05it/s]

finished frames 8760000, mean/median reward 63.7/23.0, min/max reward 1.0/254.0


 88%|████████▊ | 109600/125000 [1:16:54<10:31, 24.38it/s]

finished frames 8768000, mean/median reward 38.4/14.0, min/max reward 3.0/167.0


 88%|████████▊ | 109699/125000 [1:16:58<10:45, 23.70it/s]

finished frames 8776000, mean/median reward 40.8/16.0, min/max reward 2.0/167.0


 88%|████████▊ | 109798/125000 [1:17:02<10:34, 23.98it/s]

finished frames 8784000, mean/median reward 33.1/7.0, min/max reward 2.0/154.0


 88%|████████▊ | 109900/125000 [1:17:07<10:17, 24.45it/s]

finished frames 8792000, mean/median reward 52.9/16.0, min/max reward 3.0/220.0


 88%|████████▊ | 109999/125000 [1:17:11<10:21, 24.14it/s]

finished frames 8800000, mean/median reward 65.5/17.0, min/max reward 0.0/305.0


 88%|████████▊ | 110098/125000 [1:17:15<10:48, 22.98it/s]

finished frames 8808000, mean/median reward 40.9/18.0, min/max reward 0.0/277.0


 88%|████████▊ | 110200/125000 [1:17:19<10:15, 24.04it/s]

finished frames 8816000, mean/median reward 56.9/16.0, min/max reward 0.0/277.0


 88%|████████▊ | 110299/125000 [1:17:23<10:17, 23.81it/s]

finished frames 8824000, mean/median reward 45.4/8.0, min/max reward 0.0/277.0


 88%|████████▊ | 110398/125000 [1:17:28<10:18, 23.60it/s]

finished frames 8832000, mean/median reward 39.4/13.0, min/max reward 1.0/202.0


 88%|████████▊ | 110500/125000 [1:17:32<09:59, 24.18it/s]

finished frames 8840000, mean/median reward 86.2/33.0, min/max reward 2.0/331.0


 88%|████████▊ | 110599/125000 [1:17:36<09:54, 24.22it/s]

finished frames 8848000, mean/median reward 22.2/18.0, min/max reward 2.0/68.0


 89%|████████▊ | 110698/125000 [1:17:40<09:51, 24.18it/s]

finished frames 8856000, mean/median reward 24.9/10.0, min/max reward 2.0/84.0


 89%|████████▊ | 110800/125000 [1:17:44<09:44, 24.31it/s]

finished frames 8864000, mean/median reward 29.1/11.0, min/max reward 5.0/175.0


 89%|████████▊ | 110899/125000 [1:17:48<09:42, 24.20it/s]

finished frames 8872000, mean/median reward 53.6/15.0, min/max reward 2.0/313.0


 89%|████████▉ | 110998/125000 [1:17:53<09:45, 23.90it/s]

finished frames 8880000, mean/median reward 59.9/15.0, min/max reward 2.0/287.0


 89%|████████▉ | 111100/125000 [1:17:57<09:33, 24.26it/s]

finished frames 8888000, mean/median reward 92.2/36.0, min/max reward 8.0/287.0


 89%|████████▉ | 111199/125000 [1:18:01<09:22, 24.52it/s]

finished frames 8896000, mean/median reward 72.6/37.0, min/max reward 1.0/277.0


 89%|████████▉ | 111298/125000 [1:18:05<09:26, 24.17it/s]

finished frames 8904000, mean/median reward 53.6/23.0, min/max reward 0.0/277.0


 89%|████████▉ | 111400/125000 [1:18:09<09:22, 24.16it/s]

finished frames 8912000, mean/median reward 65.7/21.0, min/max reward 1.0/227.0


 89%|████████▉ | 111499/125000 [1:18:13<09:25, 23.89it/s]

finished frames 8920000, mean/median reward 30.1/19.0, min/max reward 7.0/95.0


 89%|████████▉ | 111598/125000 [1:18:18<09:16, 24.06it/s]

finished frames 8928000, mean/median reward 48.9/22.0, min/max reward 3.0/256.0


 89%|████████▉ | 111700/125000 [1:18:22<09:09, 24.19it/s]

finished frames 8936000, mean/median reward 65.5/24.0, min/max reward 3.0/313.0


 89%|████████▉ | 111799/125000 [1:18:26<09:05, 24.21it/s]

finished frames 8944000, mean/median reward 73.7/20.0, min/max reward 0.0/377.0


 90%|████████▉ | 111898/125000 [1:18:30<09:08, 23.90it/s]

finished frames 8952000, mean/median reward 62.4/19.0, min/max reward 0.0/377.0


 90%|████████▉ | 112000/125000 [1:18:34<08:58, 24.16it/s]

finished frames 8960000, mean/median reward 54.7/19.0, min/max reward 0.0/377.0


 90%|████████▉ | 112099/125000 [1:18:38<09:08, 23.50it/s]

finished frames 8968000, mean/median reward 50.2/15.0, min/max reward 1.0/329.0


 90%|████████▉ | 112198/125000 [1:18:43<08:45, 24.36it/s]

finished frames 8976000, mean/median reward 69.6/24.0, min/max reward 6.0/329.0


 90%|████████▉ | 112300/125000 [1:18:47<08:48, 24.03it/s]

finished frames 8984000, mean/median reward 63.6/25.0, min/max reward 0.0/302.0


 90%|████████▉ | 112399/125000 [1:18:51<08:58, 23.40it/s]

finished frames 8992000, mean/median reward 69.1/23.0, min/max reward 0.0/319.0


 90%|████████▉ | 112498/125000 [1:18:55<08:46, 23.73it/s]

finished frames 9000000, mean/median reward 32.7/5.0, min/max reward 0.0/281.0


 90%|█████████ | 112600/125000 [1:18:59<08:39, 23.88it/s]

finished frames 9008000, mean/median reward 52.1/27.0, min/max reward 0.0/281.0


 90%|█████████ | 112699/125000 [1:19:03<08:33, 23.95it/s]

finished frames 9016000, mean/median reward 64.2/13.0, min/max reward 0.0/302.0


 90%|█████████ | 112798/125000 [1:19:08<08:35, 23.66it/s]

finished frames 9024000, mean/median reward 36.1/8.0, min/max reward 0.0/152.0


 90%|█████████ | 112900/125000 [1:19:12<08:28, 23.78it/s]

finished frames 9032000, mean/median reward 57.2/9.0, min/max reward 0.0/300.0


 90%|█████████ | 112999/125000 [1:19:16<08:22, 23.88it/s]

finished frames 9040000, mean/median reward 54.7/28.0, min/max reward 0.0/253.0


 90%|█████████ | 113098/125000 [1:19:20<08:21, 23.75it/s]

finished frames 9048000, mean/median reward 84.3/34.0, min/max reward 4.0/337.0


 91%|█████████ | 113200/125000 [1:19:25<08:19, 23.64it/s]

finished frames 9056000, mean/median reward 52.0/22.0, min/max reward 0.0/322.0


 91%|█████████ | 113299/125000 [1:19:29<08:08, 23.96it/s]

finished frames 9064000, mean/median reward 31.8/20.0, min/max reward 0.0/139.0


 91%|█████████ | 113398/125000 [1:19:33<08:11, 23.59it/s]

finished frames 9072000, mean/median reward 43.1/36.0, min/max reward 0.0/153.0


 91%|█████████ | 113500/125000 [1:19:37<08:05, 23.71it/s]

finished frames 9080000, mean/median reward 33.3/19.0, min/max reward 1.0/153.0


 91%|█████████ | 113599/125000 [1:19:41<08:03, 23.60it/s]

finished frames 9088000, mean/median reward 66.6/37.0, min/max reward 3.0/207.0


 91%|█████████ | 113698/125000 [1:19:45<07:56, 23.74it/s]

finished frames 9096000, mean/median reward 40.6/7.0, min/max reward 0.0/268.0


 91%|█████████ | 113800/125000 [1:19:50<07:44, 24.11it/s]

finished frames 9104000, mean/median reward 60.9/15.0, min/max reward 0.0/275.0


 91%|█████████ | 113899/125000 [1:19:54<07:37, 24.29it/s]

finished frames 9112000, mean/median reward 45.3/15.0, min/max reward 0.0/275.0


 91%|█████████ | 113998/125000 [1:19:58<07:34, 24.20it/s]

finished frames 9120000, mean/median reward 90.0/37.0, min/max reward 1.0/311.0


 91%|█████████▏| 114100/125000 [1:20:02<07:27, 24.38it/s]

finished frames 9128000, mean/median reward 54.8/16.0, min/max reward 4.0/224.0


 91%|█████████▏| 114199/125000 [1:20:06<07:35, 23.69it/s]

finished frames 9136000, mean/median reward 40.6/15.0, min/max reward 0.0/224.0


 91%|█████████▏| 114298/125000 [1:20:10<07:18, 24.43it/s]

finished frames 9144000, mean/median reward 42.4/9.0, min/max reward 0.0/226.0


 92%|█████████▏| 114400/125000 [1:20:15<07:24, 23.87it/s]

finished frames 9152000, mean/median reward 36.6/4.0, min/max reward 0.0/241.0


 92%|█████████▏| 114499/125000 [1:20:19<07:13, 24.24it/s]

finished frames 9160000, mean/median reward 12.1/5.0, min/max reward 0.0/94.0


 92%|█████████▏| 114598/125000 [1:20:23<07:10, 24.15it/s]

finished frames 9168000, mean/median reward 63.8/15.0, min/max reward 0.0/271.0


 92%|█████████▏| 114700/125000 [1:20:27<07:21, 23.32it/s]

finished frames 9176000, mean/median reward 36.4/13.0, min/max reward 0.0/271.0


 92%|█████████▏| 114799/125000 [1:20:31<07:02, 24.14it/s]

finished frames 9184000, mean/median reward 46.6/19.0, min/max reward 0.0/312.0


 92%|█████████▏| 114898/125000 [1:20:35<07:02, 23.90it/s]

finished frames 9192000, mean/median reward 45.5/14.0, min/max reward 0.0/137.0


 92%|█████████▏| 115000/125000 [1:20:40<06:52, 24.22it/s]

finished frames 9200000, mean/median reward 44.8/26.0, min/max reward 0.0/144.0


 92%|█████████▏| 115099/125000 [1:20:44<07:01, 23.47it/s]

finished frames 9208000, mean/median reward 64.8/16.0, min/max reward 0.0/371.0


 92%|█████████▏| 115198/125000 [1:20:48<06:51, 23.84it/s]

finished frames 9216000, mean/median reward 17.6/10.0, min/max reward 0.0/111.0


 92%|█████████▏| 115300/125000 [1:20:52<06:46, 23.84it/s]

finished frames 9224000, mean/median reward 21.3/11.0, min/max reward 0.0/111.0


 92%|█████████▏| 115399/125000 [1:20:56<06:38, 24.10it/s]

finished frames 9232000, mean/median reward 42.9/11.0, min/max reward 0.0/235.0


 92%|█████████▏| 115498/125000 [1:21:01<06:39, 23.80it/s]

finished frames 9240000, mean/median reward 30.9/6.0, min/max reward 0.0/144.0


 92%|█████████▏| 115600/125000 [1:21:05<06:27, 24.28it/s]

finished frames 9248000, mean/median reward 22.1/6.0, min/max reward 0.0/129.0


 93%|█████████▎| 115699/125000 [1:21:09<06:23, 24.24it/s]

finished frames 9256000, mean/median reward 22.6/11.0, min/max reward 0.0/109.0


 93%|█████████▎| 115798/125000 [1:21:13<06:27, 23.78it/s]

finished frames 9264000, mean/median reward 62.2/25.0, min/max reward 0.0/328.0


 93%|█████████▎| 115900/125000 [1:21:17<06:18, 24.06it/s]

finished frames 9272000, mean/median reward 79.8/14.0, min/max reward 0.0/379.0


 93%|█████████▎| 115999/125000 [1:21:22<06:11, 24.21it/s]

finished frames 9280000, mean/median reward 30.2/9.0, min/max reward 0.0/141.0


 93%|█████████▎| 116098/125000 [1:21:26<06:13, 23.81it/s]

finished frames 9288000, mean/median reward 49.3/9.0, min/max reward 0.0/303.0


 93%|█████████▎| 116200/125000 [1:21:30<06:05, 24.09it/s]

finished frames 9296000, mean/median reward 44.9/11.0, min/max reward 0.0/303.0


 93%|█████████▎| 116299/125000 [1:21:34<05:57, 24.34it/s]

finished frames 9304000, mean/median reward 52.3/14.0, min/max reward 1.0/358.0


 93%|█████████▎| 116398/125000 [1:21:38<06:07, 23.44it/s]

finished frames 9312000, mean/median reward 63.0/30.0, min/max reward 2.0/242.0


 93%|█████████▎| 116500/125000 [1:21:43<05:56, 23.86it/s]

finished frames 9320000, mean/median reward 40.6/14.0, min/max reward 1.0/158.0


 93%|█████████▎| 116599/125000 [1:21:47<06:06, 22.94it/s]

finished frames 9328000, mean/median reward 28.6/6.0, min/max reward 0.0/125.0


 93%|█████████▎| 116698/125000 [1:21:51<05:48, 23.81it/s]

finished frames 9336000, mean/median reward 20.6/7.0, min/max reward 1.0/125.0


 93%|█████████▎| 116800/125000 [1:21:55<05:47, 23.58it/s]

finished frames 9344000, mean/median reward 76.1/31.0, min/max reward 3.0/378.0


 94%|█████████▎| 116899/125000 [1:22:00<05:37, 24.01it/s]

finished frames 9352000, mean/median reward 84.6/8.0, min/max reward 2.0/379.0


 94%|█████████▎| 116998/125000 [1:22:04<05:35, 23.88it/s]

finished frames 9360000, mean/median reward 42.4/8.0, min/max reward 0.0/191.0


 94%|█████████▎| 117100/125000 [1:22:08<05:31, 23.85it/s]

finished frames 9368000, mean/median reward 23.6/6.0, min/max reward 2.0/81.0


 94%|█████████▍| 117199/125000 [1:22:12<05:22, 24.18it/s]

finished frames 9376000, mean/median reward 34.6/12.0, min/max reward 2.0/138.0


 94%|█████████▍| 117298/125000 [1:22:16<05:29, 23.38it/s]

finished frames 9384000, mean/median reward 49.7/10.0, min/max reward 0.0/296.0


 94%|█████████▍| 117400/125000 [1:22:21<05:19, 23.78it/s]

finished frames 9392000, mean/median reward 3.1/2.0, min/max reward 0.0/9.0


 94%|█████████▍| 117499/125000 [1:22:25<05:11, 24.08it/s]

finished frames 9400000, mean/median reward 18.2/7.0, min/max reward 0.0/101.0


 94%|█████████▍| 117598/125000 [1:22:29<05:10, 23.82it/s]

finished frames 9408000, mean/median reward 14.4/7.0, min/max reward 0.0/101.0


 94%|█████████▍| 117700/125000 [1:22:33<05:06, 23.78it/s]

finished frames 9416000, mean/median reward 18.8/11.0, min/max reward 0.0/101.0


 94%|█████████▍| 117799/125000 [1:22:38<05:00, 23.96it/s]

finished frames 9424000, mean/median reward 46.5/11.0, min/max reward 0.0/410.0


 94%|█████████▍| 117898/125000 [1:22:42<04:53, 24.24it/s]

finished frames 9432000, mean/median reward 33.8/13.0, min/max reward 1.0/162.0


 94%|█████████▍| 118000/125000 [1:22:46<04:56, 23.64it/s]

finished frames 9440000, mean/median reward 46.1/10.0, min/max reward 2.0/303.0


 94%|█████████▍| 118099/125000 [1:22:50<04:55, 23.39it/s]

finished frames 9448000, mean/median reward 60.4/10.0, min/max reward 0.0/325.0


 95%|█████████▍| 118198/125000 [1:22:54<04:44, 23.91it/s]

finished frames 9456000, mean/median reward 39.1/16.0, min/max reward 4.0/177.0


 95%|█████████▍| 118300/125000 [1:22:59<04:38, 24.08it/s]

finished frames 9464000, mean/median reward 61.2/15.0, min/max reward 2.0/345.0


 95%|█████████▍| 118399/125000 [1:23:03<04:32, 24.19it/s]

finished frames 9472000, mean/median reward 56.8/39.0, min/max reward 3.0/226.0


 95%|█████████▍| 118498/125000 [1:23:07<04:44, 22.88it/s]

finished frames 9480000, mean/median reward 53.1/13.0, min/max reward 2.0/338.0


 95%|█████████▍| 118600/125000 [1:23:11<04:34, 23.35it/s]

finished frames 9488000, mean/median reward 60.9/16.0, min/max reward 4.0/279.0


 95%|█████████▍| 118699/125000 [1:23:15<04:23, 23.92it/s]

finished frames 9496000, mean/median reward 38.4/18.0, min/max reward 4.0/136.0


 95%|█████████▌| 118798/125000 [1:23:20<04:29, 22.98it/s]

finished frames 9504000, mean/median reward 38.4/11.0, min/max reward 4.0/177.0


 95%|█████████▌| 118900/125000 [1:23:24<04:17, 23.67it/s]

finished frames 9512000, mean/median reward 71.8/12.0, min/max reward 4.0/343.0


 95%|█████████▌| 118999/125000 [1:23:28<04:12, 23.75it/s]

finished frames 9520000, mean/median reward 65.8/30.0, min/max reward 2.0/315.0


 95%|█████████▌| 119098/125000 [1:23:32<04:09, 23.64it/s]

finished frames 9528000, mean/median reward 52.9/19.0, min/max reward 0.0/315.0


 95%|█████████▌| 119200/125000 [1:23:36<03:57, 24.42it/s]

finished frames 9536000, mean/median reward 19.6/23.0, min/max reward 0.0/52.0


 95%|█████████▌| 119299/125000 [1:23:41<03:56, 24.08it/s]

finished frames 9544000, mean/median reward 35.8/15.0, min/max reward 0.0/264.0


 96%|█████████▌| 119398/125000 [1:23:45<03:52, 24.09it/s]

finished frames 9552000, mean/median reward 15.2/3.0, min/max reward 0.0/61.0


 96%|█████████▌| 119500/125000 [1:23:49<03:48, 24.12it/s]

finished frames 9560000, mean/median reward 39.7/21.0, min/max reward 1.0/179.0


 96%|█████████▌| 119599/125000 [1:23:53<03:45, 24.00it/s]

finished frames 9568000, mean/median reward 69.8/53.0, min/max reward 7.0/237.0


 96%|█████████▌| 119698/125000 [1:23:57<03:43, 23.76it/s]

finished frames 9576000, mean/median reward 35.4/14.0, min/max reward 0.0/150.0


 96%|█████████▌| 119800/125000 [1:24:02<03:35, 24.15it/s]

finished frames 9584000, mean/median reward 48.5/14.0, min/max reward 0.0/325.0


 96%|█████████▌| 119899/125000 [1:24:06<03:32, 24.02it/s]

finished frames 9592000, mean/median reward 26.4/9.0, min/max reward 0.0/84.0


 96%|█████████▌| 119998/125000 [1:24:10<03:27, 24.15it/s]

finished frames 9600000, mean/median reward 62.7/38.0, min/max reward 1.0/266.0


 96%|█████████▌| 120100/125000 [1:24:14<03:28, 23.52it/s]

finished frames 9608000, mean/median reward 73.8/18.0, min/max reward 0.0/366.0


 96%|█████████▌| 120199/125000 [1:24:18<03:18, 24.13it/s]

finished frames 9616000, mean/median reward 21.8/12.0, min/max reward 0.0/114.0


 96%|█████████▌| 120298/125000 [1:24:22<03:15, 24.07it/s]

finished frames 9624000, mean/median reward 57.6/4.0, min/max reward 0.0/363.0


 96%|█████████▋| 120400/125000 [1:24:27<03:14, 23.66it/s]

finished frames 9632000, mean/median reward 49.9/16.0, min/max reward 0.0/299.0


 96%|█████████▋| 120499/125000 [1:24:31<03:06, 24.11it/s]

finished frames 9640000, mean/median reward 31.1/12.0, min/max reward 0.0/167.0


 96%|█████████▋| 120598/125000 [1:24:35<03:06, 23.60it/s]

finished frames 9648000, mean/median reward 17.2/10.0, min/max reward 0.0/61.0


 97%|█████████▋| 120700/125000 [1:24:39<02:58, 24.13it/s]

finished frames 9656000, mean/median reward 27.5/10.0, min/max reward 0.0/119.0


 97%|█████████▋| 120799/125000 [1:24:43<02:58, 23.51it/s]

finished frames 9664000, mean/median reward 59.1/12.0, min/max reward 1.0/314.0


 97%|█████████▋| 120898/125000 [1:24:48<02:50, 24.02it/s]

finished frames 9672000, mean/median reward 63.8/11.0, min/max reward 1.0/279.0


 97%|█████████▋| 121000/125000 [1:24:52<02:45, 24.10it/s]

finished frames 9680000, mean/median reward 17.3/6.0, min/max reward 0.0/154.0


 97%|█████████▋| 121099/125000 [1:24:56<02:42, 24.07it/s]

finished frames 9688000, mean/median reward 29.4/9.0, min/max reward 1.0/135.0


 97%|█████████▋| 121198/125000 [1:25:00<02:40, 23.66it/s]

finished frames 9696000, mean/median reward 48.6/11.0, min/max reward 0.0/333.0


 97%|█████████▋| 121300/125000 [1:25:04<02:32, 24.30it/s]

finished frames 9704000, mean/median reward 73.9/11.0, min/max reward 1.0/333.0


 97%|█████████▋| 121399/125000 [1:25:09<02:30, 23.88it/s]

finished frames 9712000, mean/median reward 90.2/11.0, min/max reward 1.0/333.0


 97%|█████████▋| 121498/125000 [1:25:13<02:25, 23.99it/s]

finished frames 9720000, mean/median reward 92.4/30.0, min/max reward 1.0/333.0


 97%|█████████▋| 121600/125000 [1:25:17<02:22, 23.94it/s]

finished frames 9728000, mean/median reward 82.1/11.0, min/max reward 0.0/333.0


 97%|█████████▋| 121699/125000 [1:25:21<02:18, 23.86it/s]

finished frames 9736000, mean/median reward 78.2/16.0, min/max reward 1.0/333.0


 97%|█████████▋| 121798/125000 [1:25:25<02:12, 24.11it/s]

finished frames 9744000, mean/median reward 75.2/19.0, min/max reward 0.0/333.0


 98%|█████████▊| 121900/125000 [1:25:30<02:10, 23.80it/s]

finished frames 9752000, mean/median reward 65.5/24.0, min/max reward 0.0/333.0


 98%|█████████▊| 121999/125000 [1:25:34<02:05, 23.92it/s]

finished frames 9760000, mean/median reward 47.4/14.0, min/max reward 0.0/333.0


 98%|█████████▊| 122098/125000 [1:25:38<02:01, 23.98it/s]

finished frames 9768000, mean/median reward 52.1/16.0, min/max reward 0.0/333.0


 98%|█████████▊| 122200/125000 [1:25:42<01:56, 24.05it/s]

finished frames 9776000, mean/median reward 56.1/10.0, min/max reward 0.0/333.0


 98%|█████████▊| 122299/125000 [1:25:46<01:52, 24.02it/s]

finished frames 9784000, mean/median reward 76.1/14.0, min/max reward 0.0/333.0


 98%|█████████▊| 122398/125000 [1:25:50<01:46, 24.42it/s]

finished frames 9792000, mean/median reward 46.9/14.0, min/max reward 0.0/206.0


 98%|█████████▊| 122500/125000 [1:25:55<01:43, 24.12it/s]

finished frames 9800000, mean/median reward 25.4/11.0, min/max reward 1.0/103.0


 98%|█████████▊| 122599/125000 [1:25:59<01:39, 24.15it/s]

finished frames 9808000, mean/median reward 45.0/16.0, min/max reward 0.0/171.0


 98%|█████████▊| 122698/125000 [1:26:03<01:37, 23.50it/s]

finished frames 9816000, mean/median reward 54.7/12.0, min/max reward 2.0/274.0


 98%|█████████▊| 122800/125000 [1:26:07<01:31, 24.12it/s]

finished frames 9824000, mean/median reward 38.7/4.0, min/max reward 2.0/188.0


 98%|█████████▊| 122899/125000 [1:26:11<01:28, 23.66it/s]

finished frames 9832000, mean/median reward 33.9/12.0, min/max reward 0.0/131.0


 98%|█████████▊| 122998/125000 [1:26:15<01:24, 23.60it/s]

finished frames 9840000, mean/median reward 18.0/12.0, min/max reward 0.0/64.0


 98%|█████████▊| 123100/125000 [1:26:20<01:19, 23.84it/s]

finished frames 9848000, mean/median reward 52.0/24.0, min/max reward 3.0/282.0


 99%|█████████▊| 123199/125000 [1:26:24<01:14, 24.25it/s]

finished frames 9856000, mean/median reward 51.7/31.0, min/max reward 0.0/295.0


 99%|█████████▊| 123298/125000 [1:26:28<01:12, 23.48it/s]

finished frames 9864000, mean/median reward 30.7/13.0, min/max reward 0.0/119.0


 99%|█████████▊| 123400/125000 [1:26:32<01:05, 24.31it/s]

finished frames 9872000, mean/median reward 45.4/11.0, min/max reward 0.0/288.0


 99%|█████████▉| 123499/125000 [1:26:36<01:02, 24.15it/s]

finished frames 9880000, mean/median reward 56.8/16.0, min/max reward 0.0/290.0


 99%|█████████▉| 123598/125000 [1:26:41<00:58, 23.88it/s]

finished frames 9888000, mean/median reward 47.8/16.0, min/max reward 0.0/245.0


 99%|█████████▉| 123700/125000 [1:26:45<00:55, 23.46it/s]

finished frames 9896000, mean/median reward 46.8/17.0, min/max reward 0.0/189.0


 99%|█████████▉| 123799/125000 [1:26:49<00:50, 23.69it/s]

finished frames 9904000, mean/median reward 61.8/36.0, min/max reward 0.0/246.0


 99%|█████████▉| 123898/125000 [1:26:53<00:45, 24.35it/s]

finished frames 9912000, mean/median reward 61.5/19.0, min/max reward 0.0/349.0


 99%|█████████▉| 124000/125000 [1:26:57<00:41, 23.97it/s]

finished frames 9920000, mean/median reward 45.0/13.0, min/max reward 0.0/228.0


 99%|█████████▉| 124099/125000 [1:27:02<00:38, 23.15it/s]

finished frames 9928000, mean/median reward 21.6/11.0, min/max reward 4.0/107.0


 99%|█████████▉| 124198/125000 [1:27:06<00:33, 23.76it/s]

finished frames 9936000, mean/median reward 47.8/16.0, min/max reward 2.0/282.0


 99%|█████████▉| 124300/125000 [1:27:10<00:28, 24.34it/s]

finished frames 9944000, mean/median reward 34.8/10.0, min/max reward 0.0/317.0


100%|█████████▉| 124399/125000 [1:27:14<00:25, 23.91it/s]

finished frames 9952000, mean/median reward 37.9/13.0, min/max reward 0.0/294.0


100%|█████████▉| 124498/125000 [1:27:18<00:20, 24.43it/s]

finished frames 9960000, mean/median reward 55.0/11.0, min/max reward 0.0/320.0


100%|█████████▉| 124600/125000 [1:27:23<00:17, 23.20it/s]

finished frames 9968000, mean/median reward 50.7/11.0, min/max reward 0.0/208.0


100%|█████████▉| 124699/125000 [1:27:27<00:12, 23.76it/s]

finished frames 9976000, mean/median reward 68.2/33.0, min/max reward 6.0/333.0


100%|█████████▉| 124798/125000 [1:27:31<00:08, 23.53it/s]

finished frames 9984000, mean/median reward 65.8/28.0, min/max reward 6.0/333.0


100%|█████████▉| 124900/125000 [1:27:35<00:04, 23.68it/s]

finished frames 9992000, mean/median reward 48.8/25.0, min/max reward 5.0/279.0


100%|██████████| 125000/125000 [1:27:40<00:00, 23.76it/s]
