## 7장 벽돌깨기 게임 학습 프로그램

In [1]:
# 구현에 사용할 패키지 임포트
import numpy as np
from collections import deque
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import gym
from gym import spaces
from gym.spaces.box import Box


In [2]:
# 실행환경 설정
# 참고：https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py

import cv2
# Turn off OpenCV's use of OpenCL.
# NOTE(review): presumably done to avoid OpenCL problems inside the worker
# subprocesses, as in the referenced baselines wrapper file — confirm.
cv2.ocl.setUseOpenCL(False)


class NoopResetEnv(gym.Wrapper):
    """Trick 1, No-Operation: play a number of no-op actions after each reset.

    Doing nothing for a varying number of steps diversifies the initial game
    state so that the agent does not learn from one fixed start state only.
    """

    def __init__(self, env, noop_max=30):
        """Remember the no-op settings and verify that action 0 means NOOP."""
        super(NoopResetEnv, self).__init__(env)
        self.noop_max = noop_max
        self.override_num_noops = None  # set to force a fixed no-op count
        self.noop_action = 0
        action_meanings = env.unwrapped.get_action_meanings()
        assert action_meanings[0] == 'NOOP'

    def reset(self, **kwargs):
        """Reset, then take the no-op action for a number of steps in [1, noop_max]."""
        self.env.reset(**kwargs)

        noops = self.override_num_noops
        if noops is None:
            # Upper bound of randint is exclusive, hence the +1
            noops = self.unwrapped.np_random.randint(1, self.noop_max + 1)
        assert noops > 0

        obs = None
        remaining = noops
        while remaining > 0:
            obs, _, done, _ = self.env.step(self.noop_action)
            if done:
                # The game ended during the no-ops: start over
                obs = self.env.reset(**kwargs)
            remaining -= 1
        return obs

    def step(self, ac):
        """Forward the action to the wrapped environment unchanged."""
        return self.env.step(ac)


class EpisodicLifeEnv(gym.Wrapper):
    """Trick 2, Episodic Life: report a lost life as the end of an episode.

    The underlying game is only truly reset once it reports done itself;
    after a mere life loss the next episode continues from the same blocks.
    """

    def __init__(self, env):
        """Start with no known lives and a pending real reset."""
        super(EpisodicLifeEnv, self).__init__(env)
        self.lives = 0
        self.was_real_done = True

    def step(self, action):
        """Step the game and flag done when a life was lost."""
        obs, reward, done, info = self.env.step(action)
        # Remember whether the wrapped environment itself reported done
        self.was_real_done = done

        current_lives = self.env.unwrapped.ale.lives()
        # Requiring current_lives > 0 matters for games such as Qbert that
        # stay at lives == 0 for a few frames: the final reset then happens
        # only once, when the environment advertises done.
        lost_a_life = 0 < current_lives < self.lives
        if lost_a_life:
            done = True
        # Update afterwards so that bonus lives are picked up as well
        self.lives = current_lives
        return obs, reward, done, info

    def reset(self, **kwargs):
        """Fully reset only after a real game over; otherwise just move on.

        (The original note: after 5 failures the game restarts completely.)
        """
        if not self.was_real_done:
            # No-op step to advance from the terminal/lost-life state
            obs, _, _, _ = self.env.step(0)
        else:
            obs = self.env.reset(**kwargs)
        self.lives = self.env.unwrapped.ale.lives()
        return obs


class MaxAndSkipEnv(gym.Wrapper):
    """Trick 3, Max and Skip: repeat each action and max-pool the last frames.

    The same action is held for `skip` frames (4 by default) and the
    observation is the element-wise maximum of the last two of those frames.
    """

    def __init__(self, env, skip=4):
        """Allocate the two-frame buffer used for max pooling across time."""
        super(MaxAndSkipEnv, self).__init__(env)
        buffer_shape = (2,) + env.observation_space.shape
        self._obs_buffer = np.zeros(buffer_shape, dtype=np.uint8)
        self._skip = skip

    def step(self, action):
        """Repeat action, sum reward, and max over last observations."""
        first_kept = self._skip - 2  # index of the second-to-last frame
        total_reward = 0.0
        done = None
        for frame_idx in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            slot = frame_idx - first_kept
            if slot >= 0:
                # Second-to-last frame goes to slot 0, last frame to slot 1
                self._obs_buffer[slot] = obs
            total_reward += reward
            if done:
                break

        # Note that the observation on the done=True frame doesn't matter
        max_frame = np.max(self._obs_buffer, axis=0)
        return max_frame, total_reward, done, info

    def reset(self, **kwargs):
        """Reset the wrapped environment and return its observation."""
        return self.env.reset(**kwargs)


class WarpFrame(gym.ObservationWrapper):
    """Trick 4, Warp frame: 84x84 grayscale frames as in the DQN Nature paper."""

    def __init__(self, env):
        """Declare the (84, 84, 1) uint8 observation space."""
        super(WarpFrame, self).__init__(env)
        self.width = 84
        self.height = 84
        warped_shape = (self.height, self.width, 1)
        self.observation_space = spaces.Box(
            low=0, high=255, shape=warped_shape, dtype=np.uint8)

    def observation(self, frame):
        """Convert an RGB frame to grayscale, resize it, and add a channel axis."""
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        target_size = (self.width, self.height)
        resized = cv2.resize(gray, target_size, interpolation=cv2.INTER_AREA)
        # Trailing axis of length 1 is the channel dimension
        return resized[..., np.newaxis]


class WrapPyTorch(gym.ObservationWrapper):
    """Wrapper that reorders observation axes to the PyTorch mini-batch layout.

    Observations arriving as (height, width, channel) are returned as
    (channel, height, width).
    """

    def __init__(self, env=None):
        """Declare the observation space of the transposed observations."""
        super(WrapPyTorch, self).__init__(env)
        obs_shape = self.observation_space.shape
        # observation() applies transpose(2, 0, 1), so axis 2 comes first,
        # followed by axes 0 and 1 in their original order. (The previous
        # [2, 1, 0] ordering only matched for square frames.)
        self.observation_space = Box(
            self.observation_space.low[0, 0, 0],
            self.observation_space.high[0, 0, 0],
            [obs_shape[2], obs_shape[0], obs_shape[1]],
            dtype=self.observation_space.dtype)

    def observation(self, observation):
        """Move the last axis to the front: (H, W, C) -> (C, H, W)."""
        return observation.transpose(2, 0, 1)


In [3]:
# 실행환경 생성 함수

# 병렬 실행환경
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv


def make_env(env_id, seed, rank):
    """Return a zero-argument factory that builds one wrapped environment.

    SubprocVecEnv, which runs environments in multiple processes, needs such
    a factory for each environment. The environment is seeded with
    `seed + rank`.
    """
    def _thunk():
        # No-op starts and frame skipping are applied before seeding
        skipped = MaxAndSkipEnv(
            NoopResetEnv(gym.make(env_id), noop_max=30), skip=4)
        skipped.seed(seed + rank)  # set the random seed

        # Remaining wrappers: episodic lives, 84x84 grayscale, PyTorch layout
        return WrapPyTorch(WarpFrame(EpisodicLifeEnv(skipped)))

    return _thunk


In [4]:
# 상수 정의

# Use BreakoutNoFrameskip-v4 rather than Breakout-v0: v0 automatically skips
# 2 to 4 frames, so the version without that feature is used here.
# Reference: https://becominghuman.ai/lets-build-an-atari-ai-part-1-dqn-df57e8ff3b26
# https://github.com/openai/gym/blob/5cb12296274020db9bb6378ce54276b31e7002da/gym/envs/__init__.py#L371
ENV_NAME = 'BreakoutNoFrameskip-v4'

# Frame handling
NUM_SKIP_FRAME = 4   # number of frames to skip
NUM_STACK_FRAME = 4  # number of frames stacked into one state
NOOP_MAX = 30        # maximum number of initial no-operation frames after a reset

# Training setup
NUM_PROCESSES = 16     # number of processes run in parallel
NUM_ADVANCED_STEP = 5  # number of steps used for Advantage learning
GAMMA = 0.99           # time discount rate

# Total number of frames used for training
TOTAL_FRAMES = 1e7
# Total number of network updates: each update consumes
# NUM_ADVANCED_STEP steps from each of the NUM_PROCESSES environments,
# which gives 125,000 updates.
NUM_UPDATES = int(TOTAL_FRAMES / (NUM_ADVANCED_STEP * NUM_PROCESSES))


In [5]:
# A2C 손실함수를 계산하기 위한 상수
# Settings for the RMSprop optimizer
lr, eps, alpha = 7e-4, 1e-5, 0.99

# Coefficients used when computing the A2C loss
value_loss_coef = 0.5  # weight of the value loss term
entropy_coef = 0.01    # weight of the entropy term
max_grad_norm = 0.5    # gradient-norm clipping threshold


In [6]:
# GPU 사용 설정
# Use the GPU when CUDA is available, otherwise fall back to the CPU
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)


cuda


In [7]:
# 메모리 클래스 정의


class RolloutStorage(object):
    """Memory holding the transitions of one Advantage-learning window.

    Observations, masks and returns have `num_steps + 1` slots (the extra
    slot holds the state reached after the last step); rewards and actions
    have `num_steps` slots.
    """

    def __init__(self, num_steps, num_processes, obs_shape):
        """Allocate zero/one-filled buffers on `device`.

        Args:
            num_steps: number of steps stored per update window.
            num_processes: number of parallel environments.
            obs_shape: shape of one stacked observation, e.g. (4, 84, 84).
        """
        # Kept so that insert() wraps around at this storage's own length
        # instead of depending on a module-level constant.
        self.num_steps = num_steps

        # *obs_shape unpacks the shape: (4, 84, 84) -> 4, 84, 84
        self.observations = torch.zeros(
            num_steps + 1, num_processes, *obs_shape).to(device)

        self.masks = torch.ones(num_steps + 1, num_processes, 1).to(device)
        self.rewards = torch.zeros(num_steps, num_processes, 1).to(device)
        self.actions = torch.zeros(
            num_steps, num_processes, 1).long().to(device)

        # Discounted total rewards
        self.returns = torch.zeros(num_steps + 1, num_processes, 1).to(device)
        self.index = 0  # position at which the next transition is stored

    def insert(self, current_obs, action, reward, mask):
        """Store one transition at the current index and advance the index.

        The resulting observation and mask go to slot `index + 1`; the reward
        and action that led there go to slot `index`.
        """
        self.observations[self.index + 1].copy_(current_obs)
        self.masks[self.index + 1].copy_(mask)
        self.rewards[self.index].copy_(reward)
        self.actions[self.index].copy_(action)

        # Wrap around after num_steps insertions
        self.index = (self.index + 1) % self.num_steps

    def after_update(self):
        """Carry the most recent observation and mask over to slot 0.

        Called once a full window of steps has been used for an update.
        """
        self.observations[0].copy_(self.observations[-1])
        self.masks[0].copy_(self.masks[-1])

    def compute_returns(self, next_value):
        """Compute the discounted total reward for every step in the window.

        The computation runs backwards from the last step, bootstrapping from
        `next_value`; a mask of 0 cuts the return at an episode boundary.
        """
        self.returns[-1] = next_value
        for ad_step in reversed(range(self.rewards.size(0))):
            self.returns[ad_step] = self.returns[ad_step + 1] * \
                GAMMA * self.masks[ad_step + 1] + self.rewards[ad_step]


In [8]:
# A2C 신경망 구성


def init(module, gain):
    """Initialize a layer in place and return it.

    The weight gets an orthogonal initialization scaled by `gain`; the bias
    is set to zero.
    """
    nn.init.orthogonal_(module.weight.data, gain=gain)
    bias = module.bias.data
    nn.init.constant_(bias, 0)
    return module


class Flatten(nn.Module):
    """Layer that reshapes convolutional output to one vector per sample."""

    def forward(self, x):
        """Keep the first (batch) dimension and merge all remaining ones."""
        batch_size = x.size(0)
        return x.view(batch_size, -1)


class Net(nn.Module):
    """A2C network: a shared convolutional trunk feeding a critic and an actor head."""

    def __init__(self, n_out):
        """Build and initialize all layers.

        Args:
            n_out: number of actions, i.e. the size of the actor output.
        """
        super().__init__()

        relu_gain = nn.init.calculate_gain('relu')

        # Layers are created and initialized one after another, in the order
        # in which they appear in the network.
        # Spatial size: (in - kernel + 2 * padding) / stride + 1
        # The input has NUM_STACK_FRAME channels because frames are stacked.
        conv1 = init(nn.Conv2d(NUM_STACK_FRAME, 32, kernel_size=8, stride=4),
                     gain=relu_gain)  # 84*84 -> 20*20
        conv2 = init(nn.Conv2d(32, 64, kernel_size=4, stride=2),
                     gain=relu_gain)  # 20*20 -> 9*9
        conv3 = init(nn.Conv2d(64, 64, kernel_size=3, stride=1),
                     gain=relu_gain)  # 9*9 -> 7*7
        # 64 feature maps of 7*7 are projected to 512 features
        fc = init(nn.Linear(64 * 7 * 7, 512), gain=relu_gain)

        self.conv = nn.Sequential(
            conv1,
            nn.ReLU(),
            conv2,
            nn.ReLU(),
            conv3,
            nn.ReLU(),
            Flatten(),  # turn the feature maps into one vector per sample
            fc,
            nn.ReLU(),
        )

        # Critic: a single output, the state value
        self.critic = init(nn.Linear(512, 1), gain=1.0)

        # Actor: one output per action
        self.actor = init(nn.Linear(512, n_out), gain=0.01)

        # Put the network into training mode
        self.train()

    def forward(self, x):
        """Forward pass; returns (critic_output, actor_output)."""
        scaled = x / 255.0  # normalize pixel values from [0, 255] to [0, 1]
        features = self.conv(scaled)
        return self.critic(features), self.actor(features)

    def act(self, x):
        """Sample one action per row of x from the actor's softmax distribution."""
        _, logits = self(x)
        # dim=1 runs the softmax across the action dimension
        action_probs = F.softmax(logits, dim=1)
        return action_probs.multinomial(num_samples=1)

    def get_value(self, x):
        """Return the state value of x."""
        state_value, _ = self(x)
        return state_value

    def evaluate_actions(self, x, actions):
        """Return the state value, log-probability of `actions`, and mean entropy."""
        value, logits = self(x)

        # Both softmax variants run across the action dimension (dim=1)
        log_probs = F.log_softmax(logits, dim=1)
        probs = F.softmax(logits, dim=1)

        # Log-probability of the action that was actually taken
        action_log_probs = log_probs.gather(1, actions)

        # Entropy per sample, averaged over the batch
        per_sample_entropy = -(log_probs * probs).sum(-1)
        dist_entropy = per_sample_entropy.mean()

        return value, action_log_probs, dist_entropy


In [9]:
# 에이전트의 두뇌 역할을 하는 클래스로, 모든 에이전트가 공유한다


class Brain(object):
    """Shared "brain" of all agents: owns the network and performs A2C updates."""

    def __init__(self, actor_critic):
        """Store the network and create its RMSprop optimizer.

        Args:
            actor_critic: the actor-critic network (an instance of Net).
        """
        self.actor_critic = actor_critic

        # To start from previously trained weights, load them here, e.g.:
        #   param = torch.load('weight.pth', map_location='cpu')
        #   self.actor_critic.load_state_dict(param)

        # Optimizer used to learn the weights
        self.optimizer = optim.RMSprop(
            actor_critic.parameters(), lr=lr, eps=eps, alpha=alpha)

    def update(self, rollouts):
        """Run one A2C update using every step stored in `rollouts`.

        Args:
            rollouts: a RolloutStorage whose returns have already been
                computed with compute_returns().
        """
        obs_shape = rollouts.observations.size()[2:]  # e.g. torch.Size([4, 84, 84])
        # Take the window length and number of environments from the stored
        # data itself, so the update always matches the rollouts it is given.
        num_steps, num_processes = rollouts.rewards.size()[:2]

        # Steps and processes are merged into one batch dimension.
        # The last observation slot is excluded: no action was taken from it.
        values, action_log_probs, dist_entropy = self.actor_critic.evaluate_actions(
            rollouts.observations[:-1].view(-1, *obs_shape),
            rollouts.actions.view(-1, 1))

        # Restore the (num_steps, num_processes, 1) layout
        values = values.view(num_steps, num_processes, 1)
        action_log_probs = action_log_probs.view(num_steps, num_processes, 1)

        advantages = rollouts.returns[:-1] - values
        value_loss = advantages.pow(2).mean()

        # advantages is detached so that it is treated as a constant here
        action_gain = (advantages.detach() * action_log_probs).mean()

        total_loss = (value_loss * value_loss_coef -
                      action_gain - dist_entropy * entropy_coef)

        self.optimizer.zero_grad()  # reset gradients
        total_loss.backward()  # backpropagation
        # Clip the gradient norm to max_grad_norm so that the weights do not
        # change too much in a single update
        nn.utils.clip_grad_norm_(self.actor_critic.parameters(), max_grad_norm)

        self.optimizer.step()  # apply the weight update


In [10]:
# Breakout을 실행하는 환경 클래스


class Environment:
    """Sets up the parallel Breakout environments and runs A2C training."""

    def run(self):
        """Create the environments, train, and always shut the workers down."""
        # Random seeds
        seed_num = 1
        torch.manual_seed(seed_num)
        if use_cuda:
            torch.cuda.manual_seed(seed_num)

        # Use a single PyTorch thread. This was previously passed the seed
        # variable, which only worked because the seed happens to be 1.
        torch.set_num_threads(1)

        # Multi-process environments, one factory per process
        envs = SubprocVecEnv(
            [make_env(ENV_NAME, seed_num, i) for i in range(NUM_PROCESSES)])
        try:
            self._train(envs)
        finally:
            # Stop the worker processes even if training raises
            envs.close()

    def _train(self, envs):
        """Run the training loop on `envs`, logging progress and saving weights."""
        # The Brain object shared by all agents
        n_out = envs.action_space.n  # number of available actions
        actor_critic = Net(n_out).to(device)
        global_brain = Brain(actor_critic)

        # Buffers
        obs_shape = envs.observation_space.shape  # (1, 84, 84)
        obs_shape = (obs_shape[0] * NUM_STACK_FRAME,
                     *obs_shape[1:])  # (4, 84, 84)
        # Stacked frames of every process: (NUM_PROCESSES, 4, 84, 84)
        current_obs = torch.zeros(NUM_PROCESSES, *obs_shape).to(device)
        rollouts = RolloutStorage(
            NUM_ADVANCED_STEP, NUM_PROCESSES, obs_shape)
        # Reward accumulated in the episode currently running
        episode_rewards = torch.zeros([NUM_PROCESSES, 1])
        # Total reward of the most recently finished episode
        final_rewards = torch.zeros([NUM_PROCESSES, 1])

        # Initial state
        obs = envs.reset()
        obs = torch.from_numpy(obs).float()  # (NUM_PROCESSES, 1, 84, 84)
        current_obs[:, -1:] = obs  # newest frame goes into the last slot

        # The current state is the first state of the rollout window
        rollouts.observations[0].copy_(current_obs)

        # Main loop
        for j in tqdm(range(NUM_UPDATES)):
            # One pass per step of the Advantage-learning window
            for step in range(NUM_ADVANCED_STEP):

                # Choose the actions
                with torch.no_grad():
                    action = actor_critic.act(rollouts.observations[step])

                cpu_actions = action.squeeze(1).cpu().numpy()

                # Advance every environment by one step in parallel
                obs, reward, done, info = envs.step(cpu_actions)

                # Reward: (NUM_PROCESSES,) -> (NUM_PROCESSES, 1) tensor,
                # added to the running episode totals
                reward = np.expand_dims(np.stack(reward), 1)
                reward = torch.from_numpy(reward).float()
                episode_rewards += reward

                # Per process: 0 if the episode ended, 1 otherwise
                masks = torch.FloatTensor(
                    [[0.0] if done_ else [1.0] for done_ in done])

                # Where an episode ended, replace the stored final reward
                # with the total of the episode that just finished
                final_rewards *= masks
                final_rewards += (1 - masks) * episode_rewards

                # Restart the running total where an episode ended
                episode_rewards *= masks

                masks = masks.to(device)

                # Clear the frame stack of finished episodes; the mask is
                # expanded from (N, 1) to (N, 1, 1, 1) for broadcasting
                current_obs *= masks.unsqueeze(2).unsqueeze(2)

                # Shift the stack by one frame and append the newest one.
                # Source and destination are overlapping slices of the same
                # tensor, so the source is cloned first to make the result
                # independent of the order in which elements are copied.
                obs = torch.from_numpy(obs).float()
                current_obs[:, :-1] = current_obs[:, 1:].clone()
                current_obs[:, -1:] = obs

                # Store this step's transition
                rollouts.insert(current_obs, action.data, reward, masks)

            # State value expected from the state after the last step
            with torch.no_grad():
                next_value = actor_critic.get_value(
                    rollouts.observations[-1]).detach()

            # Discounted returns for every step in the window
            rollouts.compute_returns(next_value)

            # Update the network, then carry the last state over
            global_brain.update(rollouts)
            rollouts.after_update()

            # Progress log
            if j % 100 == 0:
                print("finished frames {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}".
                      format(j*NUM_PROCESSES*NUM_ADVANCED_STEP,
                             final_rewards.mean(),
                             final_rewards.median(),
                             final_rewards.min(),
                             final_rewards.max()))

            # Periodic checkpoint of the weights
            if j % 12500 == 0:
                torch.save(global_brain.actor_critic.state_dict(),
                           'weight_'+str(j)+'.pth')

        # Final weights after the main loop
        torch.save(global_brain.actor_critic.state_dict(), 'weight_end.pth')
        

In [None]:
# 실행
# Start training only when this code is executed as the main program.
# The environments run in subprocesses, and a main-module guard keeps a
# re-import of this module from launching training again.
if __name__ == "__main__":
    breakout_env = Environment()
    breakout_env.run()


  0%|          | 5/125000 [00:00<2:00:28, 17.29it/s]

finished frames 0, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


  0%|          | 104/125000 [00:04<1:26:26, 24.08it/s]

finished frames 8000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


  0%|          | 203/125000 [00:08<1:24:19, 24.67it/s]

finished frames 16000, mean/median reward 0.3/0.0, min/max reward 0.0/4.0


  0%|          | 305/125000 [00:12<1:28:14, 23.55it/s]

finished frames 24000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  0%|          | 404/125000 [00:16<1:33:47, 22.14it/s]

finished frames 32000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  0%|          | 503/125000 [00:21<1:28:14, 23.51it/s]

finished frames 40000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  0%|          | 602/125000 [00:25<1:28:59, 23.30it/s]

finished frames 48000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


  1%|          | 704/125000 [00:29<1:27:45, 23.60it/s]

finished frames 56000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  1%|          | 803/125000 [00:33<1:29:05, 23.23it/s]

finished frames 64000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  1%|          | 905/125000 [00:38<1:23:09, 24.87it/s]

finished frames 72000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  1%|          | 1004/125000 [00:42<1:23:18, 24.81it/s]

finished frames 80000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


  1%|          | 1103/125000 [00:46<1:26:11, 23.96it/s]

finished frames 88000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  1%|          | 1205/125000 [00:51<1:25:39, 24.09it/s]

finished frames 96000, mean/median reward 0.7/0.0, min/max reward 0.0/2.0


  1%|          | 1304/125000 [00:55<1:26:05, 23.95it/s]

finished frames 104000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


  1%|          | 1403/125000 [00:59<1:23:18, 24.73it/s]

finished frames 112000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


  1%|          | 1505/125000 [01:03<1:27:48, 23.44it/s]

finished frames 120000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


  1%|▏         | 1604/125000 [01:07<1:24:27, 24.35it/s]

finished frames 128000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


  1%|▏         | 1706/125000 [01:11<1:23:45, 24.53it/s]

finished frames 136000, mean/median reward 0.8/0.0, min/max reward 0.0/2.0


  1%|▏         | 1805/125000 [01:16<1:25:00, 24.15it/s]

finished frames 144000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


  2%|▏         | 1904/125000 [01:20<1:27:36, 23.42it/s]

finished frames 152000, mean/median reward 1.0/1.0, min/max reward 0.0/4.0


  2%|▏         | 2006/125000 [01:24<1:25:51, 23.88it/s]

finished frames 160000, mean/median reward 0.9/0.0, min/max reward 0.0/4.0


  2%|▏         | 2105/125000 [01:28<1:25:30, 23.96it/s]

finished frames 168000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  2%|▏         | 2204/125000 [01:32<1:25:11, 24.03it/s]

finished frames 176000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


  2%|▏         | 2303/125000 [01:37<1:31:30, 22.35it/s]

finished frames 184000, mean/median reward 0.8/0.0, min/max reward 0.0/3.0


  2%|▏         | 2403/125000 [01:41<2:01:29, 16.82it/s]

finished frames 192000, mean/median reward 0.9/1.0, min/max reward 0.0/2.0


  2%|▏         | 2504/125000 [01:47<1:36:23, 21.18it/s]

finished frames 200000, mean/median reward 0.8/1.0, min/max reward 0.0/2.0


  2%|▏         | 2605/125000 [01:51<1:28:27, 23.06it/s]

finished frames 208000, mean/median reward 0.9/0.0, min/max reward 0.0/4.0


  2%|▏         | 2704/125000 [01:55<1:24:45, 24.05it/s]

finished frames 216000, mean/median reward 1.2/1.0, min/max reward 0.0/4.0


  2%|▏         | 2803/125000 [01:59<1:25:40, 23.77it/s]

finished frames 224000, mean/median reward 0.9/1.0, min/max reward 0.0/2.0


  2%|▏         | 2905/125000 [02:05<1:34:28, 21.54it/s]

finished frames 232000, mean/median reward 1.1/1.0, min/max reward 0.0/4.0


  2%|▏         | 3004/125000 [02:09<1:28:35, 22.95it/s]

finished frames 240000, mean/median reward 1.5/1.0, min/max reward 0.0/6.0


  2%|▏         | 3103/125000 [02:13<1:31:52, 22.11it/s]

finished frames 248000, mean/median reward 1.7/1.0, min/max reward 0.0/5.0


  3%|▎         | 3205/125000 [02:18<1:24:57, 23.89it/s]

finished frames 256000, mean/median reward 1.0/1.0, min/max reward 0.0/4.0


  3%|▎         | 3304/125000 [02:23<1:35:07, 21.32it/s]

finished frames 264000, mean/median reward 1.6/1.0, min/max reward 1.0/4.0


  3%|▎         | 3403/125000 [02:27<1:29:16, 22.70it/s]

finished frames 272000, mean/median reward 1.3/1.0, min/max reward 0.0/4.0


  3%|▎         | 3505/125000 [02:31<1:18:27, 25.81it/s]

finished frames 280000, mean/median reward 1.8/2.0, min/max reward 0.0/4.0


  3%|▎         | 3604/125000 [02:35<1:21:50, 24.72it/s]

finished frames 288000, mean/median reward 1.6/1.0, min/max reward 0.0/4.0


  3%|▎         | 3704/125000 [02:40<1:32:00, 21.97it/s]

finished frames 296000, mean/median reward 1.8/1.0, min/max reward 0.0/5.0


  3%|▎         | 3806/125000 [02:45<1:21:14, 24.86it/s]

finished frames 304000, mean/median reward 1.2/1.0, min/max reward 0.0/6.0


  3%|▎         | 3905/125000 [02:49<1:19:07, 25.51it/s]

finished frames 312000, mean/median reward 1.9/1.0, min/max reward 1.0/4.0


  3%|▎         | 4004/125000 [02:53<1:23:47, 24.07it/s]

finished frames 320000, mean/median reward 1.5/1.0, min/max reward 0.0/6.0


  3%|▎         | 4103/125000 [02:58<1:55:21, 17.47it/s]

finished frames 328000, mean/median reward 2.0/1.0, min/max reward 1.0/5.0


  3%|▎         | 4205/125000 [03:03<1:31:32, 21.99it/s]

finished frames 336000, mean/median reward 1.5/1.0, min/max reward 1.0/4.0


  3%|▎         | 4304/125000 [03:07<1:21:31, 24.68it/s]

finished frames 344000, mean/median reward 2.1/2.0, min/max reward 0.0/5.0


  4%|▎         | 4403/125000 [03:11<1:20:12, 25.06it/s]

finished frames 352000, mean/median reward 2.1/1.0, min/max reward 0.0/5.0


  4%|▎         | 4502/125000 [03:15<1:27:56, 22.84it/s]

finished frames 360000, mean/median reward 2.3/2.0, min/max reward 1.0/7.0


  4%|▎         | 4604/125000 [03:20<1:31:28, 21.94it/s]

finished frames 368000, mean/median reward 1.9/1.0, min/max reward 0.0/4.0


  4%|▍         | 4706/125000 [03:25<1:20:03, 25.05it/s]

finished frames 376000, mean/median reward 2.7/2.0, min/max reward 0.0/10.0


  4%|▍         | 4805/125000 [03:29<1:21:28, 24.59it/s]

finished frames 384000, mean/median reward 2.8/2.0, min/max reward 0.0/7.0


  4%|▍         | 4904/125000 [03:33<1:25:26, 23.42it/s]

finished frames 392000, mean/median reward 2.2/1.0, min/max reward 0.0/6.0


  4%|▍         | 5004/125000 [03:38<1:49:33, 18.26it/s]

finished frames 400000, mean/median reward 2.2/2.0, min/max reward 0.0/5.0


  4%|▍         | 5104/125000 [03:43<1:33:57, 21.27it/s]

finished frames 408000, mean/median reward 3.3/2.0, min/max reward 0.0/11.0


  4%|▍         | 5203/125000 [03:47<1:20:23, 24.84it/s]

finished frames 416000, mean/median reward 2.2/1.0, min/max reward 0.0/6.0


  4%|▍         | 5305/125000 [03:51<1:32:55, 21.47it/s]

finished frames 424000, mean/median reward 3.5/3.0, min/max reward 0.0/10.0


  4%|▍         | 5404/125000 [03:58<1:50:20, 18.07it/s]

finished frames 432000, mean/median reward 3.6/2.0, min/max reward 0.0/9.0


  4%|▍         | 5505/125000 [04:03<1:28:36, 22.48it/s]

finished frames 440000, mean/median reward 4.4/4.0, min/max reward 0.0/12.0


  4%|▍         | 5604/125000 [04:07<1:23:39, 23.79it/s]

finished frames 448000, mean/median reward 3.2/3.0, min/max reward 1.0/7.0


  5%|▍         | 5703/125000 [04:11<1:27:44, 22.66it/s]

finished frames 456000, mean/median reward 3.0/2.0, min/max reward 1.0/6.0


  5%|▍         | 5805/125000 [04:15<1:21:44, 24.31it/s]

finished frames 464000, mean/median reward 3.7/3.0, min/max reward 0.0/12.0


  5%|▍         | 5904/125000 [04:19<1:16:57, 25.79it/s]

finished frames 472000, mean/median reward 4.1/3.0, min/max reward 1.0/12.0


  5%|▍         | 6006/125000 [04:23<1:20:13, 24.72it/s]

finished frames 480000, mean/median reward 4.8/4.0, min/max reward 1.0/11.0


  5%|▍         | 6104/125000 [04:29<1:29:36, 22.11it/s]

finished frames 488000, mean/median reward 3.3/3.0, min/max reward 0.0/10.0


  5%|▍         | 6206/125000 [04:33<1:20:16, 24.67it/s]

finished frames 496000, mean/median reward 4.2/4.0, min/max reward 1.0/12.0


  5%|▌         | 6305/125000 [04:37<1:18:03, 25.34it/s]

finished frames 504000, mean/median reward 4.8/4.0, min/max reward 1.0/12.0


  5%|▌         | 6404/125000 [04:41<1:19:57, 24.72it/s]

finished frames 512000, mean/median reward 4.1/4.0, min/max reward 0.0/9.0


  5%|▌         | 6504/125000 [04:45<1:48:04, 18.27it/s]

finished frames 520000, mean/median reward 3.0/3.0, min/max reward 1.0/7.0


  5%|▌         | 6604/125000 [04:50<1:28:38, 22.26it/s]

finished frames 528000, mean/median reward 2.8/2.0, min/max reward 0.0/7.0


  5%|▌         | 6703/125000 [04:55<1:21:34, 24.17it/s]

finished frames 536000, mean/median reward 4.1/4.0, min/max reward 1.0/10.0


  5%|▌         | 6805/125000 [04:59<1:16:49, 25.64it/s]

finished frames 544000, mean/median reward 4.4/3.0, min/max reward 0.0/12.0


  6%|▌         | 6904/125000 [05:03<1:19:39, 24.71it/s]

finished frames 552000, mean/median reward 4.2/4.0, min/max reward 0.0/10.0


  6%|▌         | 7003/125000 [05:08<1:50:37, 17.78it/s]

finished frames 560000, mean/median reward 4.6/4.0, min/max reward 1.0/13.0


  6%|▌         | 7105/125000 [05:12<1:24:12, 23.33it/s]

finished frames 568000, mean/median reward 3.4/3.0, min/max reward 1.0/9.0


  6%|▌         | 7204/125000 [05:16<1:25:52, 22.86it/s]

finished frames 576000, mean/median reward 6.8/4.0, min/max reward 1.0/17.0


  6%|▌         | 7303/125000 [05:22<2:08:30, 15.27it/s]

finished frames 584000, mean/median reward 4.7/3.0, min/max reward 1.0/24.0


  6%|▌         | 7403/125000 [05:28<1:48:34, 18.05it/s]

finished frames 592000, mean/median reward 5.8/4.0, min/max reward 1.0/14.0


  6%|▌         | 7504/125000 [05:32<1:22:22, 23.77it/s]

finished frames 600000, mean/median reward 4.2/3.0, min/max reward 1.0/10.0


  6%|▌         | 7603/125000 [05:37<1:20:17, 24.37it/s]

finished frames 608000, mean/median reward 4.4/4.0, min/max reward 1.0/14.0


  6%|▌         | 7705/125000 [05:41<1:22:25, 23.72it/s]

finished frames 616000, mean/median reward 6.1/5.0, min/max reward 1.0/17.0


  6%|▌         | 7804/125000 [05:45<1:34:23, 20.69it/s]

finished frames 624000, mean/median reward 3.8/2.0, min/max reward 1.0/14.0


  6%|▋         | 7903/125000 [05:50<1:25:07, 22.93it/s]

finished frames 632000, mean/median reward 6.5/4.0, min/max reward 1.0/17.0


  6%|▋         | 8005/125000 [05:55<1:19:02, 24.67it/s]

finished frames 640000, mean/median reward 5.3/4.0, min/max reward 0.0/10.0


  6%|▋         | 8104/125000 [05:59<1:16:13, 25.56it/s]

finished frames 648000, mean/median reward 7.1/4.0, min/max reward 1.0/26.0


  7%|▋         | 8206/125000 [06:03<1:17:55, 24.98it/s]

finished frames 656000, mean/median reward 5.3/4.0, min/max reward 1.0/16.0


  7%|▋         | 8305/125000 [06:07<1:22:45, 23.50it/s]

finished frames 664000, mean/median reward 6.5/4.0, min/max reward 1.0/19.0


  7%|▋         | 8404/125000 [06:12<1:27:00, 22.34it/s]

finished frames 672000, mean/median reward 7.5/7.0, min/max reward 0.0/20.0


  7%|▋         | 8506/125000 [06:16<1:17:32, 25.04it/s]

finished frames 680000, mean/median reward 4.5/4.0, min/max reward 0.0/12.0


  7%|▋         | 8605/125000 [06:20<1:24:01, 23.09it/s]

finished frames 688000, mean/median reward 5.6/4.0, min/max reward 1.0/21.0


  7%|▋         | 8704/125000 [06:25<1:17:31, 25.00it/s]

finished frames 696000, mean/median reward 6.7/5.0, min/max reward 1.0/21.0


  7%|▋         | 8803/125000 [06:29<1:25:49, 22.57it/s]

finished frames 704000, mean/median reward 5.4/5.0, min/max reward 0.0/12.0


  7%|▋         | 8905/125000 [06:33<1:20:00, 24.18it/s]

finished frames 712000, mean/median reward 5.8/4.0, min/max reward 0.0/31.0


  7%|▋         | 9004/125000 [06:38<1:23:57, 23.03it/s]

finished frames 720000, mean/median reward 4.8/4.0, min/max reward 1.0/13.0


  7%|▋         | 9106/125000 [06:42<1:17:29, 24.93it/s]

finished frames 728000, mean/median reward 6.3/4.0, min/max reward 1.0/19.0


  7%|▋         | 9205/125000 [06:46<1:23:19, 23.16it/s]

finished frames 736000, mean/median reward 6.4/5.0, min/max reward 2.0/21.0


  7%|▋         | 9304/125000 [06:50<1:19:01, 24.40it/s]

finished frames 744000, mean/median reward 2.9/1.0, min/max reward 0.0/12.0


  8%|▊         | 9403/125000 [06:55<1:25:48, 22.45it/s]

finished frames 752000, mean/median reward 5.6/5.0, min/max reward 0.0/11.0


  8%|▊         | 9505/125000 [06:59<1:17:09, 24.95it/s]

finished frames 760000, mean/median reward 5.5/5.0, min/max reward 1.0/12.0


  8%|▊         | 9604/125000 [07:03<1:14:51, 25.69it/s]

finished frames 768000, mean/median reward 6.1/5.0, min/max reward 0.0/18.0


  8%|▊         | 9703/125000 [07:07<1:35:51, 20.05it/s]

finished frames 776000, mean/median reward 6.5/5.0, min/max reward 1.0/21.0


  8%|▊         | 9804/125000 [07:13<1:35:48, 20.04it/s]

finished frames 784000, mean/median reward 5.1/5.0, min/max reward 2.0/10.0


  8%|▊         | 9903/125000 [07:18<1:22:29, 23.25it/s]

finished frames 792000, mean/median reward 6.4/6.0, min/max reward 1.0/14.0


  8%|▊         | 10005/125000 [07:22<1:17:23, 24.77it/s]

finished frames 800000, mean/median reward 7.4/6.0, min/max reward 0.0/21.0


  8%|▊         | 10104/125000 [07:26<1:28:28, 21.64it/s]

finished frames 808000, mean/median reward 5.4/5.0, min/max reward 1.0/20.0


  8%|▊         | 10203/125000 [07:32<1:38:52, 19.35it/s]

finished frames 816000, mean/median reward 6.8/6.0, min/max reward 0.0/15.0


  8%|▊         | 10303/125000 [07:36<1:44:30, 18.29it/s]

finished frames 824000, mean/median reward 8.1/7.0, min/max reward 1.0/23.0


  8%|▊         | 10403/125000 [07:42<1:26:17, 22.13it/s]

finished frames 832000, mean/median reward 7.6/6.0, min/max reward 2.0/16.0


  8%|▊         | 10505/125000 [07:46<1:20:58, 23.56it/s]

finished frames 840000, mean/median reward 5.7/4.0, min/max reward 1.0/16.0


  8%|▊         | 10604/125000 [07:50<1:19:33, 23.97it/s]

finished frames 848000, mean/median reward 7.8/5.0, min/max reward 1.0/22.0


  9%|▊         | 10703/125000 [07:54<1:26:27, 22.03it/s]

finished frames 856000, mean/median reward 8.7/7.0, min/max reward 1.0/22.0


  9%|▊         | 10803/125000 [08:00<1:35:05, 20.02it/s]

finished frames 864000, mean/median reward 8.5/5.0, min/max reward 2.0/24.0


  9%|▊         | 10905/125000 [08:05<1:21:12, 23.42it/s]

finished frames 872000, mean/median reward 7.3/5.0, min/max reward 2.0/27.0


  9%|▉         | 11003/125000 [08:10<1:39:19, 19.13it/s]

finished frames 880000, mean/median reward 8.3/5.0, min/max reward 2.0/23.0


  9%|▉         | 11104/125000 [08:17<1:53:43, 16.69it/s]

finished frames 888000, mean/median reward 6.7/5.0, min/max reward 1.0/14.0


  9%|▉         | 11205/125000 [08:22<1:31:59, 20.62it/s]

finished frames 896000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


  9%|▉         | 11304/125000 [08:27<1:23:13, 22.77it/s]

finished frames 904000, mean/median reward 3.7/3.0, min/max reward 0.0/7.0


  9%|▉         | 11404/125000 [08:31<1:29:40, 21.11it/s]

finished frames 912000, mean/median reward 3.5/3.0, min/max reward 0.0/8.0


  9%|▉         | 11503/125000 [08:36<1:29:16, 21.19it/s]

finished frames 920000, mean/median reward 5.6/5.0, min/max reward 1.0/17.0


  9%|▉         | 11603/125000 [08:41<2:28:19, 12.74it/s]

finished frames 928000, mean/median reward 7.0/5.0, min/max reward 1.0/23.0


  9%|▉         | 11703/125000 [08:46<1:23:12, 22.69it/s]

finished frames 936000, mean/median reward 4.6/4.0, min/max reward 0.0/11.0


  9%|▉         | 11805/125000 [08:51<1:22:07, 22.97it/s]

finished frames 944000, mean/median reward 8.5/7.0, min/max reward 2.0/27.0


 10%|▉         | 11904/125000 [08:55<1:22:02, 22.98it/s]

finished frames 952000, mean/median reward 7.5/6.0, min/max reward 2.0/19.0


 10%|▉         | 12003/125000 [08:59<1:20:26, 23.41it/s]

finished frames 960000, mean/median reward 6.9/5.0, min/max reward 1.0/22.0


 10%|▉         | 12105/125000 [09:04<1:20:37, 23.34it/s]

finished frames 968000, mean/median reward 9.1/8.0, min/max reward 2.0/19.0


 10%|▉         | 12204/125000 [09:09<2:02:17, 15.37it/s]

finished frames 976000, mean/median reward 8.2/7.0, min/max reward 1.0/15.0


 10%|▉         | 12304/125000 [09:16<2:02:23, 15.35it/s]

finished frames 984000, mean/median reward 8.6/6.0, min/max reward 0.0/23.0


 10%|▉         | 12402/125000 [09:22<2:04:03, 15.13it/s]

finished frames 992000, mean/median reward 7.8/4.0, min/max reward 0.0/46.0


 10%|█         | 12504/125000 [09:29<2:04:08, 15.10it/s]

finished frames 1000000, mean/median reward 11.3/10.0, min/max reward 1.0/37.0


 10%|█         | 12604/125000 [09:36<2:04:51, 15.00it/s]

finished frames 1008000, mean/median reward 9.3/6.0, min/max reward 1.0/33.0


 10%|█         | 12704/125000 [09:42<2:05:44, 14.88it/s]

finished frames 1016000, mean/median reward 10.7/7.0, min/max reward 1.0/31.0


 10%|█         | 12804/125000 [09:49<1:53:43, 16.44it/s]

finished frames 1024000, mean/median reward 7.2/6.0, min/max reward 1.0/16.0


 10%|█         | 12904/125000 [09:55<1:51:18, 16.78it/s]

finished frames 1032000, mean/median reward 7.9/6.0, min/max reward 1.0/18.0


 10%|█         | 13004/125000 [10:01<1:54:02, 16.37it/s]

finished frames 1040000, mean/median reward 8.9/5.0, min/max reward 2.0/36.0


 10%|█         | 13104/125000 [10:07<1:53:05, 16.49it/s]

finished frames 1048000, mean/median reward 5.8/4.0, min/max reward 0.0/21.0


 11%|█         | 13204/125000 [10:13<1:57:25, 15.87it/s]

finished frames 1056000, mean/median reward 8.3/5.0, min/max reward 1.0/28.0


 11%|█         | 13304/125000 [10:19<1:50:24, 16.86it/s]

finished frames 1064000, mean/median reward 10.8/8.0, min/max reward 2.0/26.0


 11%|█         | 13404/125000 [10:25<1:53:55, 16.32it/s]

finished frames 1072000, mean/median reward 9.6/10.0, min/max reward 1.0/17.0


 11%|█         | 13504/125000 [10:30<1:20:44, 23.02it/s]

finished frames 1080000, mean/median reward 9.5/8.0, min/max reward 1.0/25.0


 11%|█         | 13603/125000 [10:34<1:20:38, 23.02it/s]

finished frames 1088000, mean/median reward 10.2/7.0, min/max reward 3.0/24.0


 11%|█         | 13705/125000 [10:39<1:19:19, 23.38it/s]

finished frames 1096000, mean/median reward 8.2/7.0, min/max reward 0.0/26.0


 11%|█         | 13804/125000 [10:43<1:19:46, 23.23it/s]

finished frames 1104000, mean/median reward 3.5/3.0, min/max reward 0.0/9.0


 11%|█         | 13903/125000 [10:47<1:20:45, 22.93it/s]

finished frames 1112000, mean/median reward 10.6/7.0, min/max reward 1.0/40.0


 11%|█         | 14004/125000 [10:52<1:50:43, 16.71it/s]

finished frames 1120000, mean/median reward 16.1/7.0, min/max reward 2.0/130.0


 11%|█▏        | 14104/125000 [10:58<1:51:09, 16.63it/s]

finished frames 1128000, mean/median reward 9.6/7.0, min/max reward 1.0/40.0


 11%|█▏        | 14204/125000 [11:04<1:53:42, 16.24it/s]

finished frames 1136000, mean/median reward 8.5/7.0, min/max reward 1.0/16.0


 11%|█▏        | 14304/125000 [11:10<1:52:00, 16.47it/s]

finished frames 1144000, mean/median reward 9.3/7.0, min/max reward 2.0/22.0


 12%|█▏        | 14402/125000 [11:16<2:11:28, 14.02it/s]

finished frames 1152000, mean/median reward 11.1/10.0, min/max reward 1.0/23.0


 12%|█▏        | 14504/125000 [11:22<1:16:06, 24.20it/s]

finished frames 1160000, mean/median reward 9.7/5.0, min/max reward 1.0/31.0


 12%|█▏        | 14603/125000 [11:26<1:13:53, 24.90it/s]

finished frames 1168000, mean/median reward 11.4/7.0, min/max reward 0.0/33.0


 12%|█▏        | 14705/125000 [11:30<1:15:06, 24.48it/s]

finished frames 1176000, mean/median reward 13.2/9.0, min/max reward 2.0/47.0


 12%|█▏        | 14804/125000 [11:34<1:13:11, 25.09it/s]

finished frames 1184000, mean/median reward 12.8/13.0, min/max reward 4.0/23.0


 12%|█▏        | 14906/125000 [11:38<1:13:28, 24.97it/s]

finished frames 1192000, mean/median reward 9.1/7.0, min/max reward 3.0/24.0


 12%|█▏        | 15005/125000 [11:42<1:16:39, 23.92it/s]

finished frames 1200000, mean/median reward 10.2/7.0, min/max reward 1.0/39.0


 12%|█▏        | 15103/125000 [11:48<1:44:08, 17.59it/s]

finished frames 1208000, mean/median reward 8.3/8.0, min/max reward 1.0/20.0


 12%|█▏        | 15203/125000 [11:53<1:44:23, 17.53it/s]

finished frames 1216000, mean/median reward 10.1/8.0, min/max reward 0.0/27.0


 12%|█▏        | 15303/125000 [12:00<2:03:41, 14.78it/s]

finished frames 1224000, mean/median reward 7.7/5.0, min/max reward 0.0/22.0


 12%|█▏        | 15403/125000 [12:06<1:54:53, 15.90it/s]

finished frames 1232000, mean/median reward 9.9/7.0, min/max reward 2.0/27.0


 12%|█▏        | 15503/125000 [12:12<1:49:33, 16.66it/s]

finished frames 1240000, mean/median reward 12.0/8.0, min/max reward 2.0/32.0


 12%|█▏        | 15603/125000 [12:18<1:45:31, 17.28it/s]

finished frames 1248000, mean/median reward 14.8/12.0, min/max reward 2.0/36.0


 13%|█▎        | 15703/125000 [12:24<2:11:36, 13.84it/s]

finished frames 1256000, mean/median reward 11.3/11.0, min/max reward 1.0/23.0


 13%|█▎        | 15804/125000 [12:29<1:18:02, 23.32it/s]

finished frames 1264000, mean/median reward 8.5/7.0, min/max reward 1.0/25.0


 13%|█▎        | 15903/125000 [12:34<1:16:36, 23.74it/s]

finished frames 1272000, mean/median reward 9.9/7.0, min/max reward 0.0/29.0


 13%|█▎        | 16005/125000 [12:38<1:24:32, 21.49it/s]

finished frames 1280000, mean/median reward 11.1/6.0, min/max reward 1.0/41.0


 13%|█▎        | 16104/125000 [12:42<1:16:26, 23.74it/s]

finished frames 1288000, mean/median reward 8.5/6.0, min/max reward 0.0/26.0


 13%|█▎        | 16203/125000 [12:46<1:17:17, 23.46it/s]

finished frames 1296000, mean/median reward 11.1/9.0, min/max reward 3.0/26.0


 13%|█▎        | 16305/125000 [12:51<1:18:09, 23.18it/s]

finished frames 1304000, mean/median reward 7.2/5.0, min/max reward 1.0/23.0


 13%|█▎        | 16404/125000 [12:57<1:47:16, 16.87it/s]

finished frames 1312000, mean/median reward 11.2/10.0, min/max reward 1.0/41.0


 13%|█▎        | 16504/125000 [13:03<1:47:17, 16.85it/s]

finished frames 1320000, mean/median reward 10.7/7.0, min/max reward 4.0/33.0


 13%|█▎        | 16604/125000 [13:09<1:46:38, 16.94it/s]

finished frames 1328000, mean/median reward 9.1/8.0, min/max reward 1.0/23.0


 13%|█▎        | 16704/125000 [13:14<1:47:16, 16.83it/s]

finished frames 1336000, mean/median reward 10.6/9.0, min/max reward 1.0/38.0


 13%|█▎        | 16804/125000 [13:20<1:46:16, 16.97it/s]

finished frames 1344000, mean/median reward 8.6/7.0, min/max reward 1.0/16.0


 14%|█▎        | 16904/125000 [13:26<1:45:25, 17.09it/s]

finished frames 1352000, mean/median reward 11.2/10.0, min/max reward 2.0/34.0


 14%|█▎        | 17005/125000 [13:31<1:15:05, 23.97it/s]

finished frames 1360000, mean/median reward 7.9/6.0, min/max reward 2.0/16.0


 14%|█▎        | 17104/125000 [13:35<1:16:05, 23.63it/s]

finished frames 1368000, mean/median reward 16.2/11.0, min/max reward 0.0/45.0


 14%|█▍        | 17203/125000 [13:39<1:14:16, 24.19it/s]

finished frames 1376000, mean/median reward 13.6/9.0, min/max reward 1.0/57.0


 14%|█▍        | 17305/125000 [13:44<1:19:31, 22.57it/s]

finished frames 1384000, mean/median reward 7.8/5.0, min/max reward 1.0/20.0


 14%|█▍        | 17404/125000 [13:48<1:16:05, 23.57it/s]

finished frames 1392000, mean/median reward 12.1/9.0, min/max reward 2.0/31.0


 14%|█▍        | 17503/125000 [13:52<1:44:02, 17.22it/s]

finished frames 1400000, mean/median reward 12.5/6.0, min/max reward 1.0/63.0


 14%|█▍        | 17603/125000 [13:58<1:47:37, 16.63it/s]

finished frames 1408000, mean/median reward 14.1/12.0, min/max reward 1.0/63.0


 14%|█▍        | 17703/125000 [14:04<1:44:54, 17.04it/s]

finished frames 1416000, mean/median reward 8.6/4.0, min/max reward 0.0/29.0


 14%|█▍        | 17803/125000 [14:10<1:44:48, 17.05it/s]

finished frames 1424000, mean/median reward 12.5/12.0, min/max reward 1.0/35.0


 14%|█▍        | 17903/125000 [14:16<1:48:28, 16.46it/s]

finished frames 1432000, mean/median reward 23.7/11.0, min/max reward 0.0/110.0


 14%|█▍        | 18003/125000 [14:21<1:16:02, 23.45it/s]

finished frames 1440000, mean/median reward 12.9/7.0, min/max reward 1.0/41.0


 14%|█▍        | 18105/125000 [14:26<1:15:33, 23.58it/s]

finished frames 1448000, mean/median reward 13.5/12.0, min/max reward 1.0/41.0


 15%|█▍        | 18204/125000 [14:30<1:16:44, 23.20it/s]

finished frames 1456000, mean/median reward 7.6/7.0, min/max reward 0.0/27.0


 15%|█▍        | 18303/125000 [14:34<1:15:36, 23.52it/s]

finished frames 1464000, mean/median reward 9.9/8.0, min/max reward 0.0/36.0


 15%|█▍        | 18405/125000 [14:38<1:15:06, 23.66it/s]

finished frames 1472000, mean/median reward 12.8/11.0, min/max reward 4.0/25.0


 15%|█▍        | 18504/125000 [14:43<1:15:59, 23.36it/s]

finished frames 1480000, mean/median reward 16.5/9.0, min/max reward 2.0/48.0


 15%|█▍        | 18603/125000 [14:48<1:44:01, 17.05it/s]

finished frames 1488000, mean/median reward 18.5/6.0, min/max reward 0.0/162.0


 15%|█▍        | 18703/125000 [14:54<1:45:17, 16.83it/s]

finished frames 1496000, mean/median reward 9.2/6.0, min/max reward 0.0/19.0


 15%|█▌        | 18803/125000 [15:00<1:51:04, 15.94it/s]

finished frames 1504000, mean/median reward 14.6/8.0, min/max reward 0.0/71.0


 15%|█▌        | 18903/125000 [15:06<1:43:29, 17.08it/s]

finished frames 1512000, mean/median reward 11.6/6.0, min/max reward 0.0/63.0


 15%|█▌        | 19003/125000 [15:12<1:45:00, 16.82it/s]

finished frames 1520000, mean/median reward 20.9/13.0, min/max reward 1.0/60.0


 15%|█▌        | 19103/125000 [15:18<1:45:36, 16.71it/s]

finished frames 1528000, mean/median reward 11.2/5.0, min/max reward 1.0/50.0


 15%|█▌        | 19205/125000 [15:24<1:20:19, 21.95it/s]

finished frames 1536000, mean/median reward 19.8/12.0, min/max reward 1.0/98.0


 15%|█▌        | 19303/125000 [15:29<1:14:49, 23.54it/s]

finished frames 1544000, mean/median reward 16.1/12.0, min/max reward 5.0/44.0


 16%|█▌        | 19405/125000 [15:33<1:13:23, 23.98it/s]

finished frames 1552000, mean/median reward 11.8/10.0, min/max reward 1.0/33.0


 16%|█▌        | 19504/125000 [15:37<1:14:07, 23.72it/s]

finished frames 1560000, mean/median reward 16.3/8.0, min/max reward 3.0/59.0


 16%|█▌        | 19603/125000 [15:41<1:14:24, 23.61it/s]

finished frames 1568000, mean/median reward 9.3/8.0, min/max reward 2.0/25.0


 16%|█▌        | 19705/125000 [15:46<1:16:50, 22.84it/s]

finished frames 1576000, mean/median reward 9.2/8.0, min/max reward 2.0/31.0


 16%|█▌        | 19804/125000 [15:50<1:15:07, 23.34it/s]

finished frames 1584000, mean/median reward 17.5/13.0, min/max reward 2.0/45.0


 16%|█▌        | 19903/125000 [15:55<1:43:26, 16.93it/s]

finished frames 1592000, mean/median reward 11.3/5.0, min/max reward 1.0/37.0


 16%|█▌        | 20005/125000 [16:00<1:17:34, 22.56it/s]

finished frames 1600000, mean/median reward 18.9/9.0, min/max reward 0.0/162.0


 16%|█▌        | 20104/125000 [16:05<1:20:48, 21.63it/s]

finished frames 1608000, mean/median reward 9.1/7.0, min/max reward 1.0/27.0


 16%|█▌        | 20204/125000 [16:09<1:16:23, 22.86it/s]

finished frames 1616000, mean/median reward 10.7/9.0, min/max reward 0.0/24.0


 16%|█▌        | 20303/125000 [16:14<1:21:58, 21.28it/s]

finished frames 1624000, mean/median reward 27.6/8.0, min/max reward 1.0/145.0


 16%|█▋        | 20405/125000 [16:19<1:14:08, 23.51it/s]

finished frames 1632000, mean/median reward 11.1/8.0, min/max reward 2.0/29.0


 16%|█▋        | 20503/125000 [16:23<1:22:16, 21.17it/s]

finished frames 1640000, mean/median reward 18.3/12.0, min/max reward 1.0/76.0


 16%|█▋        | 20603/125000 [16:30<1:44:08, 16.71it/s]

finished frames 1648000, mean/median reward 7.6/5.0, min/max reward 0.0/18.0


 17%|█▋        | 20705/125000 [16:35<1:22:03, 21.18it/s]

finished frames 1656000, mean/median reward 11.9/7.0, min/max reward 0.0/42.0


 17%|█▋        | 20804/125000 [16:39<1:15:30, 23.00it/s]

finished frames 1664000, mean/median reward 25.2/11.0, min/max reward 1.0/208.0


 17%|█▋        | 20903/125000 [16:44<1:11:45, 24.18it/s]

finished frames 1672000, mean/median reward 13.6/7.0, min/max reward 1.0/82.0


 17%|█▋        | 21002/125000 [16:48<1:27:52, 19.72it/s]

finished frames 1680000, mean/median reward 12.4/9.0, min/max reward 0.0/27.0


 17%|█▋        | 21105/125000 [16:54<1:23:33, 20.72it/s]

finished frames 1688000, mean/median reward 26.6/19.0, min/max reward 1.0/98.0


 17%|█▋        | 21204/125000 [16:58<1:15:14, 22.99it/s]

finished frames 1696000, mean/median reward 23.0/11.0, min/max reward 0.0/138.0


 17%|█▋        | 21303/125000 [17:03<1:21:04, 21.32it/s]

finished frames 1704000, mean/median reward 13.7/11.0, min/max reward 2.0/35.0


 17%|█▋        | 21405/125000 [17:07<1:17:55, 22.16it/s]

finished frames 1712000, mean/median reward 14.3/7.0, min/max reward 1.0/40.0


 17%|█▋        | 21504/125000 [17:12<1:16:14, 22.63it/s]

finished frames 1720000, mean/median reward 9.3/6.0, min/max reward 0.0/20.0


 17%|█▋        | 21603/125000 [17:16<1:20:51, 21.31it/s]

finished frames 1728000, mean/median reward 10.9/8.0, min/max reward 1.0/30.0


 17%|█▋        | 21705/125000 [17:21<1:13:12, 23.51it/s]

finished frames 1736000, mean/median reward 10.2/8.0, min/max reward 0.0/25.0


 17%|█▋        | 21804/125000 [17:26<1:24:02, 20.47it/s]

finished frames 1744000, mean/median reward 13.2/10.0, min/max reward 4.0/40.0


 18%|█▊        | 21903/125000 [17:30<1:16:52, 22.35it/s]

finished frames 1752000, mean/median reward 9.3/5.0, min/max reward 1.0/49.0


 18%|█▊        | 22004/125000 [17:35<1:27:11, 19.69it/s]

finished frames 1760000, mean/median reward 7.3/5.0, min/max reward 0.0/16.0


 18%|█▊        | 22104/125000 [17:39<1:10:37, 24.28it/s]

finished frames 1768000, mean/median reward 10.4/5.0, min/max reward 0.0/41.0


 18%|█▊        | 22203/125000 [17:44<1:21:13, 21.09it/s]

finished frames 1776000, mean/median reward 13.5/13.0, min/max reward 1.0/29.0


 18%|█▊        | 22305/125000 [17:48<1:13:36, 23.25it/s]

finished frames 1784000, mean/median reward 25.2/13.0, min/max reward 0.0/218.0


 18%|█▊        | 22404/125000 [17:53<1:11:00, 24.08it/s]

finished frames 1792000, mean/median reward 17.8/11.0, min/max reward 0.0/83.0


 18%|█▊        | 22503/125000 [17:57<1:15:15, 22.70it/s]

finished frames 1800000, mean/median reward 14.8/11.0, min/max reward 3.0/42.0


 18%|█▊        | 22605/125000 [18:01<1:10:09, 24.32it/s]

finished frames 1808000, mean/median reward 17.6/13.0, min/max reward 0.0/56.0


 18%|█▊        | 22704/125000 [18:05<1:16:51, 22.18it/s]

finished frames 1816000, mean/median reward 18.4/11.0, min/max reward 1.0/60.0


 18%|█▊        | 22803/125000 [18:10<1:12:42, 23.42it/s]

finished frames 1824000, mean/median reward 13.8/11.0, min/max reward 0.0/44.0


 18%|█▊        | 22905/125000 [18:14<1:15:19, 22.59it/s]

finished frames 1832000, mean/median reward 7.9/5.0, min/max reward 0.0/18.0


 18%|█▊        | 23004/125000 [18:18<1:09:04, 24.61it/s]

finished frames 1840000, mean/median reward 14.2/13.0, min/max reward 0.0/44.0


 18%|█▊        | 23106/125000 [18:23<1:08:55, 24.64it/s]

finished frames 1848000, mean/median reward 15.1/7.0, min/max reward 3.0/79.0


 19%|█▊        | 23205/125000 [18:27<1:10:09, 24.18it/s]

finished frames 1856000, mean/median reward 9.9/6.0, min/max reward 1.0/41.0


 19%|█▊        | 23305/125000 [18:32<1:28:02, 19.25it/s]

finished frames 1864000, mean/median reward 9.8/4.0, min/max reward 0.0/43.0


 19%|█▊        | 23404/125000 [18:36<1:11:29, 23.69it/s]

finished frames 1872000, mean/median reward 9.6/5.0, min/max reward 0.0/46.0


 19%|█▉        | 23503/125000 [18:41<1:15:01, 22.55it/s]

finished frames 1880000, mean/median reward 9.0/5.0, min/max reward 0.0/33.0


 19%|█▉        | 23605/125000 [18:45<1:12:16, 23.38it/s]

finished frames 1888000, mean/median reward 14.4/9.0, min/max reward 0.0/76.0


 19%|█▉        | 23704/125000 [18:49<1:15:16, 22.43it/s]

finished frames 1896000, mean/median reward 5.9/4.0, min/max reward 2.0/16.0


 19%|█▉        | 23803/125000 [18:54<1:10:06, 24.06it/s]

finished frames 1904000, mean/median reward 10.4/8.0, min/max reward 1.0/34.0


 19%|█▉        | 23905/125000 [18:58<1:15:06, 22.43it/s]

finished frames 1912000, mean/median reward 34.2/7.0, min/max reward 1.0/238.0


 19%|█▉        | 24004/125000 [19:04<1:42:22, 16.44it/s]

finished frames 1920000, mean/median reward 30.1/10.0, min/max reward 0.0/191.0


 19%|█▉        | 24105/125000 [19:09<1:15:34, 22.25it/s]

finished frames 1928000, mean/median reward 15.6/7.0, min/max reward 0.0/140.0


 19%|█▉        | 24204/125000 [19:14<1:11:04, 23.63it/s]

finished frames 1936000, mean/median reward 42.0/13.0, min/max reward 3.0/230.0


 19%|█▉        | 24303/125000 [19:18<1:14:50, 22.43it/s]

finished frames 1944000, mean/median reward 34.4/13.0, min/max reward 1.0/230.0


 20%|█▉        | 24405/125000 [19:22<1:08:23, 24.51it/s]

finished frames 1952000, mean/median reward 25.3/18.0, min/max reward 0.0/140.0


 20%|█▉        | 24504/125000 [19:26<1:05:54, 25.42it/s]

finished frames 1960000, mean/median reward 29.6/14.0, min/max reward 1.0/140.0


 20%|█▉        | 24603/125000 [19:31<1:09:15, 24.16it/s]

finished frames 1968000, mean/median reward 13.4/11.0, min/max reward 0.0/37.0


 20%|█▉        | 24703/125000 [19:36<1:38:04, 17.04it/s]

finished frames 1976000, mean/median reward 18.4/5.0, min/max reward 1.0/187.0


 20%|█▉        | 24805/125000 [19:40<1:11:12, 23.45it/s]

finished frames 1984000, mean/median reward 24.9/11.0, min/max reward 1.0/187.0


 20%|█▉        | 24904/125000 [19:45<1:15:06, 22.21it/s]

finished frames 1992000, mean/median reward 15.2/10.0, min/max reward 0.0/48.0


 20%|██        | 25003/125000 [19:49<1:09:09, 24.10it/s]

finished frames 2000000, mean/median reward 13.2/9.0, min/max reward 2.0/33.0


 20%|██        | 25105/125000 [19:53<1:14:31, 22.34it/s]

finished frames 2008000, mean/median reward 19.6/14.0, min/max reward 5.0/89.0


 20%|██        | 25204/125000 [19:58<1:08:50, 24.16it/s]

finished frames 2016000, mean/median reward 22.1/16.0, min/max reward 0.0/124.0


 20%|██        | 25303/125000 [20:02<1:14:49, 22.21it/s]

finished frames 2024000, mean/median reward 14.5/11.0, min/max reward 2.0/44.0


 20%|██        | 25405/125000 [20:06<1:08:34, 24.21it/s]

finished frames 2032000, mean/median reward 15.4/10.0, min/max reward 2.0/43.0


 20%|██        | 25504/125000 [20:11<1:13:52, 22.44it/s]

finished frames 2040000, mean/median reward 13.8/9.0, min/max reward 1.0/43.0


 20%|██        | 25603/125000 [20:16<1:39:41, 16.62it/s]

finished frames 2048000, mean/median reward 11.9/5.0, min/max reward 0.0/46.0


 21%|██        | 25705/125000 [20:22<1:14:54, 22.09it/s]

finished frames 2056000, mean/median reward 5.8/4.0, min/max reward 0.0/13.0


 21%|██        | 25804/125000 [20:26<1:07:46, 24.39it/s]

finished frames 2064000, mean/median reward 15.4/8.0, min/max reward 2.0/45.0


 21%|██        | 25903/125000 [20:30<1:16:28, 21.60it/s]

finished frames 2072000, mean/median reward 31.6/12.0, min/max reward 4.0/268.0


 21%|██        | 26004/125000 [20:35<1:07:33, 24.42it/s]

finished frames 2080000, mean/median reward 8.1/4.0, min/max reward 0.0/27.0


 21%|██        | 26103/125000 [20:39<1:14:05, 22.24it/s]

finished frames 2088000, mean/median reward 14.2/8.0, min/max reward 0.0/55.0


 21%|██        | 26205/125000 [20:44<1:07:35, 24.36it/s]

finished frames 2096000, mean/median reward 17.9/15.0, min/max reward 5.0/47.0


 21%|██        | 26304/125000 [20:48<1:10:34, 23.31it/s]

finished frames 2104000, mean/median reward 20.2/16.0, min/max reward 0.0/55.0


 21%|██        | 26403/125000 [20:52<1:11:45, 22.90it/s]

finished frames 2112000, mean/median reward 10.6/8.0, min/max reward 0.0/32.0


 21%|██        | 26505/125000 [20:57<1:11:50, 22.85it/s]

finished frames 2120000, mean/median reward 15.3/10.0, min/max reward 2.0/56.0


 21%|██▏       | 26604/125000 [21:01<1:13:15, 22.39it/s]

finished frames 2128000, mean/median reward 11.2/6.0, min/max reward 0.0/45.0


 21%|██▏       | 26703/125000 [21:05<1:08:00, 24.09it/s]

finished frames 2136000, mean/median reward 8.4/4.0, min/max reward 0.0/23.0


 21%|██▏       | 26804/125000 [21:10<1:48:18, 15.11it/s]

finished frames 2144000, mean/median reward 17.0/10.0, min/max reward 2.0/54.0


 22%|██▏       | 26905/125000 [21:15<1:17:03, 21.22it/s]

finished frames 2152000, mean/median reward 20.8/15.0, min/max reward 3.0/80.0


 22%|██▏       | 27004/125000 [21:19<1:06:56, 24.40it/s]

finished frames 2160000, mean/median reward 27.9/15.0, min/max reward 2.0/120.0


 22%|██▏       | 27103/125000 [21:24<1:12:49, 22.41it/s]

finished frames 2168000, mean/median reward 14.9/8.0, min/max reward 1.0/80.0


 22%|██▏       | 27205/125000 [21:28<1:11:32, 22.78it/s]

finished frames 2176000, mean/median reward 11.7/4.0, min/max reward 0.0/80.0


 22%|██▏       | 27304/125000 [21:33<1:07:54, 23.98it/s]

finished frames 2184000, mean/median reward 18.6/10.0, min/max reward 0.0/82.0


 22%|██▏       | 27406/125000 [21:37<1:05:27, 24.85it/s]

finished frames 2192000, mean/median reward 8.4/4.0, min/max reward 1.0/29.0


 22%|██▏       | 27505/125000 [21:41<1:06:08, 24.57it/s]

finished frames 2200000, mean/median reward 14.6/7.0, min/max reward 2.0/64.0


 22%|██▏       | 27604/125000 [21:45<1:36:01, 16.91it/s]

finished frames 2208000, mean/median reward 11.1/8.0, min/max reward 0.0/49.0


 22%|██▏       | 27703/125000 [21:51<1:13:22, 22.10it/s]

finished frames 2216000, mean/median reward 8.8/4.0, min/max reward 0.0/42.0


 22%|██▏       | 27805/125000 [21:55<1:06:47, 24.26it/s]

finished frames 2224000, mean/median reward 18.0/4.0, min/max reward 1.0/139.0


 22%|██▏       | 27904/125000 [22:00<1:14:29, 21.72it/s]

finished frames 2232000, mean/median reward 24.3/15.0, min/max reward 4.0/72.0


 22%|██▏       | 28003/125000 [22:04<1:06:44, 24.22it/s]

finished frames 2240000, mean/median reward 23.6/15.0, min/max reward 2.0/116.0


 22%|██▏       | 28105/125000 [22:08<1:11:51, 22.47it/s]

finished frames 2248000, mean/median reward 15.4/13.0, min/max reward 3.0/35.0


 23%|██▎       | 28204/125000 [22:12<1:09:45, 23.12it/s]

finished frames 2256000, mean/median reward 32.9/19.0, min/max reward 2.0/147.0


 23%|██▎       | 28304/125000 [22:17<1:41:57, 15.81it/s]

finished frames 2264000, mean/median reward 13.4/8.0, min/max reward 0.0/56.0


 23%|██▎       | 28405/125000 [22:22<1:04:07, 25.11it/s]

finished frames 2272000, mean/median reward 15.9/11.0, min/max reward 2.0/39.0


 23%|██▎       | 28504/125000 [22:26<1:07:42, 23.75it/s]

finished frames 2280000, mean/median reward 15.4/9.0, min/max reward 5.0/53.0


 23%|██▎       | 28606/125000 [22:30<1:02:44, 25.61it/s]

finished frames 2288000, mean/median reward 21.4/10.0, min/max reward 1.0/190.0


 23%|██▎       | 28705/125000 [22:34<1:03:05, 25.43it/s]

finished frames 2296000, mean/median reward 38.2/8.0, min/max reward 0.0/229.0


 23%|██▎       | 28804/125000 [22:38<1:07:36, 23.72it/s]

finished frames 2304000, mean/median reward 30.0/14.0, min/max reward 3.0/190.0


 23%|██▎       | 28903/125000 [22:43<1:23:02, 19.29it/s]

finished frames 2312000, mean/median reward 31.0/12.0, min/max reward 2.0/190.0


 23%|██▎       | 29003/125000 [22:49<1:39:47, 16.03it/s]

finished frames 2320000, mean/median reward 15.1/12.0, min/max reward 1.0/60.0


 23%|██▎       | 29103/125000 [22:56<1:35:24, 16.75it/s]

finished frames 2328000, mean/median reward 19.8/14.0, min/max reward 0.0/71.0


 23%|██▎       | 29203/125000 [23:02<1:33:30, 17.07it/s]

finished frames 2336000, mean/median reward 19.9/12.0, min/max reward 0.0/71.0


 23%|██▎       | 29303/125000 [23:08<1:36:29, 16.53it/s]

finished frames 2344000, mean/median reward 22.8/4.0, min/max reward 0.0/112.0


 24%|██▎       | 29403/125000 [23:14<1:34:55, 16.79it/s]

finished frames 2352000, mean/median reward 14.8/7.0, min/max reward 0.0/45.0


 24%|██▎       | 29503/125000 [23:20<1:43:02, 15.45it/s]

finished frames 2360000, mean/median reward 12.5/6.0, min/max reward 1.0/77.0


 24%|██▎       | 29603/125000 [23:26<1:04:51, 24.52it/s]

finished frames 2368000, mean/median reward 18.3/12.0, min/max reward 0.0/61.0


 24%|██▍       | 29705/125000 [23:30<1:04:44, 24.53it/s]

finished frames 2376000, mean/median reward 9.4/7.0, min/max reward 1.0/42.0


 24%|██▍       | 29804/125000 [23:34<1:06:53, 23.72it/s]

finished frames 2384000, mean/median reward 21.1/13.0, min/max reward 3.0/60.0


 24%|██▍       | 29903/125000 [23:38<1:08:28, 23.15it/s]

finished frames 2392000, mean/median reward 10.4/4.0, min/max reward 0.0/36.0


 24%|██▍       | 30005/125000 [23:42<1:02:21, 25.39it/s]

finished frames 2400000, mean/median reward 13.4/13.0, min/max reward 0.0/27.0


 24%|██▍       | 30104/125000 [23:46<1:06:33, 23.76it/s]

finished frames 2408000, mean/median reward 20.8/14.0, min/max reward 4.0/58.0


 24%|██▍       | 30203/125000 [23:50<1:15:16, 20.99it/s]

finished frames 2416000, mean/median reward 12.1/5.0, min/max reward 0.0/58.0


 24%|██▍       | 30304/125000 [23:57<1:36:00, 16.44it/s]

finished frames 2424000, mean/median reward 1.3/0.0, min/max reward 0.0/9.0


 24%|██▍       | 30404/125000 [24:03<1:29:56, 17.53it/s]

finished frames 2432000, mean/median reward 10.8/12.0, min/max reward 0.0/20.0


 24%|██▍       | 30504/125000 [24:08<1:33:25, 16.86it/s]

finished frames 2440000, mean/median reward 16.0/16.0, min/max reward 4.0/39.0


 24%|██▍       | 30604/125000 [24:14<1:28:39, 17.74it/s]

finished frames 2448000, mean/median reward 16.1/7.0, min/max reward 1.0/85.0


 25%|██▍       | 30704/125000 [24:20<1:30:11, 17.42it/s]

finished frames 2456000, mean/median reward 22.4/20.0, min/max reward 1.0/59.0


 25%|██▍       | 30806/125000 [24:25<1:04:37, 24.29it/s]

finished frames 2464000, mean/median reward 20.9/14.0, min/max reward 1.0/70.0


 25%|██▍       | 30905/125000 [24:30<1:06:04, 23.73it/s]

finished frames 2472000, mean/median reward 7.6/8.0, min/max reward 0.0/17.0


 25%|██▍       | 31004/125000 [24:34<1:03:05, 24.83it/s]

finished frames 2480000, mean/median reward 16.4/11.0, min/max reward 0.0/48.0


 25%|██▍       | 31106/125000 [24:38<1:05:42, 23.82it/s]

finished frames 2488000, mean/median reward 17.3/9.0, min/max reward 2.0/84.0


 25%|██▍       | 31205/125000 [24:42<1:07:19, 23.22it/s]

finished frames 2496000, mean/median reward 19.0/9.0, min/max reward 0.0/84.0


 25%|██▌       | 31304/125000 [24:46<1:06:40, 23.42it/s]

finished frames 2504000, mean/median reward 18.9/10.0, min/max reward 0.0/53.0


 25%|██▌       | 31403/125000 [24:51<1:35:35, 16.32it/s]

finished frames 2512000, mean/median reward 8.4/7.0, min/max reward 0.0/32.0


 25%|██▌       | 31503/125000 [24:57<1:28:55, 17.52it/s]

finished frames 2520000, mean/median reward 15.6/12.0, min/max reward 2.0/68.0


 25%|██▌       | 31603/125000 [25:03<1:28:36, 17.57it/s]

finished frames 2528000, mean/median reward 11.0/7.0, min/max reward 1.0/42.0


 25%|██▌       | 31703/125000 [25:09<1:32:44, 16.77it/s]

finished frames 2536000, mean/median reward 18.6/11.0, min/max reward 4.0/82.0


 25%|██▌       | 31803/125000 [25:15<1:43:02, 15.07it/s]

finished frames 2544000, mean/median reward 41.6/21.0, min/max reward 0.0/185.0


 26%|██▌       | 31903/125000 [25:21<1:36:28, 16.08it/s]

finished frames 2552000, mean/median reward 33.6/19.0, min/max reward 2.0/185.0


 26%|██▌       | 32003/125000 [25:28<1:33:22, 16.60it/s]

finished frames 2560000, mean/median reward 30.5/13.0, min/max reward 2.0/150.0


 26%|██▌       | 32105/125000 [25:33<1:06:02, 23.44it/s]

finished frames 2568000, mean/median reward 24.3/10.0, min/max reward 0.0/121.0


 26%|██▌       | 32204/125000 [25:38<1:14:09, 20.85it/s]

finished frames 2576000, mean/median reward 23.4/10.0, min/max reward 3.0/151.0


 26%|██▌       | 32303/125000 [25:42<1:02:32, 24.70it/s]

finished frames 2584000, mean/median reward 16.8/15.0, min/max reward 2.0/57.0


 26%|██▌       | 32405/125000 [25:46<1:02:47, 24.58it/s]

finished frames 2592000, mean/median reward 24.2/19.0, min/max reward 0.0/67.0


 26%|██▌       | 32504/125000 [25:50<1:03:32, 24.26it/s]

finished frames 2600000, mean/median reward 43.1/22.0, min/max reward 3.0/159.0


 26%|██▌       | 32603/125000 [25:54<1:03:48, 24.13it/s]

finished frames 2608000, mean/median reward 17.5/11.0, min/max reward 2.0/47.0


 26%|██▌       | 32704/125000 [25:59<1:31:31, 16.81it/s]

finished frames 2616000, mean/median reward 12.8/7.0, min/max reward 0.0/41.0


 26%|██▌       | 32804/125000 [26:05<1:29:26, 17.18it/s]

finished frames 2624000, mean/median reward 16.6/10.0, min/max reward 0.0/55.0


 26%|██▋       | 32904/125000 [26:11<1:26:30, 17.74it/s]

finished frames 2632000, mean/median reward 20.3/10.0, min/max reward 0.0/58.0


 26%|██▋       | 33004/125000 [26:17<1:26:45, 17.67it/s]

finished frames 2640000, mean/median reward 23.5/5.0, min/max reward 0.0/212.0


 26%|██▋       | 33104/125000 [26:23<1:30:59, 16.83it/s]

finished frames 2648000, mean/median reward 41.2/10.0, min/max reward 0.0/212.0


 27%|██▋       | 33204/125000 [26:29<1:28:35, 17.27it/s]

finished frames 2656000, mean/median reward 28.6/9.0, min/max reward 0.0/212.0


 27%|██▋       | 33304/125000 [26:35<1:28:06, 17.34it/s]

finished frames 2664000, mean/median reward 34.3/13.0, min/max reward 1.0/212.0


 27%|██▋       | 33405/125000 [26:40<1:02:50, 24.30it/s]

finished frames 2672000, mean/median reward 35.1/13.0, min/max reward 1.0/212.0


 27%|██▋       | 33504/125000 [26:44<1:00:33, 25.18it/s]

finished frames 2680000, mean/median reward 18.7/20.0, min/max reward 0.0/58.0


 27%|██▋       | 33603/125000 [26:48<1:03:08, 24.13it/s]

finished frames 2688000, mean/median reward 13.1/8.0, min/max reward 2.0/49.0


 27%|██▋       | 33705/125000 [26:52<1:11:05, 21.40it/s]

finished frames 2696000, mean/median reward 19.6/15.0, min/max reward 1.0/63.0


 27%|██▋       | 33804/125000 [26:56<1:00:59, 24.92it/s]

finished frames 2704000, mean/median reward 17.9/8.0, min/max reward 0.0/132.0


 27%|██▋       | 33903/125000 [27:01<1:02:08, 24.44it/s]

finished frames 2712000, mean/median reward 25.4/14.0, min/max reward 0.0/134.0


 27%|██▋       | 34003/125000 [27:06<1:29:59, 16.85it/s]

finished frames 2720000, mean/median reward 30.8/18.0, min/max reward 5.0/134.0


 27%|██▋       | 34103/125000 [27:12<1:28:19, 17.15it/s]

finished frames 2728000, mean/median reward 33.0/20.0, min/max reward 3.0/173.0


 27%|██▋       | 34203/125000 [27:18<1:24:33, 17.90it/s]

finished frames 2736000, mean/median reward 11.2/6.0, min/max reward 0.0/39.0


 27%|██▋       | 34303/125000 [27:24<1:42:30, 14.75it/s]

finished frames 2744000, mean/median reward 9.6/6.0, min/max reward 0.0/29.0


 28%|██▊       | 34403/125000 [27:30<1:49:11, 13.83it/s]

finished frames 2752000, mean/median reward 17.2/13.0, min/max reward 0.0/47.0


 28%|██▊       | 34503/125000 [27:37<1:28:50, 16.98it/s]

finished frames 2760000, mean/median reward 16.4/7.0, min/max reward 4.0/65.0


 28%|██▊       | 34603/125000 [27:43<1:36:38, 15.59it/s]

finished frames 2768000, mean/median reward 18.9/12.0, min/max reward 0.0/62.0


 28%|██▊       | 34703/125000 [27:49<1:32:27, 16.28it/s]

finished frames 2776000, mean/median reward 12.8/5.0, min/max reward 0.0/61.0


 28%|██▊       | 34803/125000 [27:55<1:27:39, 17.15it/s]

finished frames 2784000, mean/median reward 9.4/5.0, min/max reward 0.0/29.0


 28%|██▊       | 34904/125000 [27:59<1:01:12, 24.54it/s]

finished frames 2792000, mean/median reward 20.7/19.0, min/max reward 0.0/49.0


 28%|██▊       | 35006/125000 [28:04<1:00:05, 24.96it/s]

finished frames 2800000, mean/median reward 24.0/16.0, min/max reward 0.0/92.0


 28%|██▊       | 35105/125000 [28:08<1:01:09, 24.50it/s]

finished frames 2808000, mean/median reward 13.0/9.0, min/max reward 0.0/77.0


 28%|██▊       | 35204/125000 [28:12<1:05:26, 22.87it/s]

finished frames 2816000, mean/median reward 21.6/11.0, min/max reward 1.0/126.0


 28%|██▊       | 35306/125000 [28:17<1:03:00, 23.72it/s]

finished frames 2824000, mean/median reward 15.2/7.0, min/max reward 1.0/58.0


 28%|██▊       | 35402/125000 [28:21<1:13:22, 20.35it/s]

finished frames 2832000, mean/median reward 36.1/16.0, min/max reward 2.0/196.0


 28%|██▊       | 35503/125000 [28:27<1:27:24, 17.06it/s]

finished frames 2840000, mean/median reward 12.0/0.0, min/max reward 0.0/56.0


 28%|██▊       | 35603/125000 [28:33<1:25:02, 17.52it/s]

finished frames 2848000, mean/median reward 25.4/12.0, min/max reward 0.0/148.0


 29%|██▊       | 35704/125000 [28:37<59:25, 25.04it/s]  

finished frames 2856000, mean/median reward 12.0/9.0, min/max reward 0.0/30.0


 29%|██▊       | 35803/125000 [28:41<1:02:50, 23.65it/s]

finished frames 2864000, mean/median reward 36.6/24.0, min/max reward 4.0/252.0


 29%|██▊       | 35905/125000 [28:46<1:06:00, 22.49it/s]

finished frames 2872000, mean/median reward 24.6/15.0, min/max reward 0.0/81.0


 29%|██▉       | 36004/125000 [28:50<1:06:13, 22.40it/s]

finished frames 2880000, mean/median reward 12.6/7.0, min/max reward 0.0/57.0


 29%|██▉       | 36103/125000 [28:54<1:06:06, 22.41it/s]

finished frames 2888000, mean/median reward 15.9/6.0, min/max reward 0.0/65.0


 29%|██▉       | 36203/125000 [29:00<1:29:51, 16.47it/s]

finished frames 2896000, mean/median reward 15.2/4.0, min/max reward 0.0/75.0


 29%|██▉       | 36303/125000 [29:06<1:27:42, 16.86it/s]

finished frames 2904000, mean/median reward 9.8/6.0, min/max reward 1.0/22.0


 29%|██▉       | 36403/125000 [29:12<1:27:50, 16.81it/s]

finished frames 2912000, mean/median reward 22.7/10.0, min/max reward 0.0/91.0


 29%|██▉       | 36503/125000 [29:18<1:24:27, 17.46it/s]

finished frames 2920000, mean/median reward 8.4/6.0, min/max reward 0.0/25.0


 29%|██▉       | 36603/125000 [29:24<1:31:16, 16.14it/s]

finished frames 2928000, mean/median reward 32.7/9.0, min/max reward 0.0/232.0


 29%|██▉       | 36703/125000 [29:29<1:21:55, 17.96it/s]

finished frames 2936000, mean/median reward 15.0/10.0, min/max reward 0.0/50.0


 29%|██▉       | 36803/125000 [29:35<1:22:34, 17.80it/s]

finished frames 2944000, mean/median reward 28.4/13.0, min/max reward 1.0/196.0


 30%|██▉       | 36904/125000 [29:40<58:49, 24.96it/s]  

finished frames 2952000, mean/median reward 22.9/9.0, min/max reward 0.0/196.0


 30%|██▉       | 37003/125000 [29:44<1:02:16, 23.55it/s]

finished frames 2960000, mean/median reward 27.2/11.0, min/max reward 0.0/196.0


 30%|██▉       | 37105/125000 [29:48<59:38, 24.56it/s]  

finished frames 2968000, mean/median reward 29.9/12.0, min/max reward 0.0/196.0


 30%|██▉       | 37204/125000 [29:52<59:03, 24.78it/s]  

finished frames 2976000, mean/median reward 26.2/12.0, min/max reward 0.0/196.0


 30%|██▉       | 37306/125000 [29:56<58:08, 25.14it/s]  

finished frames 2984000, mean/median reward 27.6/11.0, min/max reward 0.0/196.0


 30%|██▉       | 37405/125000 [30:00<1:01:44, 23.64it/s]

finished frames 2992000, mean/median reward 28.6/13.0, min/max reward 0.0/223.0


 30%|███       | 37504/125000 [30:06<1:22:13, 17.73it/s]

finished frames 3000000, mean/median reward 13.3/11.0, min/max reward 0.0/34.0


 30%|███       | 37604/125000 [30:11<1:22:16, 17.71it/s]

finished frames 3008000, mean/median reward 12.0/8.0, min/max reward 0.0/39.0


 30%|███       | 37704/125000 [30:17<1:26:30, 16.82it/s]

finished frames 3016000, mean/median reward 15.1/11.0, min/max reward 0.0/47.0


 30%|███       | 37804/125000 [30:23<1:21:13, 17.89it/s]

finished frames 3024000, mean/median reward 32.6/20.0, min/max reward 0.0/79.0


 30%|███       | 37904/125000 [30:29<1:22:41, 17.55it/s]

finished frames 3032000, mean/median reward 33.4/22.0, min/max reward 0.0/127.0


 30%|███       | 38004/125000 [30:35<1:24:18, 17.20it/s]

finished frames 3040000, mean/median reward 23.2/13.0, min/max reward 1.0/112.0


 30%|███       | 38104/125000 [30:39<57:16, 25.28it/s]  

finished frames 3048000, mean/median reward 22.9/19.0, min/max reward 4.0/95.0


 31%|███       | 38206/125000 [30:44<57:14, 25.27it/s]  

finished frames 3056000, mean/median reward 17.9/6.0, min/max reward 1.0/84.0


 31%|███       | 38305/125000 [30:48<57:18, 25.21it/s]  

finished frames 3064000, mean/median reward 29.8/17.0, min/max reward 0.0/158.0


 31%|███       | 38404/125000 [30:52<58:37, 24.62it/s]  

finished frames 3072000, mean/median reward 19.6/9.0, min/max reward 2.0/100.0


 31%|███       | 38506/125000 [30:56<57:37, 25.02it/s]  

finished frames 3080000, mean/median reward 19.2/12.0, min/max reward 2.0/71.0


 31%|███       | 38605/125000 [31:00<1:00:06, 23.95it/s]

finished frames 3088000, mean/median reward 16.4/8.0, min/max reward 0.0/77.0


 31%|███       | 38704/125000 [31:05<1:20:08, 17.94it/s]

finished frames 3096000, mean/median reward 17.2/14.0, min/max reward 0.0/45.0


 31%|███       | 38805/125000 [31:10<58:14, 24.66it/s]  

finished frames 3104000, mean/median reward 36.8/22.0, min/max reward 1.0/155.0


 31%|███       | 38904/125000 [31:14<57:07, 25.12it/s]  

finished frames 3112000, mean/median reward 28.5/10.0, min/max reward 1.0/179.0


 31%|███       | 39006/125000 [31:18<58:10, 24.63it/s]  

finished frames 3120000, mean/median reward 23.4/15.0, min/max reward 0.0/102.0


 31%|███▏      | 39105/125000 [31:23<57:23, 24.95it/s]  

finished frames 3128000, mean/median reward 24.0/13.0, min/max reward 2.0/68.0


 31%|███▏      | 39204/125000 [31:27<59:22, 24.08it/s]  

finished frames 3136000, mean/median reward 32.8/18.0, min/max reward 2.0/105.0


 31%|███▏      | 39303/125000 [31:31<59:41, 23.93it/s]  

finished frames 3144000, mean/median reward 13.7/6.0, min/max reward 0.0/92.0


 32%|███▏      | 39404/125000 [31:36<1:29:24, 15.96it/s]

finished frames 3152000, mean/median reward 3.9/0.0, min/max reward 0.0/31.0


 32%|███▏      | 39504/125000 [31:42<1:21:24, 17.50it/s]

finished frames 3160000, mean/median reward 1.9/2.0, min/max reward 0.0/5.0


 32%|███▏      | 39604/125000 [31:48<1:31:25, 15.57it/s]

finished frames 3168000, mean/median reward 8.0/6.0, min/max reward 0.0/22.0


 32%|███▏      | 39704/125000 [31:54<1:22:45, 17.18it/s]

finished frames 3176000, mean/median reward 27.2/12.0, min/max reward 1.0/223.0


 32%|███▏      | 39804/125000 [32:00<1:21:02, 17.52it/s]

finished frames 3184000, mean/median reward 14.4/10.0, min/max reward 2.0/45.0


 32%|███▏      | 39904/125000 [32:05<1:31:45, 15.46it/s]

finished frames 3192000, mean/median reward 23.1/7.0, min/max reward 1.0/106.0


 32%|███▏      | 40004/125000 [32:11<1:24:31, 16.76it/s]

finished frames 3200000, mean/median reward 25.0/9.0, min/max reward 1.0/74.0


 32%|███▏      | 40104/125000 [32:16<58:22, 24.24it/s]  

finished frames 3208000, mean/median reward 18.3/9.0, min/max reward 1.0/49.0


 32%|███▏      | 40203/125000 [32:20<57:26, 24.60it/s]  

finished frames 3216000, mean/median reward 18.3/13.0, min/max reward 0.0/70.0


 32%|███▏      | 40305/125000 [32:24<56:36, 24.93it/s]  

finished frames 3224000, mean/median reward 17.7/7.0, min/max reward 0.0/70.0


 32%|███▏      | 40404/125000 [32:28<55:17, 25.50it/s]  

finished frames 3232000, mean/median reward 18.6/8.0, min/max reward 4.0/61.0


 32%|███▏      | 40503/125000 [32:32<55:38, 25.31it/s]  

finished frames 3240000, mean/median reward 15.5/12.0, min/max reward 0.0/51.0


 32%|███▏      | 40605/125000 [32:36<56:56, 24.70it/s]  

finished frames 3248000, mean/median reward 20.6/15.0, min/max reward 0.0/61.0


 33%|███▎      | 40704/125000 [32:40<58:24, 24.06it/s]  

finished frames 3256000, mean/median reward 20.4/7.0, min/max reward 0.0/166.0


 33%|███▎      | 40805/125000 [32:46<1:07:13, 20.87it/s]

finished frames 3264000, mean/median reward 22.8/19.0, min/max reward 0.0/113.0


 33%|███▎      | 40904/125000 [32:51<1:00:58, 22.98it/s]

finished frames 3272000, mean/median reward 27.9/16.0, min/max reward 1.0/85.0


 33%|███▎      | 41003/125000 [32:55<1:04:55, 21.56it/s]

finished frames 3280000, mean/median reward 34.7/29.0, min/max reward 0.0/84.0


 33%|███▎      | 41105/125000 [33:00<1:00:39, 23.05it/s]

finished frames 3288000, mean/median reward 25.2/13.0, min/max reward 0.0/84.0


 33%|███▎      | 41204/125000 [33:04<1:05:01, 21.48it/s]

finished frames 3296000, mean/median reward 28.4/14.0, min/max reward 0.0/130.0


 33%|███▎      | 41303/125000 [33:10<1:18:45, 17.71it/s]

finished frames 3304000, mean/median reward 20.3/9.0, min/max reward 0.0/126.0


 33%|███▎      | 41405/125000 [33:15<1:01:48, 22.54it/s]

finished frames 3312000, mean/median reward 48.7/20.0, min/max reward 0.0/185.0


 33%|███▎      | 41504/125000 [33:19<56:26, 24.65it/s]  

finished frames 3320000, mean/median reward 19.9/11.0, min/max reward 1.0/64.0


 33%|███▎      | 41603/125000 [33:23<1:04:56, 21.40it/s]

finished frames 3328000, mean/median reward 18.1/16.0, min/max reward 4.0/64.0


 33%|███▎      | 41703/125000 [33:30<1:22:31, 16.82it/s]

finished frames 3336000, mean/median reward 31.4/16.0, min/max reward 4.0/195.0


 33%|███▎      | 41802/125000 [33:35<1:07:55, 20.41it/s]

finished frames 3344000, mean/median reward 26.4/19.0, min/max reward 2.0/64.0


 34%|███▎      | 41904/125000 [33:40<1:03:10, 21.92it/s]

finished frames 3352000, mean/median reward 26.5/15.0, min/max reward 0.0/95.0


 34%|███▎      | 42003/125000 [33:44<1:02:22, 22.18it/s]

finished frames 3360000, mean/median reward 15.5/6.0, min/max reward 1.0/60.0


 34%|███▎      | 42105/125000 [33:48<1:02:27, 22.12it/s]

finished frames 3368000, mean/median reward 17.7/11.0, min/max reward 2.0/54.0


 34%|███▍      | 42204/125000 [33:53<58:49, 23.46it/s]  

finished frames 3376000, mean/median reward 29.6/7.0, min/max reward 2.0/250.0


 34%|███▍      | 42303/125000 [33:57<58:47, 23.45it/s]  

finished frames 3384000, mean/median reward 15.0/12.0, min/max reward 2.0/39.0


 34%|███▍      | 42405/125000 [34:01<55:39, 24.73it/s]  

finished frames 3392000, mean/median reward 43.9/33.0, min/max reward 0.0/156.0


 34%|███▍      | 42503/125000 [34:06<1:16:27, 17.98it/s]

finished frames 3400000, mean/median reward 43.5/20.0, min/max reward 0.0/202.0


 34%|███▍      | 42604/125000 [34:11<1:02:32, 21.96it/s]

finished frames 3408000, mean/median reward 13.6/11.0, min/max reward 0.0/34.0


 34%|███▍      | 42703/125000 [34:15<55:06, 24.89it/s]  

finished frames 3416000, mean/median reward 30.4/11.0, min/max reward 4.0/208.0


 34%|███▍      | 42805/125000 [34:20<1:03:29, 21.58it/s]

finished frames 3424000, mean/median reward 26.2/12.0, min/max reward 4.0/193.0


 34%|███▍      | 42904/125000 [34:24<56:10, 24.35it/s]  

finished frames 3432000, mean/median reward 29.9/14.0, min/max reward 3.0/118.0


 34%|███▍      | 43003/125000 [34:28<1:00:29, 22.59it/s]

finished frames 3440000, mean/median reward 35.4/22.0, min/max reward 0.0/155.0


 34%|███▍      | 43103/125000 [34:35<1:37:18, 14.03it/s]

finished frames 3448000, mean/median reward 51.5/13.0, min/max reward 0.0/178.0


 35%|███▍      | 43205/125000 [34:41<1:05:06, 20.94it/s]

finished frames 3456000, mean/median reward 24.4/15.0, min/max reward 1.0/86.0


 35%|███▍      | 43303/125000 [34:45<1:05:35, 20.76it/s]

finished frames 3464000, mean/median reward 28.7/18.0, min/max reward 1.0/132.0


 35%|███▍      | 43404/125000 [34:49<59:48, 22.74it/s]  

finished frames 3472000, mean/median reward 34.8/18.0, min/max reward 1.0/168.0


 35%|███▍      | 43503/125000 [34:54<1:09:08, 19.64it/s]

finished frames 3480000, mean/median reward 23.8/9.0, min/max reward 1.0/106.0


 35%|███▍      | 43605/125000 [34:59<1:01:59, 21.89it/s]

finished frames 3488000, mean/median reward 28.0/17.0, min/max reward 2.0/110.0


 35%|███▍      | 43704/125000 [35:03<57:05, 23.74it/s]  

finished frames 3496000, mean/median reward 23.6/16.0, min/max reward 0.0/98.0


 35%|███▌      | 43803/125000 [35:08<58:33, 23.11it/s]  

finished frames 3504000, mean/median reward 21.2/7.0, min/max reward 0.0/152.0


 35%|███▌      | 43904/125000 [35:13<1:23:13, 16.24it/s]

finished frames 3512000, mean/median reward 17.7/8.0, min/max reward 1.0/81.0


 35%|███▌      | 44004/125000 [35:19<1:00:55, 22.16it/s]

finished frames 3520000, mean/median reward 55.1/13.0, min/max reward 0.0/275.0


 35%|███▌      | 44103/125000 [35:23<1:00:26, 22.31it/s]

finished frames 3528000, mean/median reward 43.3/13.0, min/max reward 0.0/149.0


 35%|███▌      | 44203/125000 [35:28<1:01:57, 21.74it/s]

finished frames 3536000, mean/median reward 35.2/7.0, min/max reward 0.0/203.0


 35%|███▌      | 44305/125000 [35:32<57:01, 23.58it/s]  

finished frames 3544000, mean/median reward 36.1/22.0, min/max reward 3.0/203.0


 36%|███▌      | 44404/125000 [35:36<53:25, 25.15it/s]

finished frames 3552000, mean/median reward 44.4/27.0, min/max reward 2.0/156.0


 36%|███▌      | 44503/125000 [35:40<1:00:55, 22.02it/s]

finished frames 3560000, mean/median reward 45.4/20.0, min/max reward 0.0/156.0


 36%|███▌      | 44604/125000 [35:46<1:02:23, 21.47it/s]

finished frames 3568000, mean/median reward 26.4/19.0, min/max reward 0.0/97.0


 36%|███▌      | 44706/125000 [35:50<55:09, 24.26it/s]  

finished frames 3576000, mean/median reward 36.6/21.0, min/max reward 0.0/213.0


 36%|███▌      | 44805/125000 [35:55<59:00, 22.65it/s]

finished frames 3584000, mean/median reward 18.9/11.0, min/max reward 0.0/45.0


 36%|███▌      | 44904/125000 [35:59<56:22, 23.68it/s]  

finished frames 3592000, mean/median reward 26.4/16.0, min/max reward 0.0/94.0


 36%|███▌      | 45003/125000 [36:03<59:33, 22.38it/s]  

finished frames 3600000, mean/median reward 23.1/17.0, min/max reward 1.0/84.0


 36%|███▌      | 45105/125000 [36:08<54:20, 24.51it/s]  

finished frames 3608000, mean/median reward 29.0/15.0, min/max reward 1.0/172.0


 36%|███▌      | 45204/125000 [36:12<58:56, 22.56it/s]

finished frames 3616000, mean/median reward 42.6/18.0, min/max reward 1.0/172.0


 36%|███▌      | 45303/125000 [36:16<54:40, 24.30it/s]

finished frames 3624000, mean/median reward 32.8/16.0, min/max reward 5.0/177.0


 36%|███▋      | 45405/125000 [36:20<1:01:10, 21.68it/s]

finished frames 3632000, mean/median reward 48.6/36.0, min/max reward 1.0/209.0


 36%|███▋      | 45504/125000 [36:25<53:58, 24.54it/s]  

finished frames 3640000, mean/median reward 28.4/10.0, min/max reward 0.0/209.0


 36%|███▋      | 45606/125000 [36:29<56:10, 23.56it/s]  

finished frames 3648000, mean/median reward 37.2/10.0, min/max reward 0.0/189.0


 37%|███▋      | 45702/125000 [36:33<59:53, 22.07it/s]

finished frames 3656000, mean/median reward 60.8/36.0, min/max reward 2.0/189.0


 37%|███▋      | 45804/125000 [36:40<1:29:55, 14.68it/s]

finished frames 3664000, mean/median reward 49.2/16.0, min/max reward 1.0/171.0


 37%|███▋      | 45906/125000 [36:44<53:26, 24.67it/s]  

finished frames 3672000, mean/median reward 48.5/13.0, min/max reward 1.0/184.0


 37%|███▋      | 46005/125000 [36:48<53:18, 24.70it/s]

finished frames 3680000, mean/median reward 41.2/15.0, min/max reward 0.0/220.0


 37%|███▋      | 46104/125000 [36:53<56:56, 23.09it/s]

finished frames 3688000, mean/median reward 24.0/11.0, min/max reward 1.0/124.0


 37%|███▋      | 46206/125000 [36:57<52:06, 25.20it/s]

finished frames 3696000, mean/median reward 49.2/14.0, min/max reward 4.0/271.0


 37%|███▋      | 46305/125000 [37:01<51:43, 25.36it/s]

finished frames 3704000, mean/median reward 68.6/25.0, min/max reward 0.0/294.0


 37%|███▋      | 46404/125000 [37:05<54:31, 24.02it/s]

finished frames 3712000, mean/median reward 33.6/22.0, min/max reward 1.0/118.0


 37%|███▋      | 46504/125000 [37:10<1:14:59, 17.45it/s]

finished frames 3720000, mean/median reward 30.9/12.0, min/max reward 0.0/160.0


 37%|███▋      | 46604/125000 [37:16<1:13:25, 17.80it/s]

finished frames 3728000, mean/median reward 45.6/12.0, min/max reward 0.0/264.0


 37%|███▋      | 46704/125000 [37:22<1:13:39, 17.72it/s]

finished frames 3736000, mean/median reward 18.3/8.0, min/max reward 0.0/123.0


 37%|███▋      | 46804/125000 [37:27<1:25:48, 15.19it/s]

finished frames 3744000, mean/median reward 39.6/22.0, min/max reward 0.0/201.0


 38%|███▊      | 46904/125000 [37:33<1:14:29, 17.47it/s]

finished frames 3752000, mean/median reward 2.9/2.0, min/max reward 0.0/12.0


 38%|███▊      | 47004/125000 [37:39<1:15:57, 17.11it/s]

finished frames 3760000, mean/median reward 6.3/4.0, min/max reward 1.0/21.0


 38%|███▊      | 47104/125000 [37:45<1:15:59, 17.08it/s]

finished frames 3768000, mean/median reward 27.5/11.0, min/max reward 2.0/225.0


 38%|███▊      | 47204/125000 [37:51<1:14:33, 17.39it/s]

finished frames 3776000, mean/median reward 42.0/18.0, min/max reward 2.0/277.0


 38%|███▊      | 47304/125000 [37:57<1:12:18, 17.91it/s]

finished frames 3784000, mean/median reward 19.1/11.0, min/max reward 0.0/98.0


 38%|███▊      | 47405/125000 [38:01<51:23, 25.16it/s]  

finished frames 3792000, mean/median reward 23.1/23.0, min/max reward 0.0/83.0


 38%|███▊      | 47504/125000 [38:05<52:23, 24.65it/s]

finished frames 3800000, mean/median reward 47.4/20.0, min/max reward 0.0/183.0


 38%|███▊      | 47606/125000 [38:09<52:27, 24.59it/s]

finished frames 3808000, mean/median reward 63.8/29.0, min/max reward 0.0/293.0


 38%|███▊      | 47705/125000 [38:13<50:39, 25.43it/s]

finished frames 3816000, mean/median reward 19.4/14.0, min/max reward 0.0/66.0


 38%|███▊      | 47804/125000 [38:17<52:21, 24.57it/s]  

finished frames 3824000, mean/median reward 21.8/14.0, min/max reward 2.0/83.0


 38%|███▊      | 47903/125000 [38:21<53:40, 23.94it/s]

finished frames 3832000, mean/median reward 66.4/44.0, min/max reward 7.0/200.0


 38%|███▊      | 48005/125000 [38:26<51:51, 24.74it/s]

finished frames 3840000, mean/median reward 43.4/22.0, min/max reward 1.0/200.0


 38%|███▊      | 48104/125000 [38:30<1:05:42, 19.51it/s]

finished frames 3848000, mean/median reward 26.1/19.0, min/max reward 1.0/110.0


 39%|███▊      | 48203/125000 [38:35<58:43, 21.80it/s]  

finished frames 3856000, mean/median reward 25.6/17.0, min/max reward 0.0/110.0


 39%|███▊      | 48305/125000 [38:40<52:34, 24.31it/s]

finished frames 3864000, mean/median reward 20.1/17.0, min/max reward 1.0/61.0


 39%|███▊      | 48403/125000 [38:44<1:03:58, 19.95it/s]

finished frames 3872000, mean/median reward 17.6/13.0, min/max reward 0.0/57.0


 39%|███▉      | 48505/125000 [38:48<52:00, 24.51it/s]  

finished frames 3880000, mean/median reward 23.4/12.0, min/max reward 0.0/176.0


 39%|███▉      | 48604/125000 [38:53<55:42, 22.86it/s]

finished frames 3888000, mean/median reward 29.6/12.0, min/max reward 1.0/220.0


 39%|███▉      | 48705/125000 [38:58<50:56, 24.97it/s]  

finished frames 3896000, mean/median reward 13.4/8.0, min/max reward 0.0/52.0


 39%|███▉      | 48804/125000 [39:02<53:06, 23.91it/s]

finished frames 3904000, mean/median reward 14.8/8.0, min/max reward 0.0/52.0


 39%|███▉      | 48906/125000 [39:06<49:53, 25.42it/s]

finished frames 3912000, mean/median reward 16.6/9.0, min/max reward 0.0/66.0


 39%|███▉      | 49005/125000 [39:10<50:09, 25.26it/s]

finished frames 3920000, mean/median reward 76.1/31.0, min/max reward 0.0/296.0


 39%|███▉      | 49104/125000 [39:15<55:08, 22.94it/s]

finished frames 3928000, mean/median reward 48.9/14.0, min/max reward 1.0/273.0


 39%|███▉      | 49206/125000 [39:19<50:51, 24.84it/s]

finished frames 3936000, mean/median reward 21.6/12.0, min/max reward 2.0/72.0


 39%|███▉      | 49303/125000 [39:23<1:20:11, 15.73it/s]

finished frames 3944000, mean/median reward 48.5/35.0, min/max reward 0.0/120.0


 40%|███▉      | 49403/125000 [39:29<1:16:35, 16.45it/s]

finished frames 3952000, mean/median reward 32.4/22.0, min/max reward 2.0/107.0


 40%|███▉      | 49503/125000 [39:35<1:19:38, 15.80it/s]

finished frames 3960000, mean/median reward 28.0/8.0, min/max reward 0.0/166.0


 40%|███▉      | 49603/125000 [39:41<1:10:14, 17.89it/s]

finished frames 3968000, mean/median reward 26.7/15.0, min/max reward 1.0/75.0


 40%|███▉      | 49703/125000 [39:47<1:14:15, 16.90it/s]

finished frames 3976000, mean/median reward 54.4/13.0, min/max reward 0.0/186.0


 40%|███▉      | 49803/125000 [39:53<1:15:08, 16.68it/s]

finished frames 3984000, mean/median reward 46.0/26.0, min/max reward 2.0/281.0


 40%|███▉      | 49903/125000 [39:58<1:13:03, 17.13it/s]

finished frames 3992000, mean/median reward 57.0/29.0, min/max reward 0.0/257.0


 40%|████      | 50003/125000 [40:04<1:09:06, 18.09it/s]

finished frames 4000000, mean/median reward 45.1/24.0, min/max reward 0.0/179.0


 40%|████      | 50103/125000 [40:10<1:11:24, 17.48it/s]

finished frames 4008000, mean/median reward 34.7/14.0, min/max reward 0.0/187.0


 40%|████      | 50206/125000 [40:15<50:46, 24.55it/s]  

finished frames 4016000, mean/median reward 28.2/14.0, min/max reward 0.0/187.0


 40%|████      | 50305/125000 [40:19<49:41, 25.06it/s]

finished frames 4024000, mean/median reward 32.6/15.0, min/max reward 0.0/192.0


 40%|████      | 50404/125000 [40:23<50:29, 24.62it/s]

finished frames 4032000, mean/median reward 45.9/27.0, min/max reward 5.0/159.0


 40%|████      | 50506/125000 [40:27<49:05, 25.29it/s]

finished frames 4040000, mean/median reward 79.5/39.0, min/max reward 5.0/284.0


 40%|████      | 50605/125000 [40:31<49:19, 25.14it/s]

finished frames 4048000, mean/median reward 47.3/36.0, min/max reward 6.0/129.0


 41%|████      | 50704/125000 [40:35<48:52, 25.33it/s]

finished frames 4056000, mean/median reward 28.6/16.0, min/max reward 0.0/151.0


 41%|████      | 50806/125000 [40:39<49:47, 24.84it/s]

finished frames 4064000, mean/median reward 25.3/10.0, min/max reward 0.0/111.0


 41%|████      | 50904/125000 [40:44<1:07:46, 18.22it/s]

finished frames 4072000, mean/median reward 28.2/10.0, min/max reward 2.0/148.0


 41%|████      | 51004/125000 [40:49<1:10:02, 17.61it/s]

finished frames 4080000, mean/median reward 17.8/11.0, min/max reward 2.0/43.0


 41%|████      | 51104/125000 [40:55<1:09:40, 17.68it/s]

finished frames 4088000, mean/median reward 29.2/14.0, min/max reward 0.0/182.0


 41%|████      | 51204/125000 [41:00<1:10:31, 17.44it/s]

finished frames 4096000, mean/median reward 14.6/10.0, min/max reward 0.0/97.0


 41%|████      | 51304/125000 [41:06<1:07:01, 18.33it/s]

finished frames 4104000, mean/median reward 22.1/14.0, min/max reward 0.0/97.0


 41%|████      | 51404/125000 [41:12<1:08:45, 17.84it/s]

finished frames 4112000, mean/median reward 28.9/12.0, min/max reward 0.0/141.0


 41%|████      | 51505/125000 [41:16<47:53, 25.58it/s]  

finished frames 4120000, mean/median reward 20.4/16.0, min/max reward 0.0/53.0


 41%|████▏     | 51604/125000 [41:20<48:44, 25.09it/s]

finished frames 4128000, mean/median reward 30.2/13.0, min/max reward 0.0/149.0


 41%|████▏     | 51706/125000 [41:24<48:29, 25.20it/s]

finished frames 4136000, mean/median reward 22.8/4.0, min/max reward 0.0/140.0


 41%|████▏     | 51805/125000 [41:28<49:42, 24.54it/s]

finished frames 4144000, mean/median reward 29.9/4.0, min/max reward 0.0/140.0


 42%|████▏     | 51904/125000 [41:32<48:15, 25.24it/s]

finished frames 4152000, mean/median reward 32.9/15.0, min/max reward 0.0/140.0


 42%|████▏     | 52006/125000 [41:36<49:20, 24.65it/s]

finished frames 4160000, mean/median reward 51.5/14.0, min/max reward 0.0/273.0


 42%|████▏     | 52103/125000 [41:41<1:06:17, 18.33it/s]

finished frames 4168000, mean/median reward 56.2/14.0, min/max reward 0.0/294.0


 42%|████▏     | 52203/125000 [41:47<1:08:08, 17.80it/s]

finished frames 4176000, mean/median reward 19.1/7.0, min/max reward 0.0/59.0


 42%|████▏     | 52303/125000 [41:53<1:06:52, 18.12it/s]

finished frames 4184000, mean/median reward 35.4/30.0, min/max reward 0.0/179.0


 42%|████▏     | 52403/125000 [41:58<1:10:32, 17.15it/s]

finished frames 4192000, mean/median reward 40.8/12.0, min/max reward 1.0/140.0


 42%|████▏     | 52503/125000 [42:04<1:08:48, 17.56it/s]

finished frames 4200000, mean/median reward 10.7/3.0, min/max reward 0.0/87.0


 42%|████▏     | 52606/125000 [42:09<47:58, 25.15it/s]  

finished frames 4208000, mean/median reward 23.2/11.0, min/max reward 0.0/87.0


 42%|████▏     | 52705/125000 [42:13<48:49, 24.68it/s]

finished frames 4216000, mean/median reward 39.2/16.0, min/max reward 0.0/168.0


 42%|████▏     | 52804/125000 [42:17<47:57, 25.09it/s]

finished frames 4224000, mean/median reward 76.1/37.0, min/max reward 2.0/312.0


 42%|████▏     | 52906/125000 [42:21<47:07, 25.50it/s]

finished frames 4232000, mean/median reward 29.7/15.0, min/max reward 2.0/102.0


 42%|████▏     | 53005/125000 [42:25<47:47, 25.11it/s]

finished frames 4240000, mean/median reward 57.0/33.0, min/max reward 0.0/281.0


 42%|████▏     | 53104/125000 [42:29<47:55, 25.01it/s]

finished frames 4248000, mean/median reward 19.9/12.0, min/max reward 0.0/61.0


 43%|████▎     | 53203/125000 [42:33<49:23, 24.22it/s]

finished frames 4256000, mean/median reward 20.7/12.0, min/max reward 0.0/71.0


 43%|████▎     | 53305/125000 [42:38<58:26, 20.44it/s]  

finished frames 4264000, mean/median reward 25.1/10.0, min/max reward 0.0/119.0


 43%|████▎     | 53404/125000 [42:42<49:04, 24.31it/s]

finished frames 4272000, mean/median reward 10.8/5.0, min/max reward 0.0/35.0


 43%|████▎     | 53503/125000 [42:46<51:51, 22.98it/s]

finished frames 4280000, mean/median reward 22.5/7.0, min/max reward 1.0/112.0


 43%|████▎     | 53605/125000 [42:51<49:58, 23.81it/s]

finished frames 4288000, mean/median reward 19.9/15.0, min/max reward 0.0/92.0


 43%|████▎     | 53704/125000 [42:55<52:09, 22.78it/s]

finished frames 4296000, mean/median reward 49.2/19.0, min/max reward 0.0/256.0


 43%|████▎     | 53803/125000 [43:00<49:23, 24.03it/s]

finished frames 4304000, mean/median reward 44.9/13.0, min/max reward 2.0/256.0


 43%|████▎     | 53905/125000 [43:04<53:42, 22.06it/s]

finished frames 4312000, mean/median reward 41.2/19.0, min/max reward 0.0/188.0


 43%|████▎     | 54004/125000 [43:08<47:24, 24.95it/s]

finished frames 4320000, mean/median reward 15.9/8.0, min/max reward 0.0/47.0


 43%|████▎     | 54103/125000 [43:13<52:18, 22.59it/s]

finished frames 4328000, mean/median reward 24.2/10.0, min/max reward 0.0/85.0


 43%|████▎     | 54205/125000 [43:17<47:49, 24.68it/s]

finished frames 4336000, mean/median reward 22.7/7.0, min/max reward 0.0/128.0


 43%|████▎     | 54304/125000 [43:21<54:23, 21.66it/s]

finished frames 4344000, mean/median reward 24.8/15.0, min/max reward 0.0/103.0


 44%|████▎     | 54403/125000 [43:25<47:44, 24.64it/s]

finished frames 4352000, mean/median reward 43.3/26.0, min/max reward 0.0/165.0


 44%|████▎     | 54503/125000 [43:30<1:10:11, 16.74it/s]

finished frames 4360000, mean/median reward 43.2/27.0, min/max reward 5.0/134.0


 44%|████▎     | 54603/125000 [43:36<1:05:43, 17.85it/s]

finished frames 4368000, mean/median reward 43.3/17.0, min/max reward 3.0/215.0


 44%|████▍     | 54704/125000 [43:41<52:13, 22.43it/s]  

finished frames 4376000, mean/median reward 47.4/20.0, min/max reward 0.0/277.0


 44%|████▍     | 54806/125000 [43:45<47:38, 24.56it/s]

finished frames 4384000, mean/median reward 31.8/19.0, min/max reward 0.0/104.0


 44%|████▍     | 54905/125000 [43:50<53:47, 21.72it/s]

finished frames 4392000, mean/median reward 35.8/23.0, min/max reward 1.0/98.0


 44%|████▍     | 55004/125000 [43:54<45:59, 25.37it/s]

finished frames 4400000, mean/median reward 35.6/15.0, min/max reward 1.0/146.0


 44%|████▍     | 55106/125000 [43:58<46:23, 25.11it/s]

finished frames 4408000, mean/median reward 93.6/37.0, min/max reward 1.0/307.0


 44%|████▍     | 55202/125000 [44:02<57:36, 20.19it/s]

finished frames 4416000, mean/median reward 37.6/14.0, min/max reward 0.0/304.0


 44%|████▍     | 55303/125000 [44:07<51:36, 22.51it/s]  

finished frames 4424000, mean/median reward 39.8/20.0, min/max reward 0.0/141.0


 44%|████▍     | 55405/125000 [44:12<48:24, 23.96it/s]

finished frames 4432000, mean/median reward 26.0/17.0, min/max reward 1.0/78.0


 44%|████▍     | 55504/125000 [44:16<47:16, 24.50it/s]

finished frames 4440000, mean/median reward 31.2/7.0, min/max reward 0.0/223.0


 44%|████▍     | 55606/125000 [44:20<47:26, 24.37it/s]

finished frames 4448000, mean/median reward 40.6/9.0, min/max reward 0.0/223.0


 45%|████▍     | 55703/125000 [44:25<1:05:19, 17.68it/s]

finished frames 4456000, mean/median reward 28.0/8.0, min/max reward 1.0/223.0


 45%|████▍     | 55803/125000 [44:29<51:44, 22.29it/s]  

finished frames 4464000, mean/median reward 29.2/8.0, min/max reward 2.0/317.0


 45%|████▍     | 55905/125000 [44:34<46:54, 24.55it/s]

finished frames 4472000, mean/median reward 24.9/11.0, min/max reward 1.0/116.0


 45%|████▍     | 56004/125000 [44:38<48:33, 23.68it/s]

finished frames 4480000, mean/median reward 22.3/7.0, min/max reward 2.0/196.0


 45%|████▍     | 56103/125000 [44:42<48:49, 23.52it/s]

finished frames 4488000, mean/median reward 37.1/5.0, min/max reward 1.0/196.0


 45%|████▍     | 56205/125000 [44:47<46:58, 24.41it/s]

finished frames 4496000, mean/median reward 26.9/15.0, min/max reward 0.0/101.0


 45%|████▌     | 56304/125000 [44:51<46:04, 24.85it/s]

finished frames 4504000, mean/median reward 32.0/25.0, min/max reward 1.0/96.0


 45%|████▌     | 56403/125000 [44:55<46:58, 24.34it/s]

finished frames 4512000, mean/median reward 25.0/27.0, min/max reward 6.0/49.0


 45%|████▌     | 56504/125000 [44:59<1:05:05, 17.54it/s]

finished frames 4520000, mean/median reward 16.3/8.0, min/max reward 0.0/70.0


 45%|████▌     | 56604/125000 [45:04<50:09, 22.73it/s]  

finished frames 4528000, mean/median reward 14.1/8.0, min/max reward 0.0/44.0


 45%|████▌     | 56703/125000 [45:08<46:05, 24.70it/s]

finished frames 4536000, mean/median reward 30.5/18.0, min/max reward 1.0/105.0


 45%|████▌     | 56805/125000 [45:13<50:26, 22.53it/s]

finished frames 4544000, mean/median reward 25.9/19.0, min/max reward 0.0/79.0


 46%|████▌     | 56904/125000 [45:17<46:56, 24.18it/s]

finished frames 4552000, mean/median reward 23.1/10.0, min/max reward 0.0/99.0


 46%|████▌     | 57006/125000 [45:21<44:38, 25.39it/s]

finished frames 4560000, mean/median reward 21.6/9.0, min/max reward 0.0/77.0


 46%|████▌     | 57105/125000 [45:25<53:08, 21.29it/s]

finished frames 4568000, mean/median reward 31.6/13.0, min/max reward 3.0/198.0


 46%|████▌     | 57203/125000 [45:31<50:43, 22.27it/s]  

finished frames 4576000, mean/median reward 66.8/22.0, min/max reward 3.0/287.0


 46%|████▌     | 57305/125000 [45:35<46:45, 24.13it/s]

finished frames 4584000, mean/median reward 45.5/13.0, min/max reward 0.0/314.0


 46%|████▌     | 57404/125000 [45:39<50:54, 22.13it/s]

finished frames 4592000, mean/median reward 68.4/22.0, min/max reward 1.0/314.0


 46%|████▌     | 57503/125000 [45:44<49:40, 22.64it/s]

finished frames 4600000, mean/median reward 18.2/5.0, min/max reward 1.0/71.0


 46%|████▌     | 57605/125000 [45:48<51:54, 21.64it/s]

finished frames 4608000, mean/median reward 47.6/7.0, min/max reward 1.0/344.0


 46%|████▌     | 57704/125000 [45:53<47:01, 23.85it/s]

finished frames 4616000, mean/median reward 8.4/5.0, min/max reward 0.0/25.0


 46%|████▌     | 57803/125000 [45:57<49:05, 22.81it/s]

finished frames 4624000, mean/median reward 10.5/7.0, min/max reward 4.0/35.0


 46%|████▋     | 57905/125000 [46:01<44:29, 25.14it/s]

finished frames 4632000, mean/median reward 42.8/11.0, min/max reward 3.0/288.0


 46%|████▋     | 58004/125000 [46:05<50:14, 22.23it/s]

finished frames 4640000, mean/median reward 42.9/19.0, min/max reward 1.0/269.0


 46%|████▋     | 58103/125000 [46:09<43:45, 25.48it/s]

finished frames 4648000, mean/median reward 40.3/8.0, min/max reward 1.0/251.0


 47%|████▋     | 58205/125000 [46:14<47:31, 23.42it/s]

finished frames 4656000, mean/median reward 54.6/29.0, min/max reward 0.0/251.0


 47%|████▋     | 58304/125000 [46:18<44:07, 25.19it/s]

finished frames 4664000, mean/median reward 26.9/19.0, min/max reward 0.0/100.0


 47%|████▋     | 58403/125000 [46:22<44:50, 24.75it/s]

finished frames 4672000, mean/median reward 30.8/17.0, min/max reward 0.0/130.0


 47%|████▋     | 58502/125000 [46:26<51:33, 21.49it/s]

finished frames 4680000, mean/median reward 56.8/18.0, min/max reward 3.0/294.0


 47%|████▋     | 58603/125000 [46:32<48:38, 22.75it/s]  

finished frames 4688000, mean/median reward 37.8/27.0, min/max reward 3.0/161.0


 47%|████▋     | 58705/125000 [46:36<45:15, 24.41it/s]

finished frames 4696000, mean/median reward 50.8/18.0, min/max reward 1.0/221.0


 47%|████▋     | 58804/125000 [46:40<45:15, 24.37it/s]

finished frames 4704000, mean/median reward 50.9/21.0, min/max reward 1.0/221.0


 47%|████▋     | 58906/125000 [46:44<44:34, 24.72it/s]

finished frames 4712000, mean/median reward 44.2/13.0, min/max reward 1.0/221.0


 47%|████▋     | 59004/125000 [46:49<1:03:07, 17.43it/s]

finished frames 4720000, mean/median reward 23.1/11.0, min/max reward 0.0/106.0


 47%|████▋     | 59105/125000 [46:54<48:31, 22.63it/s]  

finished frames 4728000, mean/median reward 18.0/8.0, min/max reward 3.0/109.0


 47%|████▋     | 59204/125000 [46:58<45:40, 24.01it/s]

finished frames 4736000, mean/median reward 16.6/9.0, min/max reward 0.0/73.0


 47%|████▋     | 59306/125000 [47:02<46:39, 23.47it/s]

finished frames 4744000, mean/median reward 33.8/21.0, min/max reward 1.0/160.0


 48%|████▊     | 59405/125000 [47:06<47:38, 22.95it/s]

finished frames 4752000, mean/median reward 41.0/23.0, min/max reward 0.0/160.0


 48%|████▊     | 59504/125000 [47:11<48:35, 22.47it/s]

finished frames 4760000, mean/median reward 45.0/12.0, min/max reward 4.0/314.0


 48%|████▊     | 59603/125000 [47:15<50:12, 21.71it/s]

finished frames 4768000, mean/median reward 57.5/12.0, min/max reward 4.0/314.0


 48%|████▊     | 59703/125000 [47:21<1:11:43, 15.17it/s]

finished frames 4776000, mean/median reward 25.2/9.0, min/max reward 2.0/122.0


 48%|████▊     | 59804/125000 [47:27<52:59, 20.50it/s]  

finished frames 4784000, mean/median reward 24.8/13.0, min/max reward 0.0/70.0


 48%|████▊     | 59903/125000 [47:31<47:37, 22.78it/s]

finished frames 4792000, mean/median reward 38.2/16.0, min/max reward 2.0/242.0


 48%|████▊     | 60005/125000 [47:36<42:34, 25.45it/s]

finished frames 4800000, mean/median reward 26.8/11.0, min/max reward 1.0/98.0


 48%|████▊     | 60104/125000 [47:40<44:05, 24.54it/s]

finished frames 4808000, mean/median reward 28.7/16.0, min/max reward 0.0/102.0


 48%|████▊     | 60185/125000 [47:43<42:29, 25.42it/s]