default- 5000, batch size=32 wins=~4%

In [1]:
import sys
import os
import random
import numpy as np
from collections import deque
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential, clone_model
from tensorflow.keras.losses import MSE
from tensorflow.keras.optimizers import Adam
from matplotlib import pyplot as plt
import tensorflow as tf
import datetime
from statistics import mean
from tqdm import tqdm
# use tf2
# https://github.com/VXU1230/Medium-Tutorials/blob/master/dqn/cart_pole.py
# https://towardsdatascience.com/deep-reinforcement-learning-build-a-deep-q-network-dqn-to-play-cartpole-with-tensorflow-2-and-gym-8e105744b998

sys.path.append(os.getcwd())

from wumpusworld.envs.WumpusGym import WumpusWorldEnv
from BeelineAgent import Agent, Action

WORLD_SIZE= 4


class MyModel(tf.keras.Model):
    def __init__(self, num_states, hidden_units, num_actions):
        super(MyModel, self).__init__()
        self.input_layer = tf.keras.layers.InputLayer(input_shape=(num_states,))
        self.hidden_layers = []
        for i in hidden_units:
            self.hidden_layers.append(tf.keras.layers.Dense(
                i, activation='tanh', kernel_initializer='RandomNormal'))
        self.output_layer = tf.keras.layers.Dense(
            num_actions, activation='linear', kernel_initializer='RandomNormal')

    @tf.function
    def call(self, inputs):
        z = self.input_layer(inputs)
        for layer in self.hidden_layers:
            z = layer(z)
        output = self.output_layer(z)
        return output


class DQN:
    def __init__(self, num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr):
        self.num_actions = num_actions
        self.batch_size = batch_size
        self.optimizer = Adam(lr)
        self.gamma = gamma
        self.model = MyModel(num_states, hidden_units, num_actions)
        self.experience = {'s': [], 'a': [], 'r': [], 's2': [], 'done': []}
        self.max_experiences = max_experiences
        self.min_experiences = min_experiences

    def get_action(self, states, epsilon):
        if np.random.random() < epsilon:
            return np.random.choice(self.num_actions)
        else:
            return np.argmax(self.predict(np.atleast_2d(states))[0])

    def predict(self, inputs):
        return self.model(np.atleast_2d(inputs.astype('float32')))

    def add_experience(self, exp):
        if len(self.experience['s']) >= self.max_experiences:
            for key in self.experience.keys():
                self.experience[key].pop(0)
        for key, value in exp.items():
            self.experience[key].append(value)
        i = 1

    def copy_weights(self, TrainNet):
        variables1 = self.model.trainable_variables
        variables2 = TrainNet.model.trainable_variables
        for v1, v2 in zip(variables1, variables2):
            v1.assign(v2.numpy())

    def train(self, TargetNet, isdone):
        if isdone:
            pass
        elif len(self.experience['s']) < self.min_experiences:
            return 0

        # ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
        ids = range(0, len(self.experience['s']))
        states = np.asarray([self.experience['s'][i] for i in ids])
        actions = np.asarray([self.experience['a'][i] for i in ids])
        rewards = np.asarray([self.experience['r'][i] for i in ids])
        states_next = np.asarray([self.experience['s2'][i] for i in ids])
        dones = np.asarray([self.experience['done'][i] for i in ids])
        value_next = np.max(TargetNet.predict(states_next), axis=-1)
        actual_values = np.where(dones, rewards, rewards + self.gamma * value_next.squeeze())

        with tf.GradientTape() as tape:
            selected_action_values = tf.math.reduce_sum(
                self.predict(states) * tf.one_hot(actions, self.num_actions), axis=-1)
            loss = tf.math.reduce_mean(tf.square(actual_values - selected_action_values))
        variables = self.model.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        # if isinstance(loss, int):
        #     print(loss)
        # else:
        #     print(loss.numpy())

        return loss

    def train_onend(self, TargetNet):

        ids = np.random.randint(low=0, high=len(self.experience['s']), size=self.batch_size)
        states = np.asarray([self.experience['s'][i] for i in ids])
        actions = np.asarray([self.experience['a'][i] for i in ids])
        rewards = np.asarray([self.experience['r'][i] for i in ids])
        states_next = np.asarray([self.experience['s2'][i] for i in ids])
        dones = np.asarray([self.experience['done'][i] for i in ids])
        value_next = np.max(TargetNet.predict(states_next), axis=-1)
        actual_values = np.where(dones, rewards, rewards + self.gamma * value_next.squeeze())

        with tf.GradientTape() as tape:
            selected_action_values = tf.math.reduce_sum(
                self.predict(states) * tf.one_hot(actions, self.num_actions), axis=-1)
            loss = tf.math.reduce_mean(tf.square(actual_values - selected_action_values))
        variables = self.model.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss

l1 = 72
l2 = 150
l3 = 100
l4 = 6

loss_fn = MSE
learning_rate = 1e-3
optmizer = None


def action_to_string(action):
    """ action_to_string: return a string from the given action """
    if action == Action.WALK:
        return "WALK"
    if action == Action.TURNRIGHT:
        return "TURNRIGHT"
    if action == Action.TURNLEFT:
        return "TURNLEFT"
    if action == Action.SHOOT:
        return "SHOOT"
    if action == Action.GRAB:
        return "GRAB"
    if action == Action.CLIMB:
        return "CLIMB"
    return "UNKNOWN ACTION"

#
# def buildnetwork():
#     m = Sequential(
#         [
#             Input(shape=(l1,), name="layer1"),
#             Dense(l2, activation="relu", name="layer2"),
#             Dense(l3, activation="relu", name="layer3"),
#             Dense(l4, name="layer4"),
#         ]
#     )
#     m.compile(optimizer=Adam(learning_rate=learning_rate), loss=MSE)
#
#     return m


# def getoptimizer():
#     return Adam(learning_rate=learning_rate)
#
#
# def getQmodels():
#     qmodel = buildnetwork()
#     # targetmodel.set_weights(qmodel.get_weights())
#     targetmodel = clone_model(qmodel)
#     targetmodel.set_weights(qmodel.get_weights())
#     opt = getoptimizer()
#     return qmodel, targetmodel, opt


def play_game(agent, env, TrainNet, TargetNet, epsilon, copy_step):
    rewards = 0
    iter = 0
    done = False
    observations = env.reset()
    losses = list()
    gameswon = 0
    steps = 0
    while not done:
        state = agent.processPercepts(WORLD_SIZE, observations)
        # print(state[0:16])
        # print(state[16:32])
        # print(state[32:48])
        # print(state[48:64])
        # print(state[64:])
        action = TrainNet.get_action(state, epsilon)
        steps+=1
        # print(action_to_string(action))
        prev_observations = state
        observations, reward, done = env.step(action)

        state2 = agent.processPercepts(WORLD_SIZE, observations)

        rewards += int(reward)
        if done:
           env.reset()

        exp = {'s': prev_observations, 'a': action, 'r': int(reward), 's2': state2, 'done': done}
        TrainNet.add_experience(exp)

    loss = TrainNet.train(TargetNet, done)
    if isinstance(loss, int):
        losses.append(loss)
    else:
        losses.append(loss.numpy())
    iter += 1
    if iter % copy_step == 0:
        TargetNet.copy_weights(TrainNet)

    if int(reward) > 0:
        gameswon += 1

    print("steps=", steps)
    return rewards, mean(losses), gameswon


def test_model(model, epsilon):
    i = 0
    test_game = WumpusWorldEnv()
    agent = Agent()
    observations =test_game.reset()

    status = 1
    while status == 1:  # A
        state = agent.processPercepts(WORLD_SIZE, observations)
        action = model.get_action(state, epsilon)

        observations, reward, done = test_game.step(action)

        reward = int(reward)

        if reward != -1:
            if reward > 0:
                status = 2
            else:
                status = 0
        i += 1
        if i > 15:
            break

    win = True if status == 2 else False
    return win


def test(tnet, epsilon):
    max_games = 1000
    wins = 0
    for i in range(max_games):
        win = test_model(tnet, epsilon)
        if win:
            wins += 1
    win_perc = float(wins) / float(max_games)
    print("Games played: {0}, # of wins: {1}".format(max_games, wins))
    print("Win percentage: {}%".format(100.0 * win_perc))


def playgame():
    env = WumpusWorldEnv()
    env.render()
    agent = Agent()
    gamma = 0.99
    copy_step = 25
    num_states = l1
    num_actions = len(env.action_space)
    hidden_units = [200, 200]
    max_experiences = 32
    min_experiences = 32
    batch_size = 32
    lr = 1e-2
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = 'logs/dqn/' + current_time
    summary_writer = tf.summary.create_file_writer(log_dir)

    TrainNet = DQN(num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr)
    TargetNet = DQN(num_states, num_actions, hidden_units, gamma, max_experiences, min_experiences, batch_size, lr)
    N = 5000
    total_rewards = np.empty(N)
    epsilon = 0.99
    decay = 0.9999
    min_epsilon = 0.1
    sumgameswon = 0
    for n in tqdm(range(N)):
        epsilon = max(min_epsilon, epsilon * decay)
        total_reward, losses, gameswon = play_game(agent, env, TrainNet, TargetNet, epsilon, copy_step)
        sumgameswon+= gameswon
        total_rewards[n] = total_reward
        avg_rewards = total_rewards[max(0, n - 100):(n + 1)].mean()
        with summary_writer.as_default():
            tf.summary.scalar('episode reward', total_reward, step=n)
            tf.summary.scalar('running avg reward(100)', avg_rewards, step=n)
            tf.summary.scalar('average loss)', losses, step=n)
        if n % 100 == 0:
                print("episode:", n, "episode reward:", total_reward, "eps:", epsilon, "avg reward (last 100):", avg_rewards,
                      "episode loss: ", losses)
                print("avg reward for last 100 episodes:", avg_rewards)

    print("gameswon=", sumgameswon)


In [2]:
playgame()

  0%|          | 0/5000 [00:00<?, ?it/s]

+---+---+---+---+
| P | P |   |W  |
|   |   |   |   |
+---+---+---+---+
|   |   |   |  G|
|   |   |   |   |
+---+---+---+---+
|   |   |   |   |
|   |   |   |   |
+---+---+---+---+
|   |   |   |   |
| A>|   |   |   |
+---+---+---+---+
Score : 0


  0%|          | 2/5000 [00:00<34:24,  2.42it/s]

steps= 210
episode: 0 episode reward: -1218 eps: 0.989901 avg reward (last 100): -1218.0 episode loss:  31254.824
avg reward for last 100 episodes: -1218.0
steps= 155
steps= 170


  0%|          | 6/5000 [00:00<19:49,  4.20it/s]

steps= 145
steps= 34
steps= 153
steps= 24


  0%|          | 8/5000 [00:01<16:36,  5.01it/s]

steps= 146


  0%|          | 10/5000 [00:01<16:06,  5.17it/s]

steps= 283
steps= 103


  0%|          | 13/5000 [00:01<11:54,  6.98it/s]

steps= 81
steps= 68
steps= 41
steps= 15


  0%|          | 17/5000 [00:02<09:23,  8.84it/s]

steps= 86
steps= 30
steps= 266


  0%|          | 21/5000 [00:02<07:28, 11.10it/s]

steps= 170
steps= 133
steps= 133
steps= 5


  0%|          | 25/5000 [00:02<06:34, 12.60it/s]

steps= 120
steps= 64
steps= 9
steps= 61


  1%|          | 27/5000 [00:03<08:13, 10.07it/s]

steps= 257
steps= 88


  1%|          | 29/5000 [00:03<09:11,  9.02it/s]

steps= 366
steps= 255
steps= 55


  1%|          | 32/5000 [00:03<10:23,  7.97it/s]

steps= 323
steps= 229


  1%|          | 34/5000 [00:03<09:44,  8.50it/s]

steps= 74
steps= 59


  1%|          | 36/5000 [00:04<10:49,  7.64it/s]

steps= 113
steps= 247


  1%|          | 38/5000 [00:04<11:43,  7.06it/s]

steps= 108
steps= 247
steps= 71


  1%|          | 40/5000 [00:04<09:28,  8.72it/s]

steps= 39
steps= 245


  1%|          | 44/5000 [00:05<10:15,  8.05it/s]

steps= 344
steps= 17
steps= 152


  1%|          | 46/5000 [00:05<09:43,  8.50it/s]

steps= 88
steps= 40


  1%|          | 48/5000 [00:05<09:51,  8.37it/s]

steps= 181
steps= 45
steps= 52


  1%|          | 51/5000 [00:06<12:13,  6.75it/s]

steps= 427
steps= 298


  1%|          | 53/5000 [00:06<10:14,  8.05it/s]

steps= 97
steps= 124
steps= 115


  1%|          | 57/5000 [00:06<07:47, 10.58it/s]

steps= 169
steps= 130
steps= 70
steps= 123


  1%|          | 59/5000 [00:06<07:35, 10.84it/s]

steps= 98
steps= 177


  1%|          | 62/5000 [00:07<12:39,  6.50it/s]

steps= 175
steps= 197


  1%|▏         | 64/5000 [00:07<12:01,  6.84it/s]

steps= 129
steps= 246


  1%|▏         | 66/5000 [00:07<10:52,  7.56it/s]

steps= 34
steps= 148


  1%|▏         | 68/5000 [00:08<10:27,  7.86it/s]

steps= 62
steps= 117
steps= 14


  1%|▏         | 71/5000 [00:08<10:14,  8.02it/s]

steps= 301
steps= 74


  1%|▏         | 72/5000 [00:08<10:35,  7.76it/s]

steps= 126
steps= 58
steps= 61


  2%|▏         | 77/5000 [00:09<08:10, 10.04it/s]

steps= 234
steps= 148
steps= 27
steps= 62


  2%|▏         | 81/5000 [00:09<06:53, 11.90it/s]

steps= 82
steps= 205
steps= 98


  2%|▏         | 84/5000 [00:09<06:19, 12.96it/s]

steps= 64
steps= 40
steps= 191
steps= 42


  2%|▏         | 86/5000 [00:09<09:35,  8.54it/s]

steps= 448
steps= 164


  2%|▏         | 88/5000 [00:10<10:34,  7.75it/s]

steps= 160
steps= 50


  2%|▏         | 91/5000 [00:10<11:49,  6.92it/s]

steps= 244
steps= 174


  2%|▏         | 93/5000 [00:10<10:52,  7.52it/s]

steps= 55
steps= 116
steps= 22


  2%|▏         | 97/5000 [00:11<08:36,  9.49it/s]

steps= 112
steps= 6
steps= 98


  2%|▏         | 99/5000 [00:11<09:30,  8.60it/s]

steps= 174
steps= 212


  2%|▏         | 101/5000 [00:11<10:51,  7.52it/s]

steps= 204
steps= 53
episode: 100 episode reward: -1061 eps: 0.980050830419928 avg reward (last 100): -1022.2277227722773 episode loss:  24025.24
avg reward for last 100 episodes: -1022.2277227722773


  2%|▏         | 103/5000 [00:12<11:13,  7.27it/s]

steps= 188
steps= 144


  2%|▏         | 105/5000 [00:12<11:51,  6.88it/s]

steps= 137
steps= 141


  2%|▏         | 107/5000 [00:12<11:49,  6.89it/s]

steps= 206
steps= 171


  2%|▏         | 110/5000 [00:13<11:25,  7.13it/s]

steps= 343
steps= 12
steps= 235


  2%|▏         | 113/5000 [00:13<09:33,  8.52it/s]

steps= 101
steps= 68
steps= 53


  2%|▏         | 115/5000 [00:13<11:20,  7.18it/s]

steps= 204
steps= 158


  2%|▏         | 116/5000 [00:14<17:31,  4.64it/s]

steps= 345
steps= 22


  2%|▏         | 118/5000 [00:14<15:24,  5.28it/s]

steps= 168
steps= 47


  2%|▏         | 120/5000 [00:14<14:35,  5.58it/s]

steps= 211
steps= 67


  2%|▏         | 123/5000 [00:15<11:28,  7.08it/s]

steps= 127
steps= 99


  2%|▎         | 125/5000 [00:15<10:40,  7.61it/s]

steps= 107
steps= 94


  3%|▎         | 128/5000 [00:15<10:10,  7.99it/s]

steps= 220
steps= 39
steps= 20


  3%|▎         | 130/5000 [00:15<09:54,  8.19it/s]

steps= 97
steps= 78
steps= 23


  3%|▎         | 132/5000 [00:15<08:25,  9.63it/s]

steps= 64
steps= 130


  3%|▎         | 134/5000 [00:16<09:35,  8.45it/s]

steps= 135
steps= 36


  3%|▎         | 136/5000 [00:16<09:46,  8.30it/s]

steps= 96


  3%|▎         | 139/5000 [00:17<12:40,  6.39it/s]

steps= 374
steps= 10
steps= 120


  3%|▎         | 141/5000 [00:17<11:36,  6.97it/s]

steps= 100
steps= 159


  3%|▎         | 143/5000 [00:17<12:48,  6.32it/s]

steps= 175
steps= 117


  3%|▎         | 146/5000 [00:17<10:18,  7.84it/s]

steps= 132
steps= 22
steps= 94


  3%|▎         | 149/5000 [00:18<11:51,  6.82it/s]

steps= 379
steps= 147
steps= 73


  3%|▎         | 154/5000 [00:18<07:53, 10.23it/s]

steps= 96
steps= 143
steps= 72
steps= 9
steps= 43


  3%|▎         | 158/5000 [00:18<07:01, 11.48it/s]

steps= 254
steps= 20
steps= 105
steps= 149


  3%|▎         | 160/5000 [00:19<07:11, 11.22it/s]

steps= 268
steps= 64
steps= 46


  3%|▎         | 165/5000 [00:19<06:53, 11.69it/s]

steps= 338
steps= 26
steps= 35
steps= 79


  3%|▎         | 167/5000 [00:19<08:19,  9.67it/s]

steps= 350
steps= 35
steps= 256


  3%|▎         | 169/5000 [00:20<11:26,  7.03it/s]

steps= 232


  3%|▎         | 172/5000 [00:20<13:38,  5.90it/s]

steps= 561
steps= 136
steps= 47


  3%|▎         | 174/5000 [00:21<12:39,  6.35it/s]

steps= 315
steps= 65
steps= 99


  4%|▎         | 178/5000 [00:21<10:49,  7.42it/s]

steps= 174
steps= 12
steps= 126


  4%|▎         | 180/5000 [00:21<12:11,  6.59it/s]

steps= 303
steps= 213


  4%|▎         | 181/5000 [00:22<12:27,  6.45it/s]

steps= 175
steps= 34
steps= 79


  4%|▎         | 185/5000 [00:22<10:23,  7.73it/s]

steps= 321
steps= 115
steps= 128


  4%|▎         | 187/5000 [00:22<09:08,  8.77it/s]

steps= 93


  4%|▍         | 190/5000 [00:23<10:45,  7.45it/s]

steps= 222
steps= 38
steps= 85


  4%|▍         | 192/5000 [00:23<10:06,  7.92it/s]

steps= 37
steps= 81


  4%|▍         | 193/5000 [00:23<11:02,  7.26it/s]

steps= 292
steps= 149


  4%|▍         | 197/5000 [00:23<09:09,  8.74it/s]

steps= 84
steps= 27
steps= 115


  4%|▍         | 199/5000 [00:24<09:13,  8.67it/s]

steps= 167
steps= 84
steps= 171


  4%|▍         | 201/5000 [00:24<10:24,  7.69it/s]

steps= 198
episode: 200 episode reward: -1206 eps: 0.9702986765411791 avg reward (last 100): -1005.009900990099 episode loss:  29372.96
avg reward for last 100 episodes: -1005.009900990099


  4%|▍         | 202/5000 [00:24<12:15,  6.53it/s]

steps= 401
steps= 61


  4%|▍         | 205/5000 [00:24<10:32,  7.58it/s]

steps= 219
steps= 52
steps= 13


  4%|▍         | 208/5000 [00:25<12:29,  6.39it/s]

steps= 382
steps= 143


  4%|▍         | 210/5000 [00:25<13:25,  5.95it/s]

steps= 132
steps= 143


  4%|▍         | 212/5000 [00:26<13:31,  5.90it/s]

steps= 5
steps= 356


  4%|▍         | 214/5000 [00:26<14:20,  5.56it/s]

steps= 257
steps= 48


  4%|▍         | 216/5000 [00:26<12:29,  6.38it/s]

steps= 51
steps= 84
steps= 7


  4%|▍         | 218/5000 [00:27<11:20,  7.02it/s]

steps= 177


  4%|▍         | 221/5000 [00:27<16:50,  4.73it/s]

steps= 488
steps= 70
steps= 99
steps= 4


  4%|▍         | 225/5000 [00:28<12:13,  6.51it/s]

steps= 205
steps= 52
steps= 64


  5%|▍         | 228/5000 [00:28<10:35,  7.51it/s]

steps= 159
steps= 20
steps= 37


  5%|▍         | 230/5000 [00:28<12:00,  6.62it/s]

steps= 322
steps= 69


  5%|▍         | 231/5000 [00:29<12:30,  6.35it/s]

steps= 161
steps= 96
steps= 29


  5%|▍         | 236/5000 [00:29<09:40,  8.21it/s]

steps= 114
steps= 53
steps= 85


  5%|▍         | 238/5000 [00:29<11:06,  7.15it/s]

steps= 98
steps= 124


  5%|▍         | 240/5000 [00:29<09:36,  8.26it/s]

steps= 83
steps= 40
steps= 42
steps= 6


  5%|▍         | 245/5000 [00:30<07:49, 10.12it/s]

steps= 199
steps= 21
steps= 55
steps= 84


  5%|▍         | 249/5000 [00:30<09:20,  8.47it/s]

steps= 236
steps= 22
steps= 124


  5%|▌         | 251/5000 [00:31<11:22,  6.96it/s]

steps= 75
steps= 140


  5%|▌         | 252/5000 [00:31<13:02,  6.06it/s]

steps= 137


  5%|▌         | 253/5000 [00:31<15:58,  4.95it/s]

steps= 140


  5%|▌         | 255/5000 [00:32<13:55,  5.68it/s]

steps= 201
steps= 100
steps= 140


  5%|▌         | 257/5000 [00:32<12:44,  6.20it/s]

steps= 298
steps= 194


  5%|▌         | 259/5000 [00:32<14:56,  5.29it/s]

steps= 162
steps= 66


  5%|▌         | 261/5000 [00:32<13:08,  6.01it/s]

steps= 91
steps= 58


  5%|▌         | 263/5000 [00:33<11:51,  6.65it/s]

steps= 122


  5%|▌         | 264/5000 [00:33<15:15,  5.17it/s]

steps= 339
steps= 43


  5%|▌         | 267/5000 [00:33<12:35,  6.26it/s]

steps= 168
steps= 92
steps= 47


  5%|▌         | 269/5000 [00:33<10:03,  7.84it/s]

steps= 116
steps= 143


  5%|▌         | 271/5000 [00:34<10:33,  7.46it/s]

steps= 136


  5%|▌         | 272/5000 [00:34<13:15,  5.94it/s]

steps= 237
steps= 41


  6%|▌         | 277/5000 [00:35<12:22,  6.36it/s]

steps= 848
steps= 45
steps= 23
steps= 62


  6%|▌         | 279/5000 [00:35<12:02,  6.53it/s]

steps= 263
steps= 97


  6%|▌         | 281/5000 [00:35<10:28,  7.51it/s]

steps= 217
steps= 48


  6%|▌         | 285/5000 [00:36<09:00,  8.73it/s]

steps= 397
steps= 54
steps= 76
steps= 148


  6%|▌         | 287/5000 [00:36<08:10,  9.61it/s]

steps= 35
steps= 167
steps= 67


  6%|▌         | 292/5000 [00:36<06:15, 12.53it/s]

steps= 35
steps= 48
steps= 37
steps= 125


  6%|▌         | 294/5000 [00:36<06:34, 11.92it/s]

steps= 88
steps= 46


  6%|▌         | 296/5000 [00:37<08:34,  9.15it/s]

steps= 399
steps= 211
steps= 228


  6%|▌         | 298/5000 [00:37<12:16,  6.38it/s]

steps= 326
steps= 41


  6%|▌         | 301/5000 [00:37<11:26,  6.85it/s]

steps= 167
steps= 179
episode: 300 episode reward: -1187 eps: 0.960643563042708 avg reward (last 100): -1064.8712871287128 episode loss:  27452.986
avg reward for last 100 episodes: -1064.8712871287128


  6%|▌         | 303/5000 [00:38<11:41,  6.70it/s]

steps= 130
steps= 151


  6%|▌         | 304/5000 [00:38<12:48,  6.11it/s]

steps= 164
steps= 18


  6%|▌         | 306/5000 [00:38<12:02,  6.50it/s]

steps= 215
steps= 51


  6%|▌         | 310/5000 [00:39<10:01,  7.79it/s]

steps= 247
steps= 60
steps= 45


  6%|▋         | 313/5000 [00:39<09:05,  8.60it/s]

steps= 157
steps= 29
steps= 117


  6%|▋         | 315/5000 [00:39<08:05,  9.65it/s]

steps= 47
steps= 63


  6%|▋         | 317/5000 [00:40<09:46,  7.98it/s]

steps= 234
steps= 80


  6%|▋         | 318/5000 [00:40<14:18,  5.46it/s]

steps= 319


  6%|▋         | 320/5000 [00:40<14:24,  5.41it/s]

steps= 253
steps= 147


  6%|▋         | 322/5000 [00:40<12:54,  6.04it/s]

steps= 70
steps= 118


  6%|▋         | 325/5000 [00:41<10:04,  7.74it/s]

steps= 101
steps= 25
steps= 73
steps= 45


  7%|▋         | 329/5000 [00:41<09:40,  8.05it/s]

steps= 237
steps= 24
steps= 119


  7%|▋         | 330/5000 [00:41<10:42,  7.26it/s]

steps= 116
steps= 50


  7%|▋         | 333/5000 [00:42<11:07,  6.99it/s]

steps= 255
steps= 129
steps= 88


  7%|▋         | 335/5000 [00:42<09:01,  8.61it/s]

steps= 52


  7%|▋         | 338/5000 [00:42<10:34,  7.35it/s]

steps= 532
steps= 40
steps= 259


  7%|▋         | 340/5000 [00:43<09:01,  8.60it/s]

steps= 86
steps= 91
steps= 176


  7%|▋         | 342/5000 [00:43<08:08,  9.54it/s]

steps= 83


  7%|▋         | 344/5000 [00:43<09:27,  8.21it/s]

steps= 354
steps= 124
steps= 122


  7%|▋         | 347/5000 [00:43<09:14,  8.39it/s]

steps= 96
steps= 245


  7%|▋         | 350/5000 [00:44<08:33,  9.05it/s]

steps= 36
steps= 36
steps= 168


  7%|▋         | 352/5000 [00:44<08:44,  8.86it/s]

steps= 50
steps= 336


  7%|▋         | 354/5000 [00:44<09:03,  8.54it/s]

steps= 53
steps= 181


  7%|▋         | 356/5000 [00:44<07:37, 10.16it/s]

steps= 56
steps= 35
steps= 210


  7%|▋         | 360/5000 [00:45<09:11,  8.41it/s]

steps= 135
steps= 34
steps= 70


  7%|▋         | 362/5000 [00:45<09:21,  8.26it/s]

steps= 175
steps= 106
steps= 28


  7%|▋         | 366/5000 [00:46<09:03,  8.53it/s]

steps= 354
steps= 43
steps= 66


  7%|▋         | 368/5000 [00:46<09:28,  8.15it/s]

steps= 60
steps= 205


  7%|▋         | 370/5000 [00:46<09:09,  8.43it/s]

steps= 15
steps= 70


  7%|▋         | 371/5000 [00:46<11:48,  6.53it/s]

steps= 219


  7%|▋         | 372/5000 [00:47<15:50,  4.87it/s]

steps= 352
steps= 45


  8%|▊         | 375/5000 [00:47<13:29,  5.71it/s]

steps= 131
steps= 257


  8%|▊         | 377/5000 [00:48<14:47,  5.21it/s]

steps= 218
steps= 148


  8%|▊         | 380/5000 [00:48<13:10,  5.84it/s]

steps= 234
steps= 47
steps= 106


  8%|▊         | 383/5000 [00:48<11:25,  6.74it/s]

steps= 124
steps= 22
steps= 94


  8%|▊         | 385/5000 [00:49<10:34,  7.27it/s]

steps= 63
steps= 50


  8%|▊         | 386/5000 [00:49<10:14,  7.51it/s]

steps= 79


  8%|▊         | 387/5000 [00:49<21:09,  3.63it/s]

steps= 574
steps= 31


  8%|▊         | 390/5000 [00:50<16:23,  4.69it/s]

steps= 118
steps= 210


  8%|▊         | 392/5000 [00:50<13:19,  5.76it/s]

steps= 127
steps= 144
steps= 32


  8%|▊         | 394/5000 [00:50<14:04,  5.46it/s]

steps= 355
steps= 42


  8%|▊         | 397/5000 [00:51<12:08,  6.32it/s]

steps= 114
steps= 86


  8%|▊         | 398/5000 [00:51<13:30,  5.68it/s]

steps= 235
steps= 57
steps= 136


  8%|▊         | 402/5000 [00:51<09:36,  7.98it/s]

steps= 48
episode: 400 episode reward: -1056 eps: 0.9510845243085565 avg reward (last 100): -1081.6237623762377 episode loss:  21364.5
avg reward for last 100 episodes: -1081.6237623762377
steps= 44
steps= 77


  8%|▊         | 405/5000 [00:52<10:40,  7.17it/s]

steps= 128
steps= 157


  8%|▊         | 408/5000 [00:52<09:03,  8.44it/s]

steps= 17
steps= 12
steps= 72


  8%|▊         | 410/5000 [00:52<08:44,  8.76it/s]

steps= 20
steps= 120


  8%|▊         | 412/5000 [00:52<07:24, 10.32it/s]

steps= 110
steps= 95
steps= 33


  8%|▊         | 414/5000 [00:53<09:44,  7.85it/s]

steps= 321
steps= 106


  8%|▊         | 417/5000 [00:53<09:52,  7.74it/s]

steps= 228
steps= 108
steps= 72


  8%|▊         | 421/5000 [00:53<07:51,  9.72it/s]

steps= 213
steps= 60
steps= 75


  8%|▊         | 423/5000 [00:54<09:07,  8.36it/s]

steps= 361
steps= 94


  9%|▊         | 426/5000 [00:54<08:43,  8.74it/s]

steps= 222
steps= 69
steps= 129


  9%|▊         | 428/5000 [00:54<09:07,  8.35it/s]

steps= 189
steps= 77
steps= 120


  9%|▊         | 431/5000 [00:55<09:06,  8.36it/s]

steps= 321
steps= 158
steps= 22


  9%|▊         | 435/5000 [00:55<07:07, 10.68it/s]

steps= 112
steps= 126
steps= 62
steps= 33


  9%|▊         | 437/5000 [00:55<06:16, 12.11it/s]

steps= 41
steps= 58


  9%|▉         | 442/5000 [00:56<06:32, 11.62it/s]

steps= 317
steps= 26
steps= 10
steps= 86


  9%|▉         | 444/5000 [00:56<08:40,  8.75it/s]

steps= 206
steps= 140


  9%|▉         | 446/5000 [00:56<09:16,  8.18it/s]

steps= 171
steps= 65
steps= 19


  9%|▉         | 448/5000 [00:56<08:42,  8.72it/s]

steps= 112


  9%|▉         | 451/5000 [00:57<09:49,  7.72it/s]

steps= 319
steps= 40
steps= 255


  9%|▉         | 453/5000 [00:57<11:18,  6.71it/s]

steps= 191
steps= 292


  9%|▉         | 455/5000 [00:57<09:44,  7.77it/s]

steps= 7
steps= 264
steps= 53


  9%|▉         | 457/5000 [00:57<08:49,  8.58it/s]

steps= 134


  9%|▉         | 461/5000 [00:58<09:02,  8.37it/s]

steps= 285
steps= 23
steps= 10
steps= 111
steps= 273


  9%|▉         | 463/5000 [00:58<11:45,  6.44it/s]

steps= 435


  9%|▉         | 465/5000 [00:59<11:48,  6.40it/s]

steps= 350
steps= 175
steps= 38
steps= 34


  9%|▉         | 468/5000 [00:59<09:34,  7.88it/s]

steps= 111
steps= 102


  9%|▉         | 470/5000 [00:59<10:13,  7.39it/s]

steps= 122


  9%|▉         | 472/5000 [01:00<11:30,  6.56it/s]

steps= 235
steps= 67


  9%|▉         | 474/5000 [01:00<20:27,  3.69it/s]

steps= 336
steps= 171


 10%|▉         | 475/5000 [01:01<17:29,  4.31it/s]

steps= 231
steps= 128


 10%|▉         | 478/5000 [01:01<14:00,  5.38it/s]

steps= 40
steps= 63


 10%|▉         | 481/5000 [01:01<10:19,  7.29it/s]

steps= 114
steps= 66
steps= 44


 10%|▉         | 483/5000 [01:01<09:06,  8.26it/s]

steps= 41
steps= 47
steps= 121


 10%|▉         | 485/5000 [01:02<08:27,  8.89it/s]

steps= 71
steps= 166


 10%|▉         | 488/5000 [01:02<09:35,  7.84it/s]

steps= 178
steps= 190
steps= 55


 10%|▉         | 493/5000 [01:02<07:03, 10.65it/s]

steps= 187
steps= 9
steps= 22
steps= 15


 10%|▉         | 495/5000 [01:03<10:00,  7.50it/s]

steps= 358
steps= 109


 10%|▉         | 497/5000 [01:03<10:37,  7.07it/s]

steps= 133
steps= 130


 10%|▉         | 498/5000 [01:03<09:46,  7.68it/s]

steps= 69
steps= 34


 10%|█         | 503/5000 [01:04<08:36,  8.71it/s]

steps= 274
steps= 30
episode: 500 episode reward: -1038 eps: 0.9416206043312847 avg reward (last 100): -1035.6534653465346 episode loss:  35130.8
avg reward for last 100 episodes: -1035.6534653465346
steps= 35
steps= 87


 10%|█         | 505/5000 [01:04<07:55,  9.45it/s]

steps= 31
steps= 106


 10%|█         | 507/5000 [01:04<12:24,  6.04it/s]

steps= 316
steps= 358


 10%|█         | 510/5000 [01:05<10:05,  7.42it/s]

steps= 44
steps= 82
steps= 183


 10%|█         | 514/5000 [01:05<07:48,  9.57it/s]

steps= 81
steps= 63
steps= 35
steps= 83
steps= 103


 10%|█         | 516/5000 [01:06<13:47,  5.42it/s]

steps= 393


 10%|█         | 518/5000 [01:07<22:00,  3.39it/s]

steps= 752
steps= 193


 10%|█         | 519/5000 [01:07<23:32,  3.17it/s]

steps= 349


 10%|█         | 521/5000 [01:07<20:25,  3.65it/s]

steps= 361
steps= 95
steps= 68


 10%|█         | 525/5000 [01:08<13:10,  5.66it/s]

steps= 36
steps= 34
steps= 72


 11%|█         | 526/5000 [01:08<14:29,  5.15it/s]

steps= 84


 11%|█         | 527/5000 [01:08<17:23,  4.29it/s]

steps= 154
steps= 65
steps= 248


 11%|█         | 529/5000 [01:09<14:27,  5.15it/s]

steps= 25
steps= 44


 11%|█         | 532/5000 [01:09<12:44,  5.84it/s]

steps= 168


 11%|█         | 533/5000 [01:09<15:00,  4.96it/s]

steps= 122
steps= 26


 11%|█         | 535/5000 [01:10<17:48,  4.18it/s]

steps= 319


 11%|█         | 538/5000 [01:10<13:50,  5.38it/s]

steps= 80
steps= 10
steps= 19


 11%|█         | 539/5000 [01:10<14:00,  5.31it/s]

steps= 83


 11%|█         | 541/5000 [01:11<16:18,  4.56it/s]

steps= 150
steps= 87


 11%|█         | 543/5000 [01:11<13:48,  5.38it/s]

steps= 59
steps= 93
steps= 155


 11%|█         | 546/5000 [01:12<12:08,  6.11it/s]

steps= 182
steps= 129


 11%|█         | 547/5000 [01:12<18:05,  4.10it/s]

steps= 325


 11%|█         | 548/5000 [01:13<22:11,  3.34it/s]

steps= 157
steps= 24


 11%|█         | 550/5000 [01:13<18:28,  4.02it/s]

steps= 233
steps= 26


 11%|█         | 552/5000 [01:13<15:28,  4.79it/s]

steps= 173
steps= 41


 11%|█         | 555/5000 [01:13<13:22,  5.54it/s]

steps= 190
steps= 106


 11%|█         | 556/5000 [01:14<12:53,  5.75it/s]

steps= 112


 11%|█         | 558/5000 [01:14<13:44,  5.39it/s]

steps= 146
steps= 228


 11%|█         | 559/5000 [01:14<12:38,  5.85it/s]

steps= 35
steps= 266


 11%|█         | 562/5000 [01:15<14:21,  5.15it/s]

steps= 191
steps= 74


 11%|█▏        | 564/5000 [01:15<13:58,  5.29it/s]

steps= 215
steps= 51


 11%|█▏        | 565/5000 [01:15<13:21,  5.53it/s]

steps= 85
steps= 32


 11%|█▏        | 568/5000 [01:16<10:39,  6.93it/s]

steps= 121
steps= 113


 11%|█▏        | 569/5000 [01:16<11:28,  6.44it/s]

steps= 190
steps= 77


 11%|█▏        | 573/5000 [01:16<11:29,  6.42it/s]

steps= 344
steps= 12
steps= 24
steps= 26


 12%|█▏        | 575/5000 [01:17<11:32,  6.39it/s]

steps= 171


 12%|█▏        | 576/5000 [01:17<14:07,  5.22it/s]

steps= 194


 12%|█▏        | 579/5000 [01:18<13:30,  5.45it/s]

steps= 247
steps= 60
steps= 96


 12%|█▏        | 581/5000 [01:18<11:01,  6.68it/s]

steps= 56
steps= 78
steps= 63


 12%|█▏        | 583/5000 [01:18<14:07,  5.21it/s]

steps= 450


 12%|█▏        | 586/5000 [01:19<14:02,  5.24it/s]

steps= 251
steps= 47
steps= 30


 12%|█▏        | 587/5000 [01:19<13:12,  5.56it/s]

steps= 204


 12%|█▏        | 588/5000 [01:19<14:12,  5.17it/s]

steps= 242
steps= 16
steps= 113


 12%|█▏        | 592/5000 [01:19<10:09,  7.24it/s]

steps= 86
steps= 81


 12%|█▏        | 593/5000 [01:20<12:21,  5.94it/s]

steps= 183
steps= 29


 12%|█▏        | 595/5000 [01:20<11:14,  6.53it/s]

steps= 150
steps= 74


 12%|█▏        | 598/5000 [01:20<11:38,  6.30it/s]

steps= 315
steps= 111


 12%|█▏        | 600/5000 [01:21<16:25,  4.47it/s]

steps= 545
steps= 152


 12%|█▏        | 601/5000 [01:21<14:43,  4.98it/s]

steps= 55
episode: 600 episode reward: -1063 eps: 0.9322508566163586 avg reward (last 100): -988.1881188118812 episode loss:  16474.578
avg reward for last 100 episodes: -988.1881188118812


 12%|█▏        | 604/5000 [01:22<13:12,  5.54it/s]

steps= 271
steps= 21
steps= 42


 12%|█▏        | 605/5000 [01:22<13:28,  5.44it/s]

steps= 71


 12%|█▏        | 606/5000 [01:22<14:13,  5.15it/s]

steps= 76


 12%|█▏        | 607/5000 [01:22<15:11,  4.82it/s]

steps= 135


 12%|█▏        | 608/5000 [01:23<17:52,  4.09it/s]

steps= 200


 12%|█▏        | 610/5000 [01:23<15:07,  4.84it/s]

steps= 319
steps= 139
steps= 34


 12%|█▏        | 613/5000 [01:23<12:47,  5.71it/s]

steps= 176
steps= 109
steps= 46


 12%|█▏        | 616/5000 [01:24<10:06,  7.22it/s]

steps= 79
steps= 133


 12%|█▏        | 618/5000 [01:24<10:37,  6.87it/s]

steps= 166
steps= 100


 12%|█▏        | 619/5000 [01:24<11:53,  6.14it/s]

steps= 172


 12%|█▏        | 621/5000 [01:25<13:50,  5.27it/s]

steps= 184
steps= 98


 12%|█▏        | 622/5000 [01:25<13:13,  5.52it/s]

steps= 35


 12%|█▏        | 623/5000 [01:25<15:44,  4.64it/s]

steps= 165


 12%|█▎        | 625/5000 [01:26<15:03,  4.84it/s]

steps= 183
steps= 83


 13%|█▎        | 626/5000 [01:26<14:20,  5.08it/s]

steps= 105
steps= 88
steps= 132


 13%|█▎        | 630/5000 [01:26<10:57,  6.64it/s]

steps= 51
steps= 148


 13%|█▎        | 633/5000 [01:26<09:52,  7.37it/s]

steps= 156
steps= 76
steps= 10
steps= 48


 13%|█▎        | 635/5000 [01:27<08:57,  8.12it/s]

steps= 92


 13%|█▎        | 636/5000 [01:27<11:09,  6.52it/s]

steps= 124


 13%|█▎        | 638/5000 [01:27<12:23,  5.86it/s]

steps= 150
steps= 152


 13%|█▎        | 640/5000 [01:28<10:29,  6.93it/s]

steps= 99
steps= 43
steps= 66


 13%|█▎        | 644/5000 [01:28<07:13, 10.05it/s]

steps= 44
steps= 60
steps= 11


 13%|█▎        | 646/5000 [01:28<07:21,  9.86it/s]

steps= 64
steps= 56


 13%|█▎        | 648/5000 [01:28<07:40,  9.45it/s]

steps= 34
steps= 70
steps= 242


 13%|█▎        | 650/5000 [01:29<10:13,  7.09it/s]

steps= 167
steps= 87


 13%|█▎        | 652/5000 [01:29<09:26,  7.67it/s]

steps= 77


 13%|█▎        | 656/5000 [01:29<09:39,  7.49it/s]

steps= 116
steps= 39
steps= 21
steps= 179


 13%|█▎        | 657/5000 [01:29<09:26,  7.67it/s]

steps= 54


 13%|█▎        | 659/5000 [01:30<10:13,  7.07it/s]

steps= 125
steps= 68
steps= 87


 13%|█▎        | 661/5000 [01:30<08:16,  8.75it/s]

steps= 40
steps= 243


 13%|█▎        | 664/5000 [01:30<09:44,  7.42it/s]

steps= 79
steps= 83


 13%|█▎        | 666/5000 [01:31<10:11,  7.09it/s]

steps= 231
steps= 133


 13%|█▎        | 667/5000 [01:31<14:04,  5.13it/s]

steps= 406
steps= 136


 13%|█▎        | 669/5000 [01:31<16:38,  4.34it/s]

steps= 333
steps= 67


 13%|█▎        | 671/5000 [01:32<14:01,  5.15it/s]

steps= 124


 13%|█▎        | 672/5000 [01:32<16:17,  4.43it/s]

steps= 274


 13%|█▎        | 674/5000 [01:32<16:22,  4.40it/s]

steps= 265
steps= 169
steps= 43


 14%|█▎        | 679/5000 [01:33<10:28,  6.87it/s]

steps= 207
steps= 21
steps= 35
steps= 94


 14%|█▎        | 681/5000 [01:33<08:39,  8.32it/s]

steps= 56
steps= 24


 14%|█▎        | 683/5000 [01:33<08:54,  8.07it/s]

steps= 187
steps= 51
steps= 93
steps= 75


 14%|█▎        | 687/5000 [01:33<06:54, 10.39it/s]

steps= 51
steps= 39


 14%|█▍        | 689/5000 [01:34<09:44,  7.38it/s]

steps= 290
steps= 62
steps= 96


 14%|█▍        | 691/5000 [01:34<10:56,  6.56it/s]

steps= 287


 14%|█▍        | 693/5000 [01:35<13:28,  5.33it/s]

steps= 328
steps= 154


 14%|█▍        | 694/5000 [01:35<13:01,  5.51it/s]

steps= 114


 14%|█▍        | 695/5000 [01:35<14:30,  4.94it/s]

steps= 218
steps= 33


 14%|█▍        | 698/5000 [01:36<11:11,  6.41it/s]

steps= 100
steps= 51


 14%|█▍        | 699/5000 [01:36<11:05,  6.46it/s]

steps= 94


 14%|█▍        | 701/5000 [01:36<10:58,  6.53it/s]

steps= 151
steps= 94
episode: 700 episode reward: -1102 eps: 0.9229743440874912 avg reward (last 100): -1006.6534653465346 episode loss:  24229.508
avg reward for last 100 episodes: -1006.6534653465346
steps= 10


 14%|█▍        | 704/5000 [01:36<10:15,  6.98it/s]

steps= 244
steps= 160


 14%|█▍        | 705/5000 [01:37<10:19,  6.93it/s]

steps= 141
steps= 53


 14%|█▍        | 707/5000 [01:37<16:12,  4.42it/s]

steps= 178


 14%|█▍        | 708/5000 [01:38<18:36,  3.84it/s]

steps= 167
steps= 12


 14%|█▍        | 710/5000 [01:38<17:15,  4.14it/s]

steps= 228
steps= 66


 14%|█▍        | 713/5000 [01:38<13:23,  5.33it/s]

steps= 197
steps= 123


 14%|█▍        | 715/5000 [01:39<13:03,  5.47it/s]

steps= 131
steps= 161


 14%|█▍        | 716/5000 [01:39<11:37,  6.14it/s]

steps= 129


 14%|█▍        | 718/5000 [01:39<13:53,  5.14it/s]

steps= 316
steps= 49


 14%|█▍        | 720/5000 [01:39<11:54,  5.99it/s]

steps= 18
steps= 166


 14%|█▍        | 723/5000 [01:40<08:42,  8.18it/s]

steps= 33
steps= 52
steps= 51
steps= 16


 15%|█▍        | 727/5000 [01:40<07:49,  9.10it/s]

steps= 57
steps= 31
steps= 160
steps= 76


 15%|█▍        | 730/5000 [01:41<09:20,  7.62it/s]

steps= 191
steps= 65
steps= 49


 15%|█▍        | 732/5000 [01:41<08:05,  8.80it/s]

steps= 50
steps= 46


 15%|█▍        | 735/5000 [01:41<08:32,  8.32it/s]

steps= 188
steps= 40
steps= 21
steps= 11


 15%|█▍        | 738/5000 [01:41<07:20,  9.67it/s]

steps= 166
steps= 98


 15%|█▍        | 740/5000 [01:42<07:41,  9.23it/s]

steps= 103
steps= 16
steps= 11


 15%|█▍        | 743/5000 [01:42<07:50,  9.05it/s]

steps= 308


 15%|█▍        | 744/5000 [01:42<10:09,  6.98it/s]

steps= 177


 15%|█▍        | 745/5000 [01:43<13:50,  5.12it/s]

steps= 257


 15%|█▍        | 747/5000 [01:43<13:02,  5.43it/s]

steps= 161
steps= 68


 15%|█▍        | 748/5000 [01:43<14:12,  4.99it/s]

steps= 139


 15%|█▌        | 750/5000 [01:43<12:33,  5.64it/s]

steps= 219
steps= 84


 15%|█▌        | 752/5000 [01:44<13:19,  5.32it/s]

steps= 263
steps= 96
steps= 6


 15%|█▌        | 755/5000 [01:44<11:16,  6.27it/s]

steps= 56
steps= 140


 15%|█▌        | 758/5000 [01:45<13:30,  5.23it/s]

steps= 358
steps= 64
steps= 87


 15%|█▌        | 759/5000 [01:45<13:12,  5.35it/s]

steps= 97
steps= 55


 15%|█▌        | 763/5000 [01:46<14:04,  5.02it/s]

steps= 452
steps= 73
steps= 100


 15%|█▌        | 764/5000 [01:47<32:29,  2.17it/s]

steps= 840
steps= 55


 15%|█▌        | 768/5000 [01:47<18:56,  3.72it/s]

steps= 183
steps= 34
steps= 21


 15%|█▌        | 769/5000 [01:47<16:16,  4.33it/s]

steps= 76


 15%|█▌        | 771/5000 [01:48<14:23,  4.90it/s]

steps= 126
steps= 80


 15%|█▌        | 772/5000 [01:48<20:59,  3.36it/s]

steps= 327


 15%|█▌        | 773/5000 [01:49<19:08,  3.68it/s]

steps= 225
steps= 40


 16%|█▌        | 775/5000 [01:49<17:14,  4.08it/s]

steps= 241


 16%|█▌        | 776/5000 [01:50<25:39,  2.74it/s]

steps= 582


 16%|█▌        | 780/5000 [01:50<17:24,  4.04it/s]

steps= 207
steps= 36
steps= 49
steps= 86


 16%|█▌        | 783/5000 [01:50<11:42,  6.00it/s]

steps= 153
steps= 73
steps= 56


 16%|█▌        | 785/5000 [01:51<12:38,  5.56it/s]

steps= 87
steps= 123
steps= 21


 16%|█▌        | 787/5000 [01:51<12:10,  5.77it/s]

steps= 168


 16%|█▌        | 789/5000 [01:51<14:17,  4.91it/s]

steps= 249
steps= 153


 16%|█▌        | 791/5000 [01:52<12:43,  5.51it/s]

steps= 59
steps= 146


 16%|█▌        | 794/5000 [01:52<10:09,  6.90it/s]

steps= 59
steps= 64
steps= 65


 16%|█▌        | 796/5000 [01:52<11:08,  6.28it/s]

steps= 97
steps= 147


 16%|█▌        | 797/5000 [01:53<13:42,  5.11it/s]

steps= 219


 16%|█▌        | 798/5000 [01:53<17:25,  4.02it/s]

steps= 140
steps= 35


 16%|█▌        | 801/5000 [01:53<14:04,  4.97it/s]

steps= 163
steps= 165
episode: 800 episode reward: -1173 eps: 0.913790138992923 avg reward (last 100): -1137.1881188118812 episode loss:  23945.385
avg reward for last 100 episodes: -1137.1881188118812


 16%|█▌        | 804/5000 [01:54<11:10,  6.26it/s]

steps= 140
steps= 54
steps= 23
steps= 24


 16%|█▌        | 806/5000 [01:55<15:18,  4.56it/s]

steps= 552


 16%|█▌        | 807/5000 [01:55<18:12,  3.84it/s]

steps= 205
steps= 61


 16%|█▌        | 810/5000 [01:56<15:51,  4.40it/s]

steps= 335
steps= 67


 16%|█▌        | 812/5000 [01:56<15:12,  4.59it/s]

steps= 136
steps= 135


 16%|█▋        | 813/5000 [01:56<12:47,  5.46it/s]

steps= 119


 16%|█▋        | 814/5000 [01:57<19:26,  3.59it/s]

steps= 193


 16%|█▋        | 815/5000 [01:57<19:09,  3.64it/s]

steps= 121


 16%|█▋        | 816/5000 [01:57<22:12,  3.14it/s]

steps= 190


 16%|█▋        | 818/5000 [01:58<18:20,  3.80it/s]

steps= 177
steps= 56


 16%|█▋        | 819/5000 [01:58<16:13,  4.30it/s]

steps= 126


 16%|█▋        | 820/5000 [01:58<17:24,  4.00it/s]

steps= 103
steps= 26


 16%|█▋        | 822/5000 [01:59<15:51,  4.39it/s]

steps= 114


 16%|█▋        | 823/5000 [01:59<17:27,  3.99it/s]

steps= 189


 16%|█▋        | 825/5000 [01:59<15:42,  4.43it/s]

steps= 132
steps= 23


 17%|█▋        | 827/5000 [02:00<16:06,  4.32it/s]

steps= 140
steps= 26


 17%|█▋        | 828/5000 [02:00<17:06,  4.06it/s]

steps= 94


 17%|█▋        | 829/5000 [02:00<16:26,  4.23it/s]

steps= 77


 17%|█▋        | 831/5000 [02:01<13:27,  5.16it/s]

steps= 122
steps= 153
steps= 57
steps= 9


 17%|█▋        | 834/5000 [02:01<10:11,  6.81it/s]

steps= 21


 17%|█▋        | 836/5000 [02:01<12:42,  5.46it/s]

steps= 353
steps= 60


 17%|█▋        | 837/5000 [02:01<11:41,  5.93it/s]

steps= 64
steps= 26


 17%|█▋        | 839/5000 [02:02<11:13,  6.18it/s]

steps= 143
steps= 29


 17%|█▋        | 842/5000 [02:02<09:41,  7.15it/s]

steps= 108
steps= 123
steps= 28


 17%|█▋        | 844/5000 [02:02<09:16,  7.47it/s]

steps= 253


 17%|█▋        | 846/5000 [02:03<14:09,  4.89it/s]

steps= 122
steps= 70


 17%|█▋        | 847/5000 [02:03<16:58,  4.08it/s]

steps= 196


 17%|█▋        | 849/5000 [02:04<17:15,  4.01it/s]

steps= 481
steps= 47


 17%|█▋        | 851/5000 [02:04<13:57,  4.95it/s]

steps= 20
steps= 70


 17%|█▋        | 852/5000 [02:04<15:18,  4.51it/s]

steps= 144
steps= 76


 17%|█▋        | 854/5000 [02:05<15:10,  4.56it/s]

steps= 480
steps= 46
steps= 11


 17%|█▋        | 857/5000 [02:05<12:54,  5.35it/s]

steps= 232
steps= 31


 17%|█▋        | 859/5000 [02:05<11:49,  5.84it/s]

steps= 129


 17%|█▋        | 861/5000 [02:06<16:45,  4.12it/s]

steps= 361
steps= 77


 17%|█▋        | 863/5000 [02:06<13:51,  4.98it/s]

steps= 53
steps= 70


 17%|█▋        | 864/5000 [02:06<14:23,  4.79it/s]

steps= 112
steps= 54


 17%|█▋        | 866/5000 [02:07<13:00,  5.30it/s]

steps= 134


 17%|█▋        | 867/5000 [02:07<14:47,  4.66it/s]

steps= 133


 17%|█▋        | 869/5000 [02:08<20:02,  3.44it/s]

steps= 319
steps= 27


 17%|█▋        | 870/5000 [02:08<19:44,  3.49it/s]

steps= 120
steps= 20


 17%|█▋        | 872/5000 [02:08<17:00,  4.05it/s]

steps= 325
steps= 96


 17%|█▋        | 874/5000 [02:09<16:41,  4.12it/s]

steps= 218


 18%|█▊        | 875/5000 [02:09<17:39,  3.89it/s]

steps= 146


 18%|█▊        | 876/5000 [02:10<23:49,  2.88it/s]

steps= 277


 18%|█▊        | 877/5000 [02:10<24:07,  2.85it/s]

steps= 146


 18%|█▊        | 879/5000 [02:11<22:36,  3.04it/s]

steps= 260
steps= 89


 18%|█▊        | 881/5000 [02:11<16:34,  4.14it/s]

steps= 82
steps= 85


 18%|█▊        | 884/5000 [02:11<12:07,  5.66it/s]

steps= 74
steps= 38
steps= 21


 18%|█▊        | 885/5000 [02:11<11:56,  5.75it/s]

steps= 113
steps= 78


 18%|█▊        | 887/5000 [02:12<11:31,  5.94it/s]

steps= 214


 18%|█▊        | 889/5000 [02:12<11:54,  5.76it/s]

steps= 227
steps= 107
steps= 69


 18%|█▊        | 891/5000 [02:12<10:46,  6.36it/s]

steps= 201


 18%|█▊        | 892/5000 [02:13<13:18,  5.14it/s]

steps= 326


 18%|█▊        | 895/5000 [02:13<11:33,  5.92it/s]

steps= 176
steps= 24
steps= 38


 18%|█▊        | 896/5000 [02:13<10:45,  6.36it/s]

steps= 66


 18%|█▊        | 898/5000 [02:14<13:09,  5.20it/s]

steps= 147
steps= 116


 18%|█▊        | 899/5000 [02:14<13:33,  5.04it/s]

steps= 245


 18%|█▊        | 902/5000 [02:14<13:13,  5.16it/s]

steps= 320
steps= 9
episode: 900 episode reward: -1017 eps: 0.9046973228126401 avg reward (last 100): -1060.6732673267327 episode loss:  49965.4
avg reward for last 100 episodes: -1060.6732673267327
steps= 57


 18%|█▊        | 903/5000 [02:15<11:30,  5.93it/s]

steps= 35


 18%|█▊        | 904/5000 [02:15<15:39,  4.36it/s]

steps= 158


 18%|█▊        | 906/5000 [02:15<15:10,  4.50it/s]

steps= 174
steps= 55


 18%|█▊        | 907/5000 [02:16<18:10,  3.75it/s]

steps= 220


 18%|█▊        | 908/5000 [02:16<16:56,  4.02it/s]

steps= 111


 18%|█▊        | 910/5000 [02:16<17:25,  3.91it/s]

steps= 266
steps= 112


 18%|█▊        | 912/5000 [02:17<13:07,  5.19it/s]

steps= 57
steps= 46


 18%|█▊        | 913/5000 [02:17<13:20,  5.10it/s]

steps= 56


 18%|█▊        | 914/5000 [02:17<16:34,  4.11it/s]

steps= 211


 18%|█▊        | 915/5000 [02:18<17:49,  3.82it/s]

steps= 250


 18%|█▊        | 917/5000 [02:18<15:13,  4.47it/s]

steps= 263
steps= 146


 18%|█▊        | 918/5000 [02:18<13:59,  4.86it/s]

steps= 187


 18%|█▊        | 919/5000 [02:18<15:45,  4.32it/s]

steps= 234


 18%|█▊        | 921/5000 [02:19<20:40,  3.29it/s]

steps= 464
steps= 71


 18%|█▊        | 922/5000 [02:20<20:15,  3.35it/s]

steps= 184
steps= 41


 18%|█▊        | 925/5000 [02:20<15:47,  4.30it/s]

steps= 122
steps= 105


 19%|█▊        | 928/5000 [02:20<12:32,  5.41it/s]

steps= 11
steps= 20
steps= 86


 19%|█▊        | 929/5000 [02:21<15:48,  4.29it/s]

steps= 280


 19%|█▊        | 931/5000 [02:22<21:21,  3.17it/s]

steps= 554
steps= 62
steps= 27


 19%|█▊        | 933/5000 [02:22<20:13,  3.35it/s]

steps= 292


 19%|█▊        | 935/5000 [02:23<17:50,  3.80it/s]

steps= 287
steps= 109


 19%|█▊        | 937/5000 [02:23<17:10,  3.94it/s]

steps= 313
steps= 59


 19%|█▉        | 939/5000 [02:23<12:44,  5.31it/s]

steps= 65
steps= 188


 19%|█▉        | 940/5000 [02:23<11:33,  5.86it/s]

steps= 54


 19%|█▉        | 941/5000 [02:24<15:27,  4.38it/s]

steps= 289
steps= 37
steps=

 19%|█▉        | 943/5000 [02:24<12:52,  5.25it/s]

 73


 19%|█▉        | 944/5000 [02:24<15:20,  4.41it/s]

steps= 175
steps= 11
steps= 66


 19%|█▉        | 948/5000 [02:25<14:56,  4.52it/s]

steps= 248
steps= 74


 19%|█▉        | 949/5000 [02:25<14:22,  4.69it/s]

steps= 103
steps= 39


 19%|█▉        | 951/5000 [02:26<12:44,  5.29it/s]

steps= 184


 19%|█▉        | 954/5000 [02:26<12:07,  5.56it/s]

steps= 267
steps= 96
steps= 46


 19%|█▉        | 955/5000 [02:26<18:24,  3.66it/s]

steps= 435
steps= 43


 19%|█▉        | 958/5000 [02:28<17:52,  3.77it/s]

steps= 643
steps= 66


 19%|█▉        | 959/5000 [02:28<16:46,  4.02it/s]

steps= 136


 19%|█▉        | 961/5000 [02:28<16:55,  3.98it/s]

steps= 291
steps= 127


 19%|█▉        | 962/5000 [02:29<17:55,  3.75it/s]

steps= 318
steps= 48
steps= 18


 19%|█▉        | 965/5000 [02:29<14:07,  4.76it/s]

steps= 129


 19%|█▉        | 967/5000 [02:29<12:31,  5.36it/s]

steps= 209
steps= 140


 19%|█▉        | 968/5000 [02:29<14:58,  4.49it/s]

steps= 166
steps= 29


 19%|█▉        | 970/5000 [02:30<14:11,  4.73it/s]

steps= 255
steps= 57


 19%|█▉        | 974/5000 [02:30<10:39,  6.29it/s]

steps= 210
steps= 7
steps= 49


 20%|█▉        | 975/5000 [02:31<16:46,  4.00it/s]

steps= 238
steps= 31


 20%|█▉        | 977/5000 [02:31<14:17,  4.69it/s]

steps= 98


 20%|█▉        | 979/5000 [02:31<14:06,  4.75it/s]

steps= 200
steps= 188


 20%|█▉        | 981/5000 [02:32<12:45,  5.25it/s]

steps= 129
steps= 234


 20%|█▉        | 982/5000 [02:32<11:23,  5.88it/s]

steps= 164


 20%|█▉        | 985/5000 [02:32<12:51,  5.20it/s]

steps= 500
steps= 53
steps= 103


 20%|█▉        | 986/5000 [02:33<12:42,  5.26it/s]

steps= 85


 20%|█▉        | 987/5000 [02:33<21:20,  3.13it/s]

steps= 452


 20%|█▉        | 989/5000 [02:34<15:42,  4.26it/s]

steps= 135
steps= 147


 20%|█▉        | 991/5000 [02:34<16:55,  3.95it/s]

steps= 229
steps= 190


 20%|█▉        | 993/5000 [02:34<13:32,  4.93it/s]

steps= 6
steps= 44


 20%|█▉        | 994/5000 [02:35<25:02,  2.67it/s]

steps= 379


 20%|█▉        | 995/5000 [02:36<32:20,  2.06it/s]

steps= 383


 20%|█▉        | 996/5000 [02:36<28:33,  2.34it/s]

steps= 260
steps= 24


 20%|█▉        | 998/5000 [02:36<22:23,  2.98it/s]

steps= 73


 20%|██        | 1000/5000 [02:37<19:23,  3.44it/s]

steps= 85
steps= 77


 20%|██        | 1002/5000 [02:37<17:12,  3.87it/s]

steps= 233
episode: 1000 episode reward: -1241 eps: 0.8956949861665088 avg reward (last 100): -1065.8910891089108 episode loss:  28111.877
avg reward for last 100 episodes: -1065.8910891089108
steps= 186


 20%|██        | 1005/5000 [02:38<13:16,  5.02it/s]

steps= 114
steps= 12
steps= 54


 20%|██        | 1006/5000 [02:38<11:30,  5.79it/s]

steps= 90


 20%|██        | 1007/5000 [02:38<13:01,  5.11it/s]

steps= 170


 20%|██        | 1009/5000 [02:38<11:44,  5.67it/s]

steps= 218
steps= 95


 20%|██        | 1010/5000 [02:39<12:13,  5.44it/s]

steps= 154


 20%|██        | 1012/5000 [02:39<12:22,  5.37it/s]

steps= 240
steps= 157


 20%|██        | 1013/5000 [02:39<11:54,  5.58it/s]

steps= 169
steps= 11


 20%|██        | 1016/5000 [02:40<12:18,  5.40it/s]

steps= 347
steps= 160


 20%|██        | 1018/5000 [02:40<11:06,  5.98it/s]

steps= 13
steps= 128


 20%|██        | 1019/5000 [02:40<13:48,  4.80it/s]

steps= 130


 20%|██        | 1021/5000 [02:41<13:09,  5.04it/s]

steps= 127
steps= 76


 20%|██        | 1023/5000 [02:41<16:37,  3.99it/s]

steps= 224
steps= 40


 20%|██        | 1024/5000 [02:42<21:48,  3.04it/s]

steps= 205


 20%|██        | 1025/5000 [02:42<22:37,  2.93it/s]

steps= 250
steps= 16


 21%|██        | 1028/5000 [02:43<17:01,  3.89it/s]

steps= 175
steps= 71


 21%|██        | 1029/5000 [02:43<14:39,  4.52it/s]

steps= 42
steps= 16


 21%|██        | 1031/5000 [02:43<13:31,  4.89it/s]

steps= 94


 21%|██        | 1033/5000 [02:44<13:03,  5.06it/s]

steps= 89
steps= 61


 21%|██        | 1035/5000 [02:44<14:02,  4.71it/s]

steps= 175
steps= 42


 21%|██        | 1038/5000 [02:45<13:38,  4.84it/s]

steps= 155
steps= 17
steps= 21


 21%|██        | 1039/5000 [02:45<17:12,  3.84it/s]

steps= 266


 21%|██        | 1041/5000 [02:46<20:25,  3.23it/s]

steps= 381
steps= 23


 21%|██        | 1042/5000 [02:46<17:58,  3.67it/s]

steps= 74


 21%|██        | 1043/5000 [02:47<21:51,  3.02it/s]

steps= 190


 21%|██        | 1044/5000 [02:47<24:10,  2.73it/s]

steps= 251


 21%|██        | 1045/5000 [02:48<29:05,  2.27it/s]

steps= 200


 21%|██        | 1046/5000 [02:48<33:54,  1.94it/s]

steps= 256


 21%|██        | 1047/5000 [02:49<30:35,  2.15it/s]

steps= 105
steps= 66


 21%|██        | 1050/5000 [02:49<19:49,  3.32it/s]

steps= 179
steps= 89


 21%|██        | 1053/5000 [02:50<17:01,  3.86it/s]

steps= 207
steps= 19
steps= 48


 21%|██        | 1054/5000 [02:50<21:15,  3.09it/s]

steps= 188


 21%|██        | 1056/5000 [02:51<20:28,  3.21it/s]

steps= 283
steps= 43


 21%|██        | 1057/5000 [02:51<20:53,  3.15it/s]

steps= 171


 21%|██        | 1059/5000 [02:52<22:52,  2.87it/s]

steps= 279
steps= 227


 21%|██        | 1061/5000 [02:52<18:11,  3.61it/s]

steps= 22
steps= 106


 21%|██▏       | 1064/5000 [02:53<12:22,  5.30it/s]

steps= 56
steps= 48
steps= 121


 21%|██▏       | 1065/5000 [02:53<12:23,  5.29it/s]

steps= 116


 21%|██▏       | 1067/5000 [02:53<13:35,  4.82it/s]

steps= 390
steps= 96


 21%|██▏       | 1068/5000 [02:54<13:47,  4.75it/s]

steps= 152


 21%|██▏       | 1070/5000 [02:54<13:38,  4.80it/s]

steps= 204
steps= 57
steps= 41


 21%|██▏       | 1072/5000 [02:54<11:30,  5.69it/s]

steps= 75
steps= 43


 21%|██▏       | 1074/5000 [02:55<10:23,  6.29it/s]

steps= 221


 22%|██▏       | 1076/5000 [02:55<11:38,  5.62it/s]

steps= 103
steps= 79


 22%|██▏       | 1077/5000 [02:55<18:04,  3.62it/s]

steps= 264


 22%|██▏       | 1079/5000 [02:56<23:35,  2.77it/s]

steps= 710
steps= 45


 22%|██▏       | 1081/5000 [02:57<17:36,  3.71it/s]

steps= 194
steps= 64


 22%|██▏       | 1083/5000 [02:57<14:52,  4.39it/s]

steps= 77
steps= 164


 22%|██▏       | 1086/5000 [02:57<11:08,  5.85it/s]

steps= 135
steps= 33
steps= 47
steps= 49
steps= 22


 22%|██▏       | 1091/5000 [02:58<07:20,  8.88it/s]

steps= 74
steps= 92
steps= 22


 22%|██▏       | 1093/5000 [02:58<07:55,  8.22it/s]

steps= 108
steps= 67


 22%|██▏       | 1096/5000 [02:59<09:30,  6.85it/s]

steps= 253
steps= 10
steps= 176


 22%|██▏       | 1097/5000 [02:59<10:40,  6.10it/s]

steps= 79


 22%|██▏       | 1099/5000 [02:59<13:50,  4.70it/s]

steps= 211
steps= 127


 22%|██▏       | 1100/5000 [03:00<15:49,  4.11it/s]

steps= 350
steps= 45
episode: 1100 episode reward: -1053 eps: 0.8867822287233291 avg reward (last 100): -1022.4653465346535 episode loss:  26128.07
avg reward for last 100 episodes: -1022.4653465346535


 22%|██▏       | 1102/5000 [03:00<13:09,  4.94it/s]

steps= 46


 22%|██▏       | 1103/5000 [03:00<18:35,  3.49it/s]

steps= 380


 22%|██▏       | 1104/5000 [03:01<19:00,  3.42it/s]

steps= 219


 22%|██▏       | 1105/5000 [03:01<24:43,  2.63it/s]

steps= 351


 22%|██▏       | 1108/5000 [03:02<20:56,  3.10it/s]

steps= 518
steps= 54
steps= 88


 22%|██▏       | 1110/5000 [03:02<17:20,  3.74it/s]

steps= 10
steps= 134


 22%|██▏       | 1112/5000 [03:03<20:05,  3.23it/s]

steps= 752
steps= 102
steps= 13


 22%|██▏       | 1115/5000 [03:04<15:14,  4.25it/s]

steps= 178
steps= 185


 22%|██▏       | 1116/5000 [03:04<13:08,  4.93it/s]

steps= 89


 22%|██▏       | 1118/5000 [03:04<14:19,  4.52it/s]

steps= 377
steps= 103


 22%|██▏       | 1119/5000 [03:04<13:20,  4.85it/s]

steps= 104


 22%|██▏       | 1120/5000 [03:05<14:23,  4.49it/s]

steps= 224


 22%|██▏       | 1121/5000 [03:05<14:05,  4.59it/s]

steps= 82


 22%|██▏       | 1122/5000 [03:05<14:36,  4.42it/s]

steps= 78
steps= 55


 22%|██▎       | 1125/5000 [03:06<14:21,  4.50it/s]

steps= 332
steps= 128


 23%|██▎       | 1126/5000 [03:06<15:04,  4.29it/s]

steps= 145


 23%|██▎       | 1127/5000 [03:07<21:52,  2.95it/s]

steps= 334
steps= 55


 23%|██▎       | 1129/5000 [03:07<18:08,  3.56it/s]

steps= 125
steps= 70


 23%|██▎       | 1133/5000 [03:07<11:24,  5.65it/s]

steps= 182
steps= 35
steps= 17


 23%|██▎       | 1134/5000 [03:07<11:06,  5.80it/s]

steps= 81
steps= 7


 23%|██▎       | 1137/5000 [03:08<11:40,  5.52it/s]

steps= 351
steps= 153


 23%|██▎       | 1138/5000 [03:08<12:19,  5.22it/s]

steps= 189


 23%|██▎       | 1139/5000 [03:09<20:43,  3.11it/s]

steps= 480


 23%|██▎       | 1142/5000 [03:10<20:47,  3.09it/s]

steps= 314
steps= 52
steps= 28


 23%|██▎       | 1144/5000 [03:10<14:59,  4.29it/s]

steps= 145
steps= 115


 23%|██▎       | 1146/5000 [03:10<12:33,  5.12it/s]

steps= 58
steps= 109


 23%|██▎       | 1147/5000 [03:10<12:25,  5.17it/s]

steps= 202


 23%|██▎       | 1148/5000 [03:11<13:56,  4.61it/s]

steps= 192


 23%|██▎       | 1149/5000 [03:11<17:54,  3.58it/s]

steps= 275
steps= 38


 23%|██▎       | 1151/5000 [03:11<15:26,  4.16it/s]

steps= 196


 23%|██▎       | 1152/5000 [03:12<21:28,  2.99it/s]

steps= 368


 23%|██▎       | 1155/5000 [03:12<15:29,  4.13it/s]

steps= 186
steps= 25
steps= 55


 23%|██▎       | 1157/5000 [03:13<15:09,  4.23it/s]

steps= 293
steps= 114


 23%|██▎       | 1160/5000 [03:13<10:58,  5.83it/s]

steps= 156
steps= 26
steps= 86


 23%|██▎       | 1161/5000 [03:14<13:39,  4.69it/s]

steps= 348


 23%|██▎       | 1162/5000 [03:14<17:31,  3.65it/s]

steps= 292


 23%|██▎       | 1164/5000 [03:14<14:54,  4.29it/s]

steps= 143
steps= 60
steps= 66


 23%|██▎       | 1166/5000 [03:15<13:12,  4.84it/s]

steps= 202


 23%|██▎       | 1167/5000 [03:15<14:49,  4.31it/s]

steps= 240


 23%|██▎       | 1169/5000 [03:15<13:43,  4.65it/s]

steps= 202
steps= 148


 23%|██▎       | 1171/5000 [03:16<20:04,  3.18it/s]

steps= 612
steps= 63
steps= 38


 23%|██▎       | 1174/5000 [03:17<14:13,  4.48it/s]

steps= 72
steps= 79


 24%|██▎       | 1175/5000 [03:17<12:18,  5.18it/s]

steps= 63


 24%|██▎       | 1177/5000 [03:18<15:53,  4.01it/s]

steps= 497
steps= 132


 24%|██▎       | 1178/5000 [03:18<16:22,  3.89it/s]

steps= 283
steps= 85


 24%|██▎       | 1180/5000 [03:18<14:24,  4.42it/s]

steps= 107


 24%|██▎       | 1182/5000 [03:18<12:45,  4.99it/s]

steps= 55
steps= 95


 24%|██▎       | 1184/5000 [03:19<11:08,  5.70it/s]

steps= 53
steps= 154


 24%|██▎       | 1185/5000 [03:19<10:57,  5.80it/s]

steps= 132


 24%|██▎       | 1186/5000 [03:19<17:01,  3.74it/s]

steps= 162


 24%|██▎       | 1187/5000 [03:20<20:04,  3.17it/s]

steps= 113
steps= 27


 24%|██▍       | 1189/5000 [03:20<16:17,  3.90it/s]

steps= 126
steps= 80


 24%|██▍       | 1191/5000 [03:20<15:00,  4.23it/s]

steps= 188
steps= 17


 24%|██▍       | 1193/5000 [03:21<12:30,  5.07it/s]

steps= 109


 24%|██▍       | 1194/5000 [03:21<14:24,  4.40it/s]

steps= 243


 24%|██▍       | 1195/5000 [03:21<15:07,  4.19it/s]

steps= 79


 24%|██▍       | 1196/5000 [03:21<15:55,  3.98it/s]

steps= 189
steps= 14


 24%|██▍       | 1199/5000 [03:22<13:45,  4.60it/s]

steps= 297
steps= 86


 24%|██▍       | 1201/5000 [03:22<11:05,  5.71it/s]

steps= 35
steps= 46
episode: 1200 episode reward: -1054 eps: 0.877958159110793 avg reward (last 100): -1106.2871287128712 episode loss:  28625.008
avg reward for last 100 episodes: -1106.2871287128712


 24%|██▍       | 1202/5000 [03:22<13:25,  4.71it/s]

steps= 176


 24%|██▍       | 1204/5000 [03:23<14:15,  4.44it/s]

steps= 252
steps= 85


 24%|██▍       | 1206/5000 [03:24<16:37,  3.80it/s]

steps= 309
steps= 139


 24%|██▍       | 1207/5000 [03:24<16:50,  3.75it/s]

steps= 191
steps= 67


 24%|██▍       | 1209/5000 [03:24<15:42,  4.02it/s]

steps= 242
steps= 41


 24%|██▍       | 1212/5000 [03:25<12:43,  4.96it/s]

steps= 81
steps= 110


 24%|██▍       | 1215/5000 [03:25<08:58,  7.03it/s]

steps= 67
steps= 33
steps= 44
steps= 51


 24%|██▍       | 1217/5000 [03:25<09:58,  6.32it/s]

steps= 181


 24%|██▍       | 1219/5000 [03:26<10:52,  5.80it/s]

steps= 133
steps= 51


 24%|██▍       | 1221/5000 [03:26<15:44,  4.00it/s]

steps= 305
steps= 87


 24%|██▍       | 1222/5000 [03:27<14:06,  4.46it/s]

steps= 79


 24%|██▍       | 1225/5000 [03:27<14:12,  4.43it/s]

steps= 279
steps= 11
steps= 16


 25%|██▍       | 1226/5000 [03:27<14:21,  4.38it/s]

steps= 163


 25%|██▍       | 1227/5000 [03:28<15:42,  4.00it/s]

steps= 280


 25%|██▍       | 1229/5000 [03:28<15:55,  3.95it/s]

steps= 272
steps= 204
steps= 50


 25%|██▍       | 1231/5000 [03:29<15:37,  4.02it/s]

steps= 126


 25%|██▍       | 1233/5000 [03:30<23:15,  2.70it/s]

steps= 549
steps= 62


 25%|██▍       | 1234/5000 [03:30<19:29,  3.22it/s]

steps= 72


 25%|██▍       | 1235/5000 [03:30<20:35,  3.05it/s]

steps= 212


 25%|██▍       | 1237/5000 [03:31<16:09,  3.88it/s]

steps= 173
steps= 80


 25%|██▍       | 1239/5000 [03:31<12:16,  5.11it/s]

steps= 106
steps= 59


 25%|██▍       | 1241/5000 [03:31<11:11,  5.60it/s]

steps= 76
steps= 208


 25%|██▍       | 1242/5000 [03:32<14:06,  4.44it/s]

steps= 231
steps= 11


 25%|██▍       | 1244/5000 [03:32<12:05,  5.18it/s]

steps= 42


 25%|██▍       | 1246/5000 [03:32<10:58,  5.70it/s]

steps= 121
steps= 47


 25%|██▍       | 1249/5000 [03:33<09:10,  6.82it/s]

steps= 109
steps= 18
steps= 45


 25%|██▌       | 1250/5000 [03:33<13:30,  4.63it/s]

steps= 175
steps= 17


 25%|██▌       | 1252/5000 [03:33<13:07,  4.76it/s]

steps= 134


 25%|██▌       | 1254/5000 [03:34<13:31,  4.62it/s]

steps= 184
steps= 147


 25%|██▌       | 1255/5000 [03:34<12:36,  4.95it/s]

steps= 78
steps= 17


 25%|██▌       | 1257/5000 [03:34<11:22,  5.48it/s]

steps= 114


 25%|██▌       | 1259/5000 [03:35<14:07,  4.42it/s]

steps= 215
steps= 134


 25%|██▌       | 1261/5000 [03:35<12:16,  5.08it/s]

steps= 58
steps= 122


 25%|██▌       | 1263/5000 [03:35<10:55,  5.70it/s]

steps= 101
steps= 136


 25%|██▌       | 1265/5000 [03:36<10:13,  6.09it/s]

steps= 104
steps= 70
steps= 60


 25%|██▌       | 1267/5000 [03:36<13:16,  4.69it/s]

steps= 315


 25%|██▌       | 1268/5000 [03:37<15:36,  3.99it/s]

steps= 254


 25%|██▌       | 1270/5000 [03:37<16:27,  3.78it/s]

steps= 239
steps= 92


 25%|██▌       | 1272/5000 [03:38<15:00,  4.14it/s]

steps= 337
steps= 127


 25%|██▌       | 1274/5000 [03:38<12:41,  4.89it/s]

steps= 124
steps= 163


 26%|██▌       | 1276/5000 [03:38<10:18,  6.02it/s]

steps= 90
steps= 83


 26%|██▌       | 1277/5000 [03:39<12:13,  5.08it/s]

steps= 170


 26%|██▌       | 1278/5000 [03:39<17:39,  3.51it/s]

steps= 230


 26%|██▌       | 1279/5000 [03:40<26:17,  2.36it/s]

steps= 397
steps= 71


 26%|██▌       | 1282/5000 [03:40<17:57,  3.45it/s]

steps= 290
steps= 155


 26%|██▌       | 1285/5000 [03:41<13:01,  4.76it/s]

steps= 71
steps= 62
steps= 119


 26%|██▌       | 1286/5000 [03:41<11:42,  5.28it/s]

steps= 138


 26%|██▌       | 1288/5000 [03:41<12:13,  5.06it/s]

steps= 190
steps= 56


 26%|██▌       | 1289/5000 [03:42<14:47,  4.18it/s]

steps= 314


 26%|██▌       | 1291/5000 [03:42<14:00,  4.41it/s]

steps= 225
steps= 71


 26%|██▌       | 1293/5000 [03:43<20:21,  3.03it/s]

steps= 463
steps= 105


 26%|██▌       | 1295/5000 [03:43<13:43,  4.50it/s]

steps= 59
steps= 48


 26%|██▌       | 1296/5000 [03:44<14:26,  4.28it/s]

steps= 179


 26%|██▌       | 1297/5000 [03:44<16:24,  3.76it/s]

steps= 248


 26%|██▌       | 1298/5000 [03:44<16:09,  3.82it/s]

steps= 235
steps= 74


 26%|██▌       | 1300/5000 [03:44<14:01,  4.40it/s]

steps= 224


 26%|██▌       | 1301/5000 [03:45<16:41,  3.69it/s]

steps= 219
episode: 1300 episode reward: -1227 eps: 0.8692218948263346 avg reward (last 100): -1052.4158415841584 episode loss:  28056.324
avg reward for last 100 episodes: -1052.4158415841584


 26%|██▌       | 1304/5000 [03:45<14:14,  4.32it/s]

steps= 182
steps= 29
steps= 24


 26%|██▌       | 1306/5000 [03:46<12:20,  4.99it/s]

steps= 121
steps= 83


 26%|██▌       | 1308/5000 [03:46<11:03,  5.56it/s]

steps= 143
steps= 157
steps= 24


 26%|██▌       | 1311/5000 [03:46<09:32,  6.45it/s]

steps= 91
steps= 86


 26%|██▌       | 1312/5000 [03:47<13:48,  4.45it/s]

steps= 392
steps= 25


 26%|██▋       | 1314/5000 [03:47<11:40,  5.26it/s]

steps= 196


 26%|██▋       | 1315/5000 [03:47<16:00,  3.84it/s]

steps= 374
steps= 22


 26%|██▋       | 1317/5000 [03:48<14:12,  4.32it/s]

steps= 208


 26%|██▋       | 1318/5000 [03:48<15:59,  3.84it/s]

steps= 303


 26%|██▋       | 1319/5000 [03:48<18:11,  3.37it/s]

steps= 180


 26%|██▋       | 1321/5000 [03:49<15:57,  3.84it/s]

steps= 250
steps= 181


 26%|██▋       | 1322/5000 [03:49<17:17,  3.54it/s]

steps= 272
steps= 85


 26%|██▋       | 1325/5000 [03:50<16:34,  3.70it/s]

steps= 421
steps= 79
steps= 76


 27%|██▋       | 1329/5000 [03:51<12:36,  4.86it/s]

steps= 166
steps= 19
steps= 34


 27%|██▋       | 1330/5000 [03:51<11:02,  5.54it/s]

steps= 101


 27%|██▋       | 1331/5000 [03:51<13:40,  4.47it/s]

steps= 230


 27%|██▋       | 1332/5000 [03:52<16:55,  3.61it/s]

steps= 320


 27%|██▋       | 1334/5000 [03:52<12:56,  4.72it/s]

steps= 234
steps= 89


 27%|██▋       | 1335/5000 [03:52<13:13,  4.62it/s]

steps= 164
steps= 52


 27%|██▋       | 1337/5000 [03:53<13:43,  4.45it/s]

steps= 317


 27%|██▋       | 1339/5000 [03:53<15:18,  3.99it/s]

steps= 376
steps= 116


 27%|██▋       | 1340/5000 [03:54<16:58,  3.59it/s]

steps= 425


 27%|██▋       | 1341/5000 [03:54<19:22,  3.15it/s]

steps= 397


 27%|██▋       | 1344/5000 [03:55<15:43,  3.88it/s]

steps= 285
steps= 17
steps= 86


 27%|██▋       | 1346/5000 [03:55<11:54,  5.11it/s]

steps= 25
steps= 90


 27%|██▋       | 1348/5000 [03:55<13:53,  4.38it/s]

steps= 430
steps= 129


 27%|██▋       | 1349/5000 [03:56<15:06,  4.03it/s]

steps= 218


 27%|██▋       | 1350/5000 [03:56<16:06,  3.78it/s]

steps= 154
steps= 51


 27%|██▋       | 1353/5000 [03:57<13:46,  4.41it/s]

steps= 268
steps= 50


 27%|██▋       | 1355/5000 [03:57<15:17,  3.97it/s]

steps= 273
steps= 51


 27%|██▋       | 1356/5000 [03:58<17:47,  3.41it/s]

steps= 241


 27%|██▋       | 1359/5000 [03:58<15:18,  3.96it/s]

steps= 277
steps= 46
steps= 42
steps= 40


 27%|██▋       | 1362/5000 [03:59<11:04,  5.47it/s]

steps= 87
steps= 146


 27%|██▋       | 1364/5000 [03:59<09:49,  6.17it/s]

steps= 62
steps= 178


 27%|██▋       | 1366/5000 [03:59<08:16,  7.32it/s]

steps= 91
steps= 46


 27%|██▋       | 1367/5000 [04:00<16:07,  3.76it/s]

steps= 386
steps= 32


 27%|██▋       | 1369/5000 [04:00<13:42,  4.41it/s]

steps= 120


 27%|██▋       | 1370/5000 [04:00<16:13,  3.73it/s]

steps= 268


 27%|██▋       | 1371/5000 [04:01<16:46,  3.61it/s]

steps= 196
steps= 120


 27%|██▋       | 1373/5000 [04:01<14:16,  4.23it/s]

steps= 179
steps= 24


 28%|██▊       | 1375/5000 [04:01<12:03,  5.01it/s]

steps= 109


 28%|██▊       | 1376/5000 [04:01<14:01,  4.30it/s]

steps= 207


 28%|██▊       | 1378/5000 [04:02<13:51,  4.35it/s]

steps= 308
steps= 136


 28%|██▊       | 1379/5000 [04:02<18:33,  3.25it/s]

steps= 389
steps=

 28%|██▊       | 1381/5000 [04:03<13:31,  4.46it/s]

 120
steps= 98
steps= 22


 28%|██▊       | 1383/5000 [04:03<15:38,  3.86it/s]

steps= 336


 28%|██▊       | 1384/5000 [04:04<19:00,  3.17it/s]

steps= 269


 28%|██▊       | 1385/5000 [04:04<19:04,  3.16it/s]

steps= 140


 28%|██▊       | 1387/5000 [04:05<17:50,  3.37it/s]

steps= 215
steps= 88


 28%|██▊       | 1388/5000 [04:05<14:46,  4.08it/s]

steps= 112


 28%|██▊       | 1389/5000 [04:05<19:33,  3.08it/s]

steps= 302
steps= 109


 28%|██▊       | 1391/5000 [04:06<18:28,  3.26it/s]

steps= 198


 28%|██▊       | 1392/5000 [04:06<20:56,  2.87it/s]

steps= 215


 28%|██▊       | 1393/5000 [04:07<20:42,  2.90it/s]

steps= 159


 28%|██▊       | 1395/5000 [04:08<23:54,  2.51it/s]

steps= 499
steps= 53


 28%|██▊       | 1396/5000 [04:08<26:21,  2.28it/s]

steps= 376


 28%|██▊       | 1398/5000 [04:09<21:22,  2.81it/s]

steps= 268
steps= 68


 28%|██▊       | 1399/5000 [04:09<19:08,  3.14it/s]

steps= 166


 28%|██▊       | 1400/5000 [04:10<20:52,  2.87it/s]

steps= 206


 28%|██▊       | 1402/5000 [04:10<18:17,  3.28it/s]

steps= 291
episode: 1400 episode reward: -1299 eps: 0.8605725621488737 avg reward (last 100): -1061.7623762376238 episode loss:  29105.46
avg reward for last 100 episodes: -1061.7623762376238
steps= 135


 28%|██▊       | 1403/5000 [04:10<17:07,  3.50it/s]

steps= 152


 28%|██▊       | 1405/5000 [04:11<15:43,  3.81it/s]

steps= 219
steps= 74


 28%|██▊       | 1407/5000 [04:11<13:58,  4.28it/s]

steps= 108
steps= 109


 28%|██▊       | 1408/5000 [04:12<21:29,  2.79it/s]

steps= 394
steps= 86


 28%|██▊       | 1410/5000 [04:12<16:58,  3.53it/s]

steps= 113
steps= 38


 28%|██▊       | 1413/5000 [04:13<12:26,  4.80it/s]

steps= 286
steps= 89
steps= 45


 28%|██▊       | 1415/5000 [04:13<11:53,  5.02it/s]

steps= 280
steps= 33


 28%|██▊       | 1417/5000 [04:13<11:58,  4.99it/s]

steps= 175


 28%|██▊       | 1418/5000 [04:14<21:31,  2.77it/s]

steps= 383


 28%|██▊       | 1419/5000 [04:14<20:08,  2.96it/s]

steps= 127


 28%|██▊       | 1422/5000 [04:15<14:18,  4.17it/s]

steps= 106
steps= 17
steps= 18


 28%|██▊       | 1425/5000 [04:16<18:10,  3.28it/s]

steps= 466
steps= 15
steps= 50


 29%|██▊       | 1427/5000 [04:16<14:34,  4.08it/s]

steps= 65
steps= 102


 29%|██▊       | 1428/5000 [04:16<20:24,  2.92it/s]

steps= 494


 29%|██▊       | 1429/5000 [04:17<26:49,  2.22it/s]

steps= 380


 29%|██▊       | 1431/5000 [04:18<21:25,  2.78it/s]

steps= 382
steps= 213


 29%|██▊       | 1433/5000 [04:18<17:10,  3.46it/s]

steps= 52
steps= 116


 29%|██▊       | 1436/5000 [04:18<12:17,  4.83it/s]

steps= 111
steps= 43
steps= 37


 29%|██▉       | 1438/5000 [04:19<09:54,  5.99it/s]

steps= 37
steps= 65


 29%|██▉       | 1439/5000 [04:19<15:05,  3.93it/s]

steps= 330


 29%|██▉       | 1441/5000 [04:19<13:07,  4.52it/s]

steps= 94
steps= 133


 29%|██▉       | 1442/5000 [04:19<11:37,  5.10it/s]

steps= 104


 29%|██▉       | 1444/5000 [04:20<11:16,  5.26it/s]

steps= 247
steps= 61
steps= 20
steps= 51


 29%|██▉       | 1449/5000 [04:20<07:24,  7.99it/s]

steps= 102
steps= 40
steps= 75
steps= 130


 29%|██▉       | 1451/5000 [04:21<12:04,  4.90it/s]

steps= 417


 29%|██▉       | 1452/5000 [04:22<20:28,  2.89it/s]

steps= 439
steps= 35


 29%|██▉       | 1456/5000 [04:22<13:19,  4.43it/s]

steps= 130
steps= 13
steps= 58


 29%|██▉       | 1457/5000 [04:23<18:24,  3.21it/s]

steps= 281


 29%|██▉       | 1458/5000 [04:23<27:14,  2.17it/s]

steps= 463


 29%|██▉       | 1460/5000 [04:24<19:38,  3.00it/s]

steps= 155
steps= 77


 29%|██▉       | 1461/5000 [04:24<21:28,  2.75it/s]

steps= 299


 29%|██▉       | 1462/5000 [04:25<20:26,  2.88it/s]

steps= 227


 29%|██▉       | 1463/5000 [04:25<20:24,  2.89it/s]

steps= 255
steps= 46


 29%|██▉       | 1467/5000 [04:25<12:28,  4.72it/s]

steps= 85
steps= 17
steps= 65


 29%|██▉       | 1470/5000 [04:26<08:51,  6.64it/s]

steps= 77
steps= 61
steps= 44


 29%|██▉       | 1473/5000 [04:26<09:15,  6.35it/s]

steps= 209
steps= 21
steps= 57
steps= 58


 30%|██▉       | 1476/5000 [04:26<08:00,  7.33it/s]

steps= 132
steps= 35


 30%|██▉       | 1477/5000 [04:27<11:13,  5.23it/s]

steps= 224


 30%|██▉       | 1478/5000 [04:27<13:06,  4.48it/s]

steps= 183


 30%|██▉       | 1479/5000 [04:28<18:27,  3.18it/s]

steps= 323


 30%|██▉       | 1480/5000 [04:28<16:48,  3.49it/s]

steps= 151


 30%|██▉       | 1481/5000 [04:28<16:13,  3.62it/s]

steps= 175


 30%|██▉       | 1483/5000 [04:28<14:32,  4.03it/s]

steps= 144
steps= 132


 30%|██▉       | 1484/5000 [04:29<12:58,  4.52it/s]

steps= 73


 30%|██▉       | 1486/5000 [04:29<11:19,  5.17it/s]

steps= 129
steps= 53


 30%|██▉       | 1488/5000 [04:30<15:57,  3.67it/s]

steps= 447
steps= 123


 30%|██▉       | 1489/5000 [04:30<15:28,  3.78it/s]

steps= 107
steps= 33


 30%|██▉       | 1493/5000 [04:30<10:30,  5.56it/s]

steps= 162
steps= 44
steps= 95


 30%|██▉       | 1496/5000 [04:31<07:45,  7.52it/s]

steps= 94
steps= 39
steps= 27
steps= 42


 30%|██▉       | 1498/5000 [04:31<07:47,  7.49it/s]

steps= 105


 30%|██▉       | 1499/5000 [04:31<09:06,  6.41it/s]

steps= 100


 30%|███       | 1501/5000 [04:32<10:24,  5.60it/s]

steps= 226
steps= 134
episode: 1500 episode reward: -1142 eps: 0.8520092960514319 avg reward (last 100): -1130.8217821782177 episode loss:  21376.088
avg reward for last 100 episodes: -1130.8217821782177
steps= 84


 30%|███       | 1503/5000 [04:32<10:03,  5.79it/s]

steps= 135


 30%|███       | 1504/5000 [04:32<16:41,  3.49it/s]

steps= 367


 30%|███       | 1507/5000 [04:33<12:57,  4.50it/s]

steps= 302
steps= 19
steps= 16
steps= 50


 30%|███       | 1509/5000 [04:33<12:35,  4.62it/s]

steps= 285


 30%|███       | 1510/5000 [04:33<12:47,  4.55it/s]

steps= 158


 30%|███       | 1513/5000 [04:34<14:37,  3.97it/s]

steps= 329
steps= 14
steps= 39


 30%|███       | 1514/5000 [04:34<14:53,  3.90it/s]

steps= 165


 30%|███       | 1515/5000 [04:35<16:24,  3.54it/s]

steps= 229


 30%|███       | 1517/5000 [04:35<13:48,  4.20it/s]

steps= 102
steps= 94


 30%|███       | 1519/5000 [04:36<12:02,  4.82it/s]

steps= 215
steps= 117


 30%|███       | 1520/5000 [04:36<10:26,  5.55it/s]

steps= 59


 30%|███       | 1522/5000 [04:36<14:28,  4.00it/s]

steps= 78
steps= 143
steps= 18


 30%|███       | 1524/5000 [04:37<15:39,  3.70it/s]

steps= 281
steps= 101


 31%|███       | 1526/5000 [04:37<12:25,  4.66it/s]

steps= 49


 31%|███       | 1527/5000 [04:38<14:54,  3.88it/s]

steps= 178


 31%|███       | 1528/5000 [04:38<20:37,  2.81it/s]

steps= 352


 31%|███       | 1531/5000 [04:39<15:16,  3.78it/s]

steps= 178
steps= 50
steps= 18
steps= 67


 31%|███       | 1534/5000 [04:39<11:20,  5.09it/s]

steps= 41
steps= 87


 31%|███       | 1535/5000 [04:39<10:02,  5.75it/s]

steps= 20


 31%|███       | 1537/5000 [04:40<11:04,  5.21it/s]

steps= 134
steps= 160


 31%|███       | 1538/5000 [04:40<11:23,  5.07it/s]

steps= 176


 31%|███       | 1539/5000 [04:40<18:26,  3.13it/s]

steps= 454


 31%|███       | 1540/5000 [04:41<23:15,  2.48it/s]

steps= 488
steps= 70


 31%|███       | 1542/5000 [04:41<18:13,  3.16it/s]

steps= 71


 31%|███       | 1544/5000 [04:42<14:35,  3.95it/s]

steps= 212
steps= 98


 31%|███       | 1547/5000 [04:42<12:35,  4.57it/s]

steps= 250
steps= 27
steps= 77


 31%|███       | 1548/5000 [04:42<10:44,  5.35it/s]

steps= 74
steps= 68


 31%|███       | 1550/5000 [04:43<09:52,  5.82it/s]

steps= 74


 31%|███       | 1551/5000 [04:43<10:41,  5.38it/s]

steps= 126


 31%|███       | 1553/5000 [04:43<10:30,  5.46it/s]

steps= 178
steps= 57


 31%|███       | 1555/5000 [04:44<12:48,  4.48it/s]

steps= 133
steps= 80
steps= 26


 31%|███       | 1558/5000 [04:44<10:47,  5.32it/s]

steps= 233
steps= 38


 31%|███       | 1560/5000 [04:44<08:40,  6.61it/s]

steps= 45
steps= 35


 31%|███       | 1561/5000 [04:45<14:41,  3.90it/s]

steps= 375


 31%|███       | 1562/5000 [04:45<18:30,  3.10it/s]

steps= 352


 31%|███▏      | 1563/5000 [04:46<21:00,  2.73it/s]

steps= 249


 31%|███▏      | 1565/5000 [04:46<16:47,  3.41it/s]

steps= 185
steps= 93


 31%|███▏      | 1566/5000 [04:47<14:34,  3.93it/s]

steps= 47


 31%|███▏      | 1568/5000 [04:47<12:30,  4.57it/s]

steps= 106
steps= 98


 31%|███▏      | 1569/5000 [04:47<12:13,  4.68it/s]

steps= 128


 31%|███▏      | 1571/5000 [04:47<11:09,  5.12it/s]

steps= 179
steps= 39


 31%|███▏      | 1572/5000 [04:48<14:23,  3.97it/s]

steps= 217


 31%|███▏      | 1573/5000 [04:48<15:40,  3.65it/s]

steps= 187


 31%|███▏      | 1574/5000 [04:48<16:17,  3.51it/s]

steps= 196
steps= 31


 32%|███▏      | 1576/5000 [04:49<14:33,  3.92it/s]

steps= 242


 32%|███▏      | 1577/5000 [04:49<18:10,  3.14it/s]

steps= 287


 32%|███▏      | 1579/5000 [04:50<14:58,  3.81it/s]

steps= 188
steps= 47
steps= 57


 32%|███▏      | 1581/5000 [04:50<12:11,  4.68it/s]

steps= 109


 32%|███▏      | 1582/5000 [04:50<12:18,  4.63it/s]

steps= 121
steps= 75


 32%|███▏      | 1584/5000 [04:51<11:49,  4.82it/s]

steps= 233


 32%|███▏      | 1585/5000 [04:51<14:57,  3.80it/s]

steps= 184


 32%|███▏      | 1586/5000 [04:51<16:53,  3.37it/s]

steps= 196
steps= 62


 32%|███▏      | 1588/5000 [04:52<15:11,  3.74it/s]

steps= 236


 32%|███▏      | 1589/5000 [04:52<18:42,  3.04it/s]

steps= 265


 32%|███▏      | 1591/5000 [04:53<15:28,  3.67it/s]

steps= 196
steps= 157


 32%|███▏      | 1593/5000 [04:53<12:42,  4.47it/s]

steps= 65
steps= 134


 32%|███▏      | 1595/5000 [04:53<10:03,  5.64it/s]

steps= 65
steps= 34


 32%|███▏      | 1596/5000 [04:53<14:28,  3.92it/s]

steps= 333
steps= 15


 32%|███▏      | 1599/5000 [04:54<10:53,  5.21it/s]

steps= 104
steps= 126


 32%|███▏      | 1601/5000 [04:55<14:15,  3.97it/s]

steps= 470
steps= 131
episode: 1600 episode reward: 861 eps: 0.843531240114621 avg reward (last 100): -1072.980198019802 episode loss:  39514.68
avg reward for last 100 episodes: -1072.980198019802


 32%|███▏      | 1603/5000 [04:55<11:12,  5.05it/s]

steps= 75
steps= 109


 32%|███▏      | 1605/5000 [04:55<11:48,  4.79it/s]

steps= 286
steps= 98


 32%|███▏      | 1608/5000 [04:56<09:02,  6.25it/s]

steps= 117
steps= 55
steps= 88
steps= 33


 32%|███▏      | 1610/5000 [04:56<10:07,  5.58it/s]

steps= 257


 32%|███▏      | 1612/5000 [04:56<09:57,  5.67it/s]

steps= 148
steps= 122


 32%|███▏      | 1614/5000 [04:57<08:58,  6.29it/s]

steps= 56
steps= 107


 32%|███▏      | 1617/5000 [04:57<11:26,  4.93it/s]

steps= 384
steps= 26
steps= 27


 32%|███▏      | 1618/5000 [04:58<14:27,  3.90it/s]

steps= 206
steps=

 32%|███▏      | 1619/5000 [04:58<13:40,  4.12it/s]

 196


 32%|███▏      | 1621/5000 [04:59<14:54,  3.78it/s]

steps= 304
steps= 54


 32%|███▏      | 1623/5000 [04:59<18:39,  3.02it/s]

steps= 550
steps= 110


 32%|███▏      | 1624/5000 [05:00<19:26,  2.89it/s]

steps= 275


 32%|███▎      | 1625/5000 [05:00<20:35,  2.73it/s]

steps= 252


 33%|███▎      | 1626/5000 [05:01<19:14,  2.92it/s]

steps= 229
steps= 29


 33%|███▎      | 1629/5000 [05:01<14:16,  3.94it/s]

steps= 179
steps= 61


 33%|███▎      | 1631/5000 [05:01<11:40,  4.81it/s]

steps= 104
steps= 90


 33%|███▎      | 1633/5000 [05:02<09:49,  5.71it/s]

steps= 128
steps= 32
steps= 24


 33%|███▎      | 1635/5000 [05:03<13:27,  4.17it/s]

steps= 402


 33%|███▎      | 1636/5000 [05:03<20:07,  2.78it/s]

steps= 405


 33%|███▎      | 1638/5000 [05:04<18:48,  2.98it/s]

steps= 372
steps= 77
steps= 35


 33%|███▎      | 1640/5000 [05:04<17:19,  3.23it/s]

steps= 262


 33%|███▎      | 1641/5000 [05:05<16:25,  3.41it/s]

steps= 247


 33%|███▎      | 1642/5000 [05:05<15:57,  3.51it/s]

steps= 224
steps= 11


 33%|███▎      | 1644/5000 [05:05<14:47,  3.78it/s]

steps= 311


 33%|███▎      | 1647/5000 [05:06<13:08,  4.25it/s]

steps= 337
steps= 56
steps= 40


 33%|███▎      | 1649/5000 [05:06<11:13,  4.98it/s]

steps= 35
steps= 75


 33%|███▎      | 1652/5000 [05:07<10:10,  5.49it/s]

steps= 264
steps= 26
steps= 101


 33%|███▎      | 1653/5000 [05:07<11:18,  4.94it/s]

steps= 95
steps= 68


 33%|███▎      | 1655/5000 [05:07<10:37,  5.25it/s]

steps= 140


 33%|███▎      | 1656/5000 [05:07<13:09,  4.24it/s]

steps= 134
steps= 6


 33%|███▎      | 1659/5000 [05:08<13:08,  4.24it/s]

steps= 445
steps= 137


 33%|███▎      | 1660/5000 [05:09<13:56,  3.99it/s]

steps= 280


 33%|███▎      | 1663/5000 [05:09<10:21,  5.37it/s]

steps= 205
steps= 58
steps= 14


 33%|███▎      | 1664/5000 [05:09<09:53,  5.62it/s]

steps= 134
steps= 54
steps= 34


 33%|███▎      | 1668/5000 [05:10<08:35,  6.46it/s]

steps= 247
steps= 33


 33%|███▎      | 1669/5000 [05:10<08:39,  6.41it/s]

steps= 121


 33%|███▎      | 1670/5000 [05:10<12:18,  4.51it/s]

steps= 161


 33%|███▎      | 1671/5000 [05:11<14:39,  3.79it/s]

steps= 186


 33%|███▎      | 1672/5000 [05:11<18:10,  3.05it/s]

steps= 137


 33%|███▎      | 1673/5000 [05:12<22:18,  2.49it/s]

steps= 185


 33%|███▎      | 1674/5000 [05:12<26:30,  2.09it/s]

steps= 423


 34%|███▎      | 1675/5000 [05:13<23:29,  2.36it/s]

steps= 79


 34%|███▎      | 1677/5000 [05:13<20:10,  2.74it/s]

steps= 106
steps= 69


 34%|███▎      | 1679/5000 [05:14<18:13,  3.04it/s]

steps= 344
steps= 20
steps= 12


 34%|███▎      | 1681/5000 [05:14<17:16,  3.20it/s]

steps= 206


 34%|███▎      | 1682/5000 [05:15<18:01,  3.07it/s]

steps= 92


 34%|███▎      | 1683/5000 [05:15<17:02,  3.24it/s]

steps= 114


 34%|███▎      | 1685/5000 [05:16<14:36,  3.78it/s]

steps= 103
steps= 27


 34%|███▎      | 1687/5000 [05:16<13:20,  4.14it/s]

steps= 231
steps= 69


 34%|███▍      | 1689/5000 [05:16<11:12,  4.92it/s]

steps= 84
steps= 21


 34%|███▍      | 1690/5000 [05:17<10:25,  5.29it/s]

steps= 53


 34%|███▍      | 1691/5000 [05:17<18:32,  2.97it/s]

steps= 308


 34%|███▍      | 1693/5000 [05:18<19:09,  2.88it/s]

steps= 196
steps= 53
steps= 15


 34%|███▍      | 1695/5000 [05:18<16:02,  3.43it/s]

steps= 57


 34%|███▍      | 1697/5000 [05:19<13:59,  3.94it/s]

steps= 115
steps= 85


 34%|███▍      | 1698/5000 [05:19<14:10,  3.88it/s]

steps= 149
steps= 42


 34%|███▍      | 1700/5000 [05:19<12:37,  4.36it/s]

steps= 96


 34%|███▍      | 1701/5000 [05:20<17:15,  3.19it/s]

steps= 266
episode: 1700 episode reward: 726 eps: 0.8351375464409935 avg reward (last 100): -1032.970297029703 episode loss:  39589.168
avg reward for last 100 episodes: -1032.970297029703


 34%|███▍      | 1703/5000 [05:21<17:35,  3.12it/s]

steps= 310
steps= 58


 34%|███▍      | 1704/5000 [05:21<16:17,  3.37it/s]

steps= 99


 34%|███▍      | 1705/5000 [05:21<17:02,  3.22it/s]

steps= 131


 34%|███▍      | 1707/5000 [05:22<13:38,  4.02it/s]

steps= 155
steps= 123
steps= 19


 34%|███▍      | 1709/5000 [05:22<13:22,  4.10it/s]

steps= 358


 34%|███▍      | 1711/5000 [05:23<17:22,  3.16it/s]

steps= 460
steps= 36


 34%|███▍      | 1712/5000 [05:23<15:21,  3.57it/s]

steps= 75


 34%|███▍      | 1713/5000 [05:24<20:01,  2.74it/s]

steps= 401
steps= 24


 34%|███▍      | 1717/5000 [05:24<13:45,  3.98it/s]

steps= 182
steps= 27
steps= 31


 34%|███▍      | 1718/5000 [05:25<12:11,  4.49it/s]

steps= 68
steps= 24


 34%|███▍      | 1720/5000 [05:25<11:03,  4.95it/s]

steps= 190


 34%|███▍      | 1723/5000 [05:25<10:35,  5.15it/s]

steps= 223
steps= 17
steps= 67


 34%|███▍      | 1724/5000 [05:26<10:44,  5.08it/s]

steps= 168


 35%|███▍      | 1727/5000 [05:26<09:24,  5.80it/s]

steps= 111
steps= 25
steps= 47


 35%|███▍      | 1728/5000 [05:26<12:31,  4.35it/s]

steps= 363


 35%|███▍      | 1730/5000 [05:27<11:24,  4.78it/s]

steps= 134
steps= 157


 35%|███▍      | 1733/5000 [05:27<07:54,  6.88it/s]

steps= 28
steps= 71
steps= 67


 35%|███▍      | 1734/5000 [05:27<09:12,  5.92it/s]

steps= 122
steps= 29


 35%|███▍      | 1737/5000 [05:28<09:13,  5.89it/s]

steps= 325
steps= 91


 35%|███▍      | 1738/5000 [05:28<08:42,  6.24it/s]

steps= 102


 35%|███▍      | 1740/5000 [05:28<11:25,  4.75it/s]

steps= 249
steps= 84


 35%|███▍      | 1742/5000 [05:29<11:23,  4.77it/s]

steps= 281
steps= 90


 35%|███▍      | 1745/5000 [05:30<13:27,  4.03it/s]

steps= 671
steps= 34
steps= 43


 35%|███▍      | 1746/5000 [05:30<12:17,  4.41it/s]

steps= 90


 35%|███▍      | 1748/5000 [05:30<12:26,  4.35it/s]

steps= 195
steps= 117


 35%|███▌      | 1750/5000 [05:30<09:59,  5.42it/s]

steps= 8
steps= 70


 35%|███▌      | 1751/5000 [05:31<09:01,  6.00it/s]

steps= 59


 35%|███▌      | 1753/5000 [05:31<08:54,  6.08it/s]

steps= 217
steps= 40


 35%|███▌      | 1754/5000 [05:31<11:20,  4.77it/s]

steps= 159


 35%|███▌      | 1755/5000 [05:32<12:12,  4.43it/s]

steps= 136


 35%|███▌      | 1756/5000 [05:32<14:00,  3.86it/s]

steps= 191


 35%|███▌      | 1757/5000 [05:32<14:42,  3.67it/s]

steps= 160


 35%|███▌      | 1759/5000 [05:33<14:48,  3.65it/s]

steps= 332
steps= 45
steps= 24


 35%|███▌      | 1762/5000 [05:33<11:59,  4.50it/s]

steps= 274
steps= 96


 35%|███▌      | 1763/5000 [05:34<11:01,  4.89it/s]

steps= 95


 35%|███▌      | 1764/5000 [05:34<16:41,  3.23it/s]

steps= 421


 35%|███▌      | 1766/5000 [05:35<14:33,  3.70it/s]

steps= 134
steps= 87


 35%|███▌      | 1767/5000 [05:35<21:00,  2.56it/s]

steps= 355
steps= 39


 35%|███▌      | 1770/5000 [05:36<13:56,  3.86it/s]

steps= 97
steps= 63


 35%|███▌      | 1771/5000 [05:36<17:47,  3.03it/s]

steps= 258


 35%|███▌      | 1773/5000 [05:37<17:50,  3.01it/s]

steps= 422
steps= 92


 36%|███▌      | 1776/5000 [05:37<11:40,  4.60it/s]

steps= 93
steps= 41
steps= 42


 36%|███▌      | 1778/5000 [05:38<12:34,  4.27it/s]

steps= 154
steps= 16


 36%|███▌      | 1779/5000 [05:38<11:54,  4.51it/s]

steps= 97


 36%|███▌      | 1780/5000 [05:38<12:06,  4.43it/s]

steps= 141
steps= 29


 36%|███▌      | 1782/5000 [05:39<11:33,  4.64it/s]

steps= 255


 36%|███▌      | 1784/5000 [05:39<11:35,  4.62it/s]

steps= 149
steps= 89


 36%|███▌      | 1785/5000 [05:39<11:36,  4.62it/s]

steps= 119


 36%|███▌      | 1786/5000 [05:40<11:53,  4.50it/s]

steps= 100


 36%|███▌      | 1787/5000 [05:41<25:07,  2.13it/s]

steps= 366


 36%|███▌      | 1788/5000 [05:41<23:03,  2.32it/s]

steps= 107


 36%|███▌      | 1789/5000 [05:41<21:50,  2.45it/s]

steps= 143


 36%|███▌      | 1791/5000 [05:43<24:53,  2.15it/s]

steps= 435
steps= 71


 36%|███▌      | 1792/5000 [05:43<20:54,  2.56it/s]

steps= 50


 36%|███▌      | 1793/5000 [05:43<21:16,  2.51it/s]

steps= 245


 36%|███▌      | 1795/5000 [05:44<18:07,  2.95it/s]

steps= 251
steps= 71


 36%|███▌      | 1796/5000 [05:44<17:27,  3.06it/s]

steps= 118


 36%|███▌      | 1798/5000 [05:45<15:23,  3.47it/s]

steps= 360
steps= 192


 36%|███▌      | 1800/5000 [05:45<12:19,  4.33it/s]

steps= 128
steps= 25


 36%|███▌      | 1801/5000 [05:46<24:14,  2.20it/s]

steps= 412
episode: 1800 episode reward: -1420 eps: 0.8268273755702407 avg reward (last 100): -1039.7029702970297 episode loss:  26165.832
avg reward for last 100 episodes: -1039.7029702970297


 36%|███▌      | 1803/5000 [05:47<21:12,  2.51it/s]

steps= 255
steps= 31


 36%|███▌      | 1804/5000 [05:47<19:21,  2.75it/s]

steps= 101


 36%|███▌      | 1805/5000 [05:48<20:43,  2.57it/s]

steps= 188


 36%|███▌      | 1806/5000 [05:48<21:22,  2.49it/s]

steps= 126


 36%|███▌      | 1807/5000 [05:48<19:49,  2.68it/s]

steps= 114


 36%|███▌      | 1808/5000 [05:49<23:58,  2.22it/s]

steps= 257
steps= 28


 36%|███▌      | 1810/5000 [05:49<21:17,  2.50it/s]

steps= 291


 36%|███▌      | 1811/5000 [05:50<24:11,  2.20it/s]

steps= 422
steps= 82


 36%|███▋      | 1813/5000 [05:50<18:59,  2.80it/s]

steps= 143


 36%|███▋      | 1815/5000 [05:51<14:57,  3.55it/s]

steps= 182
steps= 85


 36%|███▋      | 1816/5000 [05:51<13:54,  3.82it/s]

steps= 192


 36%|███▋      | 1818/5000 [05:52<14:42,  3.61it/s]

steps= 319
steps= 67


 36%|███▋      | 1819/5000 [05:52<21:27,  2.47it/s]

steps= 340


 36%|███▋      | 1820/5000 [05:53<28:25,  1.86it/s]

steps= 370
steps= 23


 36%|███▋      | 1822/5000 [05:53<22:02,  2.40it/s]

steps= 95


 36%|███▋      | 1823/5000 [05:54<18:58,  2.79it/s]

steps= 56


 36%|███▋      | 1824/5000 [05:54<16:42,  3.17it/s]

steps= 124


 37%|███▋      | 1827/5000 [05:55<15:19,  3.45it/s]

steps= 391
steps= 98
steps= 75
steps= 34


 37%|███▋      | 1830/5000 [05:55<12:59,  4.06it/s]

steps= 321
steps= 89


 37%|███▋      | 1832/5000 [05:56<10:32,  5.01it/s]

steps= 38
steps= 102


 37%|███▋      | 1833/5000 [05:56<09:25,  5.60it/s]

steps= 55


 37%|███▋      | 1836/5000 [05:56<12:32,  4.20it/s]

steps= 410
steps= 58
steps= 45


 37%|███▋      | 1837/5000 [05:57<10:50,  4.86it/s]

steps= 110
steps= 37


 37%|███▋      | 1839/5000 [05:57<11:55,  4.42it/s]

steps= 218


 37%|███▋      | 1840/5000 [05:57<12:29,  4.22it/s]

steps= 116


 37%|███▋      | 1841/5000 [05:58<12:21,  4.26it/s]

steps= 229
steps= 65


 37%|███▋      | 1844/5000 [05:58<10:36,  4.96it/s]

steps= 214
steps= 62


 37%|███▋      | 1845/5000 [05:58<10:41,  4.92it/s]

steps= 94


 37%|███▋      | 1847/5000 [05:59<14:05,  3.73it/s]

steps= 243
steps= 34


 37%|███▋      | 1848/5000 [05:59<13:55,  3.77it/s]

steps= 178


 37%|███▋      | 1849/5000 [06:00<13:24,  3.92it/s]

steps= 104


 37%|███▋      | 1851/5000 [06:00<12:31,  4.19it/s]

steps= 243
steps= 95


 37%|███▋      | 1854/5000 [06:01<11:58,  4.38it/s]

steps= 251
steps= 16
steps= 21


 37%|███▋      | 1855/5000 [06:01<10:29,  4.99it/s]

steps= 67


 37%|███▋      | 1856/5000 [06:01<16:06,  3.25it/s]

steps= 257


 37%|███▋      | 1857/5000 [06:02<16:15,  3.22it/s]

steps= 154


 37%|███▋      | 1858/5000 [06:02<20:17,  2.58it/s]

steps= 169


 37%|███▋      | 1859/5000 [06:03<29:15,  1.79it/s]

steps= 345


 37%|███▋      | 1860/5000 [06:03<26:25,  1.98it/s]

steps= 141


 37%|███▋      | 1861/5000 [06:04<26:24,  1.98it/s]

steps= 313


 37%|███▋      | 1862/5000 [06:04<24:07,  2.17it/s]

steps= 209


 37%|███▋      | 1863/5000 [06:05<23:53,  2.19it/s]

steps= 178


 37%|███▋      | 1864/5000 [06:05<25:13,  2.07it/s]

steps= 315


 37%|███▋      | 1865/5000 [06:06<21:13,  2.46it/s]

steps= 158


 37%|███▋      | 1866/5000 [06:06<23:21,  2.24it/s]

steps= 416


 37%|███▋      | 1867/5000 [06:06<20:45,  2.52it/s]

steps= 207


 37%|███▋      | 1868/5000 [06:07<22:46,  2.29it/s]

steps= 192


 37%|███▋      | 1869/5000 [06:07<19:08,  2.73it/s]

steps= 59


 37%|███▋      | 1870/5000 [06:07<17:41,  2.95it/s]

steps= 131
steps= 10


 37%|███▋      | 1874/5000 [06:08<10:44,  4.85it/s]

steps= 92
steps= 53
steps= 39


 38%|███▊      | 1875/5000 [06:08<16:34,  3.14it/s]

steps= 390


 38%|███▊      | 1876/5000 [06:09<16:51,  3.09it/s]

steps= 154


 38%|███▊      | 1877/5000 [06:09<20:34,  2.53it/s]

steps= 253


 38%|███▊      | 1878/5000 [06:10<22:29,  2.31it/s]

steps= 276
steps= 34
steps= 30


 38%|███▊      | 1881/5000 [06:10<18:00,  2.89it/s]

steps= 190


 38%|███▊      | 1882/5000 [06:10<16:42,  3.11it/s]

steps= 124
steps= 11


 38%|███▊      | 1885/5000 [06:11<13:27,  3.86it/s]

steps= 225
steps= 119


 38%|███▊      | 1888/5000 [06:12<10:57,  4.74it/s]

steps= 229
steps= 79
steps= 34


 38%|███▊      | 1889/5000 [06:12<09:23,  5.52it/s]

steps= 68


 38%|███▊      | 1893/5000 [06:12<08:42,  5.95it/s]

steps= 208
steps= 14
steps= 29
steps= 76
steps= 239


 38%|███▊      | 1896/5000 [06:13<10:41,  4.84it/s]

steps= 202
steps= 62


 38%|███▊      | 1898/5000 [06:13<09:00,  5.74it/s]

steps= 20
steps= 74
steps= 47


 38%|███▊      | 1900/5000 [06:14<11:08,  4.64it/s]

steps= 359


 38%|███▊      | 1901/5000 [06:14<13:28,  3.83it/s]

steps= 236
episode: 1900 episode reward: 756 eps: 0.8185998963952398 avg reward (last 100): -1024.1089108910892 episode loss:  36702.54
avg reward for last 100 episodes: -1024.1089108910892
steps= 28


 38%|███▊      | 1904/5000 [06:15<12:02,  4.29it/s]

steps= 201
steps= 113


 38%|███▊      | 1905/5000 [06:15<11:05,  4.65it/s]

steps= 177


 38%|███▊      | 1906/5000 [06:15<13:34,  3.80it/s]

steps= 262


 38%|███▊      | 1908/5000 [06:16<16:30,  3.12it/s]

steps= 388
steps= 132


 38%|███▊      | 1909/5000 [06:16<13:53,  3.71it/s]

steps= 69
steps= 65


 38%|███▊      | 1911/5000 [06:17<13:05,  3.93it/s]

steps= 310


 38%|███▊      | 1913/5000 [06:17<11:32,  4.46it/s]

steps= 108
steps= 95


 38%|███▊      | 1914/5000 [06:18<15:23,  3.34it/s]

steps= 153


 38%|███▊      | 1916/5000 [06:18<14:26,  3.56it/s]

steps= 338
steps= 130


 38%|███▊      | 1917/5000 [06:18<13:24,  3.83it/s]

steps= 198
steps= 26


 38%|███▊      | 1920/5000 [06:19<10:03,  5.10it/s]

steps= 155
steps= 139


 38%|███▊      | 1922/5000 [06:19<08:55,  5.74it/s]

steps= 125
steps= 36


 38%|███▊      | 1923/5000 [06:19<09:39,  5.31it/s]

steps= 189
steps= 9


 38%|███▊      | 1925/5000 [06:20<09:36,  5.33it/s]

steps= 256


 39%|███▊      | 1929/5000 [06:20<09:34,  5.34it/s]

steps= 394
steps= 27
steps= 32
steps= 54


 39%|███▊      | 1932/5000 [06:21<10:20,  4.94it/s]

steps= 428
steps= 107
steps= 20


 39%|███▊      | 1933/5000 [06:21<10:34,  4.83it/s]

steps= 83
steps= 99


 39%|███▊      | 1937/5000 [06:22<09:54,  5.16it/s]

steps= 290
steps= 33
steps= 25


 39%|███▉      | 1938/5000 [06:22<10:11,  5.00it/s]

steps= 207
steps= 49


 39%|███▉      | 1941/5000 [06:22<08:19,  6.13it/s]

steps= 121
steps= 34


 39%|███▉      | 1942/5000 [06:23<08:17,  6.14it/s]

steps= 44


 39%|███▉      | 1943/5000 [06:23<11:51,  4.29it/s]

steps= 223


 39%|███▉      | 1944/5000 [06:24<18:22,  2.77it/s]

steps= 423
steps= 30


 39%|███▉      | 1946/5000 [06:24<17:40,  2.88it/s]

steps= 319


 39%|███▉      | 1948/5000 [06:25<13:45,  3.70it/s]

steps= 190
steps= 41


 39%|███▉      | 1949/5000 [06:25<15:36,  3.26it/s]

steps= 170


 39%|███▉      | 1950/5000 [06:25<15:27,  3.29it/s]

steps= 112


 39%|███▉      | 1952/5000 [06:26<14:50,  3.42it/s]

steps= 277
steps= 165


 39%|███▉      | 1954/5000 [06:26<13:22,  3.80it/s]

steps= 158
steps= 90


 39%|███▉      | 1955/5000 [06:27<11:54,  4.26it/s]

steps= 88


 39%|███▉      | 1957/5000 [06:27<11:28,  4.42it/s]

steps= 282
steps= 71


 39%|███▉      | 1958/5000 [06:27<09:46,  5.19it/s]

steps= 46


 39%|███▉      | 1960/5000 [06:28<12:58,  3.90it/s]

steps= 275
steps= 59


 39%|███▉      | 1963/5000 [06:28<10:09,  4.98it/s]

steps= 118
steps= 13
steps= 145


 39%|███▉      | 1965/5000 [06:29<08:32,  5.93it/s]

steps= 35
steps= 69
steps= 48


 39%|███▉      | 1968/5000 [06:29<07:52,  6.41it/s]

steps= 153
steps= 64


 39%|███▉      | 1969/5000 [06:29<10:03,  5.02it/s]

steps= 341
steps= 25


 39%|███▉      | 1971/5000 [06:30<09:47,  5.16it/s]

steps= 323


 39%|███▉      | 1972/5000 [06:30<10:22,  4.87it/s]

steps= 78


 39%|███▉      | 1974/5000 [06:30<09:15,  5.45it/s]

steps= 79
steps= 44


 40%|███▉      | 1975/5000 [06:30<08:39,  5.83it/s]

steps= 48


 40%|███▉      | 1976/5000 [06:31<09:12,  5.48it/s]

steps= 92


 40%|███▉      | 1978/5000 [06:31<10:41,  4.71it/s]

steps= 263
steps= 102
steps= 19


 40%|███▉      | 1981/5000 [06:32<08:55,  5.64it/s]

steps= 77
steps= 130


 40%|███▉      | 1983/5000 [06:32<08:58,  5.61it/s]

steps= 77
steps= 44


 40%|███▉      | 1984/5000 [06:32<12:31,  4.02it/s]

steps= 88


 40%|███▉      | 1986/5000 [06:33<13:27,  3.73it/s]

steps= 335
steps= 78
steps= 38


 40%|███▉      | 1988/5000 [06:33<10:35,  4.74it/s]

steps= 79


 40%|███▉      | 1990/5000 [06:34<09:41,  5.18it/s]

steps= 110
steps= 111


 40%|███▉      | 1992/5000 [06:34<08:44,  5.73it/s]

steps= 63
steps= 100


 40%|███▉      | 1993/5000 [06:34<10:06,  4.96it/s]

steps= 133
steps= 26


 40%|███▉      | 1995/5000 [06:34<09:01,  5.55it/s]

steps= 86


 40%|███▉      | 1996/5000 [06:35<12:38,  3.96it/s]

steps= 356


 40%|███▉      | 1998/5000 [06:35<14:48,  3.38it/s]

steps= 461
steps= 64


 40%|████      | 2000/5000 [06:36<12:05,  4.14it/s]

steps= 26
steps= 68


 40%|████      | 2002/5000 [06:36<13:44,  3.63it/s]

steps= 263
episode: 2000 episode reward: 729 eps: 0.8104542860789328 avg reward (last 100): -947.3663366336634 episode loss:  37815.316
avg reward for last 100 episodes: -947.3663366336634
steps= 39


 40%|████      | 2003/5000 [06:37<17:19,  2.88it/s]

steps= 281


 40%|████      | 2005/5000 [06:37<14:08,  3.53it/s]

steps= 127
steps= 60


 40%|████      | 2007/5000 [06:38<15:44,  3.17it/s]

steps= 349
steps= 31


 40%|████      | 2008/5000 [06:38<15:09,  3.29it/s]

steps= 189
steps= 81


 40%|████      | 2010/5000 [06:39<12:27,  4.00it/s]

steps= 178
steps= 78


 40%|████      | 2012/5000 [06:39<10:23,  4.79it/s]

steps= 95


 40%|████      | 2013/5000 [06:39<10:58,  4.54it/s]

steps= 217
steps= 31
steps= 58


 40%|████      | 2016/5000 [06:40<11:36,  4.28it/s]

steps= 154


 40%|████      | 2017/5000 [06:40<12:21,  4.03it/s]

steps= 112


 40%|████      | 2018/5000 [06:41<18:15,  2.72it/s]

steps= 371
steps= 16


 40%|████      | 2021/5000 [06:41<12:00,  4.13it/s]

steps= 166
steps= 94


 40%|████      | 2023/5000 [06:42<13:27,  3.68it/s]

steps= 261
steps= 62


 40%|████      | 2024/5000 [06:43<23:51,  2.08it/s]

steps= 499


 41%|████      | 2026/5000 [06:43<16:52,  2.94it/s]

steps= 94
steps= 146


 41%|████      | 2028/5000 [06:43<11:28,  4.32it/s]

steps= 60
steps= 119


 41%|████      | 2030/5000 [06:44<11:13,  4.41it/s]

steps= 196
steps= 39
steps= 43


 41%|████      | 2033/5000 [06:44<09:20,  5.30it/s]

steps= 179
steps= 54


 41%|████      | 2034/5000 [06:45<15:06,  3.27it/s]

steps= 266
steps= 53


 41%|████      | 2036/5000 [06:45<12:43,  3.88it/s]

steps= 97
steps= 66


 41%|████      | 2041/5000 [06:46<08:16,  5.96it/s]

steps= 152
steps= 19
steps= 10
steps= 33


 41%|████      | 2043/5000 [06:46<08:54,  5.53it/s]

steps= 109
steps= 127


 41%|████      | 2045/5000 [06:46<08:22,  5.88it/s]

steps= 100
steps= 146


 41%|████      | 2047/5000 [06:47<12:26,  3.95it/s]

steps= 269
steps= 97
steps= 14


 41%|████      | 2049/5000 [06:48<12:12,  4.03it/s]

steps= 243


 41%|████      | 2050/5000 [06:48<12:59,  3.78it/s]

steps= 255


 41%|████      | 2052/5000 [06:49<14:17,  3.44it/s]

steps= 229
steps= 72


 41%|████      | 2053/5000 [06:49<13:50,  3.55it/s]

steps= 91


 41%|████      | 2054/5000 [06:49<13:41,  3.59it/s]

steps= 158


 41%|████      | 2057/5000 [06:49<10:21,  4.73it/s]

steps= 223
steps= 47
steps= 89


 41%|████      | 2058/5000 [06:50<11:47,  4.16it/s]

steps= 129


 41%|████      | 2059/5000 [06:50<12:31,  3.91it/s]

steps= 174


 41%|████      | 2060/5000 [06:50<12:47,  3.83it/s]

steps= 196


 41%|████      | 2061/5000 [06:51<12:03,  4.06it/s]

steps= 108
steps= 48


 41%|████▏     | 2064/5000 [06:51<08:50,  5.53it/s]

steps= 46
steps= 92


 41%|████▏     | 2066/5000 [06:51<10:03,  4.86it/s]

steps= 143
steps= 153


 41%|████▏     | 2067/5000 [06:52<10:04,  4.85it/s]

steps= 166


 41%|████▏     | 2068/5000 [06:52<11:17,  4.33it/s]

steps= 143


 41%|████▏     | 2069/5000 [06:52<11:08,  4.38it/s]

steps= 89


 41%|████▏     | 2070/5000 [06:53<16:11,  3.01it/s]

steps= 201


 41%|████▏     | 2072/5000 [06:53<12:33,  3.89it/s]

steps= 94
steps= 64


 41%|████▏     | 2073/5000 [06:53<12:14,  3.99it/s]

steps= 198


 42%|████▏     | 2076/5000 [06:54<10:14,  4.76it/s]

steps= 206
steps= 66
steps= 16


 42%|████▏     | 2077/5000 [06:54<15:57,  3.05it/s]

steps= 299


 42%|████▏     | 2078/5000 [06:55<17:10,  2.83it/s]

steps= 250
steps= 19


 42%|████▏     | 2080/5000 [06:55<13:30,  3.60it/s]

steps= 84
steps= 59


 42%|████▏     | 2083/5000 [06:56<11:04,  4.39it/s]

steps= 192
steps= 36


 42%|████▏     | 2084/5000 [06:56<11:53,  4.09it/s]

steps= 110


 42%|████▏     | 2086/5000 [06:56<10:25,  4.66it/s]

steps= 137
steps= 75


 42%|████▏     | 2089/5000 [06:57<08:00,  6.05it/s]

steps= 119
steps= 73
steps= 93


 42%|████▏     | 2091/5000 [06:57<06:54,  7.01it/s]

steps= 11
steps= 56


 42%|████▏     | 2092/5000 [06:57<08:07,  5.97it/s]

steps= 211


 42%|████▏     | 2094/5000 [06:58<12:41,  3.81it/s]

steps= 298
steps= 115


 42%|████▏     | 2095/5000 [06:58<10:24,  4.65it/s]

steps= 84


 42%|████▏     | 2096/5000 [06:58<12:47,  3.78it/s]

steps= 319


 42%|████▏     | 2097/5000 [06:59<20:08,  2.40it/s]

steps= 444


 42%|████▏     | 2100/5000 [07:00<14:01,  3.45it/s]

steps= 207
steps= 42
steps= 96


 42%|████▏     | 2101/5000 [07:00<12:25,  3.89it/s]

steps= 74
episode: 2100 episode reward: -1082 eps: 0.802389729972035 avg reward (last 100): -1062.8712871287128 episode loss:  22217.71
avg reward for last 100 episodes: -1062.8712871287128


 42%|████▏     | 2103/5000 [07:00<11:19,  4.26it/s]

steps= 121
steps= 157


 42%|████▏     | 2105/5000 [07:00<08:54,  5.42it/s]

steps= 48
steps= 25


 42%|████▏     | 2107/5000 [07:01<11:19,  4.26it/s]

steps= 164
steps= 89
steps= 13


 42%|████▏     | 2109/5000 [07:02<12:39,  3.81it/s]

steps= 268


 42%|████▏     | 2110/5000 [07:02<14:25,  3.34it/s]

steps= 115


 42%|████▏     | 2111/5000 [07:02<15:07,  3.18it/s]

steps= 166


 42%|████▏     | 2112/5000 [07:03<15:10,  3.17it/s]

steps= 148


 42%|████▏     | 2114/5000 [07:03<14:56,  3.22it/s]

steps= 331
steps= 69


 42%|████▏     | 2115/5000 [07:03<12:03,  3.99it/s]

steps= 99


 42%|████▏     | 2117/5000 [07:04<11:49,  4.06it/s]

steps= 228
steps= 62


 42%|████▏     | 2119/5000 [07:04<09:30,  5.05it/s]

steps= 66
steps= 81


 42%|████▏     | 2121/5000 [07:05<09:16,  5.17it/s]

steps= 209
steps= 81


 42%|████▏     | 2122/5000 [07:05<09:06,  5.26it/s]

steps= 122
steps= 53


 42%|████▎     | 2125/5000 [07:05<07:38,  6.27it/s]

steps= 145
steps= 25


 43%|████▎     | 2126/5000 [07:05<07:30,  6.38it/s]

steps= 107


 43%|████▎     | 2127/5000 [07:06<14:01,  3.41it/s]

steps= 425


 43%|████▎     | 2128/5000 [07:06<15:57,  3.00it/s]

steps= 332


 43%|████▎     | 2130/5000 [07:07<12:35,  3.80it/s]

steps= 286
steps= 30


 43%|████▎     | 2131/5000 [07:07<16:08,  2.96it/s]

steps= 202
steps= 21


 43%|████▎     | 2134/5000 [07:08<10:33,  4.52it/s]

steps= 67
steps= 25


 43%|████▎     | 2135/5000 [07:08<11:24,  4.19it/s]

steps= 110


 43%|████▎     | 2136/5000 [07:09<18:42,  2.55it/s]

steps= 377
steps= 28


 43%|████▎     | 2140/5000 [07:09<11:20,  4.21it/s]

steps= 83
steps= 77
steps= 22


 43%|████▎     | 2142/5000 [07:10<09:27,  5.04it/s]

steps= 131
steps= 107


 43%|████▎     | 2144/5000 [07:10<11:44,  4.06it/s]

steps= 373
steps= 60


 43%|████▎     | 2147/5000 [07:10<07:55,  6.01it/s]

steps= 50
steps= 56
steps= 33


 43%|████▎     | 2148/5000 [07:11<10:04,  4.72it/s]

steps= 236


 43%|████▎     | 2149/5000 [07:11<16:50,  2.82it/s]

steps= 439


 43%|████▎     | 2151/5000 [07:12<13:37,  3.49it/s]

steps= 119
steps= 61


 43%|████▎     | 2152/5000 [07:12<12:04,  3.93it/s]

steps= 93


 43%|████▎     | 2154/5000 [07:13<12:47,  3.71it/s]

steps= 108
steps= 144


 43%|████▎     | 2156/5000 [07:13<09:38,  4.92it/s]

steps= 83
steps= 87


 43%|████▎     | 2158/5000 [07:14<11:26,  4.14it/s]

steps= 163
steps= 90


 43%|████▎     | 2159/5000 [07:14<17:28,  2.71it/s]

steps= 340


 43%|████▎     | 2160/5000 [07:15<18:10,  2.60it/s]

steps= 258


 43%|████▎     | 2161/5000 [07:15<17:40,  2.68it/s]

steps= 125
steps= 17


 43%|████▎     | 2164/5000 [07:15<12:17,  3.84it/s]

steps= 106
steps= 62


 43%|████▎     | 2165/5000 [07:16<13:54,  3.40it/s]

steps= 205


 43%|████▎     | 2166/5000 [07:16<15:51,  2.98it/s]

steps= 225


 43%|████▎     | 2168/5000 [07:17<13:06,  3.60it/s]

steps= 181
steps= 62


 43%|████▎     | 2169/5000 [07:17<14:55,  3.16it/s]

steps= 152


 43%|████▎     | 2171/5000 [07:18<11:44,  4.01it/s]

steps= 220
steps= 36


 43%|████▎     | 2173/5000 [07:18<11:33,  4.07it/s]

steps= 247
steps= 52


 43%|████▎     | 2174/5000 [07:18<09:49,  4.80it/s]

steps= 32


 44%|████▎     | 2175/5000 [07:20<26:15,  1.79it/s]

steps= 481


 44%|████▎     | 2177/5000 [07:20<18:18,  2.57it/s]

steps= 93
steps= 58
steps= 47


 44%|████▎     | 2179/5000 [07:20<14:15,  3.30it/s]

steps= 26


 44%|████▎     | 2181/5000 [07:21<11:41,  4.02it/s]

steps= 73
steps= 166


 44%|████▎     | 2182/5000 [07:21<13:46,  3.41it/s]

steps= 257
steps= 28
steps= 21


 44%|████▎     | 2186/5000 [07:22<09:45,  4.81it/s]

steps= 204
steps= 114


 44%|████▎     | 2187/5000 [07:22<09:07,  5.14it/s]

steps= 157


 44%|████▍     | 2188/5000 [07:22<10:17,  4.55it/s]

steps= 124


 44%|████▍     | 2189/5000 [07:22<11:01,  4.25it/s]

steps= 167


 44%|████▍     | 2190/5000 [07:23<19:55,  2.35it/s]

steps= 361


 44%|████▍     | 2191/5000 [07:23<17:05,  2.74it/s]

steps= 191


 44%|████▍     | 2192/5000 [07:24<23:32,  1.99it/s]

steps= 472


 44%|████▍     | 2193/5000 [07:24<20:21,  2.30it/s]

steps= 254


 44%|████▍     | 2194/5000 [07:25<18:14,  2.56it/s]

steps= 224


 44%|████▍     | 2197/5000 [07:25<13:29,  3.46it/s]

steps= 206
steps= 21
steps= 81


 44%|████▍     | 2198/5000 [07:25<11:19,  4.12it/s]

steps= 100
steps= 25


 44%|████▍     | 2200/5000 [07:26<09:30,  4.91it/s]

steps= 114


 44%|████▍     | 2201/5000 [07:26<15:50,  2.95it/s]

steps= 282
episode: 2200 episode reward: -1290 eps: 0.7944054215315619 avg reward (last 100): -1089.6039603960396 episode loss:  22318.496
avg reward for last 100 episodes: -1089.6039603960396
steps= 98


 44%|████▍     | 2204/5000 [07:27<12:54,  3.61it/s]

steps= 178
steps= 86


 44%|████▍     | 2206/5000 [07:27<08:51,  5.26it/s]

steps= 72
steps= 76


 44%|████▍     | 2207/5000 [07:27<09:19,  4.99it/s]

steps= 60


 44%|████▍     | 2208/5000 [07:28<14:05,  3.30it/s]

steps= 299


 44%|████▍     | 2209/5000 [07:28<15:03,  3.09it/s]

steps= 232


 44%|████▍     | 2211/5000 [07:29<13:21,  3.48it/s]

steps= 261
steps= 35


 44%|████▍     | 2213/5000 [07:30<14:03,  3.30it/s]

steps= 277
steps= 76


 44%|████▍     | 2215/5000 [07:30<12:45,  3.64it/s]

steps= 217
steps= 60


 44%|████▍     | 2216/5000 [07:30<10:30,  4.42it/s]

steps= 67


 44%|████▍     | 2217/5000 [07:31<12:09,  3.82it/s]

steps= 101


 44%|████▍     | 2219/5000 [07:31<10:35,  4.38it/s]

steps= 134
steps= 38


 44%|████▍     | 2220/5000 [07:31<11:15,  4.12it/s]

steps= 104
steps= 9


 44%|████▍     | 2222/5000 [07:32<09:41,  4.78it/s]

steps= 130


 44%|████▍     | 2223/5000 [07:32<13:24,  3.45it/s]

steps= 280


 44%|████▍     | 2224/5000 [07:33<20:23,  2.27it/s]

steps= 460


 44%|████▍     | 2225/5000 [07:33<18:28,  2.50it/s]

steps= 152


 45%|████▍     | 2227/5000 [07:34<15:23,  3.00it/s]

steps= 261
steps= 70


 45%|████▍     | 2228/5000 [07:34<13:53,  3.32it/s]

steps= 140
steps= 41


 45%|████▍     | 2231/5000 [07:34<10:08,  4.55it/s]

steps= 212
steps= 65


 45%|████▍     | 2232/5000 [07:35<12:33,  3.67it/s]

steps= 363
steps= 49


 45%|████▍     | 2234/5000 [07:35<12:06,  3.81it/s]

steps= 237


 45%|████▍     | 2235/5000 [07:36<16:41,  2.76it/s]

steps= 330


 45%|████▍     | 2236/5000 [07:36<17:19,  2.66it/s]

steps= 184


 45%|████▍     | 2237/5000 [07:37<25:28,  1.81it/s]

steps= 528


 45%|████▍     | 2238/5000 [07:38<20:51,  2.21it/s]

steps= 54
steps= 14
steps= 5


 45%|████▍     | 2241/5000 [07:38<15:57,  2.88it/s]

steps= 188


 45%|████▍     | 2242/5000 [07:39<21:03,  2.18it/s]

steps= 400


 45%|████▍     | 2244/5000 [07:39<14:08,  3.25it/s]

steps= 229
steps= 59


 45%|████▍     | 2245/5000 [07:40<19:29,  2.36it/s]

steps= 385


 45%|████▍     | 2246/5000 [07:40<16:55,  2.71it/s]

steps= 108


 45%|████▍     | 2248/5000 [07:40<12:40,  3.62it/s]

steps= 108
steps= 67


 45%|████▍     | 2249/5000 [07:40<12:49,  3.58it/s]

steps= 142


 45%|████▌     | 2250/5000 [07:41<14:04,  3.26it/s]

steps= 243


 45%|████▌     | 2251/5000 [07:41<13:15,  3.46it/s]

steps= 161


 45%|████▌     | 2252/5000 [07:41<12:14,  3.74it/s]

steps= 134


 45%|████▌     | 2254/5000 [07:42<09:35,  4.77it/s]

steps= 172
steps= 93


 45%|████▌     | 2256/5000 [07:42<08:27,  5.41it/s]

steps= 199
steps= 51


 45%|████▌     | 2258/5000 [07:43<10:07,  4.51it/s]

steps= 246
steps= 77


 45%|████▌     | 2259/5000 [07:43<10:11,  4.48it/s]

steps= 88


 45%|████▌     | 2260/5000 [07:43<14:25,  3.17it/s]

steps= 241


 45%|████▌     | 2263/5000 [07:44<15:22,  2.97it/s]

steps= 410
steps= 16
steps= 56


 45%|████▌     | 2264/5000 [07:44<12:41,  3.59it/s]

steps= 91


 45%|████▌     | 2265/5000 [07:45<13:18,  3.43it/s]

steps= 200


 45%|████▌     | 2267/5000 [07:45<11:09,  4.08it/s]

steps= 83
steps= 163


 45%|████▌     | 2268/5000 [07:45<09:24,  4.84it/s]

steps= 47
steps= 174


 45%|████▌     | 2270/5000 [07:46<09:21,  4.87it/s]

steps= 112


 45%|████▌     | 2271/5000 [07:47<20:51,  2.18it/s]

steps= 796


 45%|████▌     | 2272/5000 [07:48<27:44,  1.64it/s]

steps= 462
steps= 57


 45%|████▌     | 2274/5000 [07:48<21:30,  2.11it/s]

steps= 86


 46%|████▌     | 2276/5000 [07:48<14:55,  3.04it/s]

steps= 214
steps= 106
steps= 31


 46%|████▌     | 2278/5000 [07:49<12:45,  3.56it/s]

steps= 190


 46%|████▌     | 2279/5000 [07:49<13:25,  3.38it/s]

steps= 213


 46%|████▌     | 2281/5000 [07:50<12:30,  3.62it/s]

steps= 175
steps= 169


 46%|████▌     | 2282/5000 [07:50<11:13,  4.04it/s]

steps= 148


 46%|████▌     | 2283/5000 [07:50<11:10,  4.05it/s]

steps= 151


 46%|████▌     | 2285/5000 [07:51<11:23,  3.97it/s]

steps= 167
steps= 86


 46%|████▌     | 2286/5000 [07:51<10:48,  4.18it/s]

steps= 151


 46%|████▌     | 2287/5000 [07:51<11:41,  3.87it/s]

steps= 183


 46%|████▌     | 2289/5000 [07:52<11:08,  4.06it/s]

steps= 137
steps= 57


 46%|████▌     | 2290/5000 [07:52<10:10,  4.44it/s]

steps= 81


 46%|████▌     | 2292/5000 [07:52<09:28,  4.77it/s]

steps= 136
steps= 101


 46%|████▌     | 2293/5000 [07:52<09:50,  4.59it/s]

steps= 135


 46%|████▌     | 2294/5000 [07:53<15:21,  2.94it/s]

steps= 207


 46%|████▌     | 2295/5000 [07:53<16:30,  2.73it/s]

steps= 187


 46%|████▌     | 2297/5000 [07:54<13:02,  3.45it/s]

steps= 101
steps= 56


 46%|████▌     | 2298/5000 [07:54<13:45,  3.27it/s]

steps= 129


 46%|████▌     | 2299/5000 [07:55<14:52,  3.03it/s]

steps= 252
steps= 37


 46%|████▌     | 2301/5000 [07:55<11:51,  3.79it/s]

steps= 86
episode: 2300 episode reward: -1094 eps: 0.786500562240163 avg reward (last 100): -1128.3762376237623 episode loss:  19394.205
avg reward for last 100 episodes: -1128.3762376237623
steps= 40
steps= 76


 46%|████▌     | 2304/5000 [07:56<15:35,  2.88it/s]

steps= 283


 46%|████▌     | 2305/5000 [07:56<15:33,  2.89it/s]

steps= 98


 46%|████▌     | 2306/5000 [07:57<29:03,  1.55it/s]

steps= 529


 46%|████▌     | 2307/5000 [07:58<24:06,  1.86it/s]

steps= 72


 46%|████▌     | 2309/5000 [07:59<24:33,  1.83it/s]

steps= 345
steps= 57


 46%|████▌     | 2310/5000 [07:59<18:49,  2.38it/s]

steps= 45
steps= 98


 46%|████▌     | 2312/5000 [07:59<12:35,  3.56it/s]

steps= 17


 46%|████▋     | 2313/5000 [08:00<15:56,  2.81it/s]

steps= 186


 46%|████▋     | 2314/5000 [08:00<16:43,  2.68it/s]

steps= 140


 46%|████▋     | 2315/5000 [08:01<17:35,  2.54it/s]

steps= 177


 46%|████▋     | 2316/5000 [08:01<20:57,  2.13it/s]

steps= 244
steps= 49


 46%|████▋     | 2318/5000 [08:02<18:46,  2.38it/s]

steps= 268


 46%|████▋     | 2319/5000 [08:03<25:12,  1.77it/s]

steps= 402


 46%|████▋     | 2320/5000 [08:03<21:54,  2.04it/s]

steps= 127


 46%|████▋     | 2321/5000 [08:03<18:20,  2.43it/s]

steps= 77


 46%|████▋     | 2322/5000 [08:04<19:57,  2.24it/s]

steps= 189


 46%|████▋     | 2323/5000 [08:04<18:41,  2.39it/s]

steps= 200


 46%|████▋     | 2325/5000 [08:05<16:43,  2.67it/s]

steps= 199
steps= 37


 47%|████▋     | 2326/5000 [08:05<13:23,  3.33it/s]

steps= 54
steps= 40


 47%|████▋     | 2328/5000 [08:05<10:48,  4.12it/s]

steps= 39
steps= 21


 47%|████▋     | 2330/5000 [08:06<09:35,  4.64it/s]

steps= 132


 47%|████▋     | 2332/5000 [08:06<09:52,  4.50it/s]

steps= 81
steps= 28


 47%|████▋     | 2333/5000 [08:06<09:22,  4.74it/s]

steps= 58
steps= 30


 47%|████▋     | 2335/5000 [08:07<10:19,  4.30it/s]

steps= 198


 47%|████▋     | 2336/5000 [08:07<10:22,  4.28it/s]

steps= 181


 47%|████▋     | 2337/5000 [08:08<10:49,  4.10it/s]

steps= 197


 47%|████▋     | 2339/5000 [08:08<09:21,  4.74it/s]

steps= 172
steps= 48


 47%|████▋     | 2340/5000 [08:08<10:06,  4.39it/s]

steps= 68


 47%|████▋     | 2341/5000 [08:09<16:15,  2.72it/s]

steps= 251


 47%|████▋     | 2342/5000 [08:09<15:18,  2.89it/s]

steps= 167


 47%|████▋     | 2343/5000 [08:10<16:06,  2.75it/s]

steps= 199


 47%|████▋     | 2345/5000 [08:10<13:20,  3.32it/s]

steps= 161
steps= 74


 47%|████▋     | 2346/5000 [08:10<12:40,  3.49it/s]

steps= 86


 47%|████▋     | 2348/5000 [08:11<12:40,  3.49it/s]

steps= 165
steps= 103


 47%|████▋     | 2350/5000 [08:11<09:43,  4.54it/s]

steps= 135
steps= 36


 47%|████▋     | 2353/5000 [08:12<06:47,  6.50it/s]

steps= 38
steps= 24
steps= 21


 47%|████▋     | 2354/5000 [08:12<07:02,  6.27it/s]

steps= 54


 47%|████▋     | 2355/5000 [08:12<12:01,  3.66it/s]

steps= 274


 47%|████▋     | 2356/5000 [08:13<11:51,  3.72it/s]

steps= 87


 47%|████▋     | 2357/5000 [08:13<18:13,  2.42it/s]

steps= 418


 47%|████▋     | 2358/5000 [08:14<18:46,  2.34it/s]

steps= 190


 47%|████▋     | 2359/5000 [08:14<18:24,  2.39it/s]

steps= 227
steps= 31


 47%|████▋     | 2361/5000 [08:14<14:18,  3.07it/s]

steps= 130


 47%|████▋     | 2362/5000 [08:15<13:19,  3.30it/s]

steps= 191


 47%|████▋     | 2363/5000 [08:15<12:55,  3.40it/s]

steps= 204


 47%|████▋     | 2364/5000 [08:15<12:54,  3.40it/s]

steps= 148


 47%|████▋     | 2365/5000 [08:16<14:01,  3.13it/s]

steps= 181


 47%|████▋     | 2367/5000 [08:16<12:14,  3.58it/s]

steps= 280
steps= 30


 47%|████▋     | 2369/5000 [08:16<10:29,  4.18it/s]

steps= 116
steps= 154


 47%|████▋     | 2370/5000 [08:17<13:41,  3.20it/s]

steps= 301
steps= 58


 47%|████▋     | 2372/5000 [08:17<12:44,  3.44it/s]

steps= 251


 47%|████▋     | 2373/5000 [08:18<13:16,  3.30it/s]

steps= 309
steps= 18


 48%|████▊     | 2375/5000 [08:18<10:53,  4.01it/s]

steps= 103


 48%|████▊     | 2376/5000 [08:18<13:19,  3.28it/s]

steps= 198


 48%|████▊     | 2377/5000 [08:19<18:32,  2.36it/s]

steps= 417


 48%|████▊     | 2379/5000 [08:20<13:09,  3.32it/s]

steps= 156
steps= 80
steps= 36


 48%|████▊     | 2381/5000 [08:20<13:02,  3.35it/s]

steps= 216
steps= 20
steps= 76


 48%|████▊     | 2385/5000 [08:21<08:19,  5.23it/s]

steps= 59
steps= 134


 48%|████▊     | 2387/5000 [08:21<08:43,  4.99it/s]

steps= 204
steps= 33


 48%|████▊     | 2388/5000 [08:21<10:53,  4.00it/s]

steps= 271


 48%|████▊     | 2390/5000 [08:22<09:59,  4.35it/s]

steps= 168
steps= 123


 48%|████▊     | 2391/5000 [08:22<10:25,  4.17it/s]

steps= 125


 48%|████▊     | 2392/5000 [08:23<15:15,  2.85it/s]

steps= 183


 48%|████▊     | 2393/5000 [08:23<18:10,  2.39it/s]

steps= 298


 48%|████▊     | 2394/5000 [08:24<22:54,  1.90it/s]

steps= 388


 48%|████▊     | 2395/5000 [08:25<24:39,  1.76it/s]

steps= 343
steps= 52


 48%|████▊     | 2398/5000 [08:25<15:44,  2.75it/s]

steps= 142
steps= 80


 48%|████▊     | 2400/5000 [08:26<12:48,  3.38it/s]

steps= 38
steps= 181


 48%|████▊     | 2402/5000 [08:26<11:12,  3.86it/s]

steps= 128
episode: 2400 episode reward: -1136 eps: 0.7786743615262652 avg reward (last 100): -1054.7821782178219 episode loss:  25004.113
avg reward for last 100 episodes: -1054.7821782178219
steps= 100


 48%|████▊     | 2404/5000 [08:27<12:46,  3.39it/s]

steps= 216
steps= 112


 48%|████▊     | 2406/5000 [08:27<10:56,  3.95it/s]

steps= 243
steps= 32


 48%|████▊     | 2407/5000 [08:27<09:36,  4.50it/s]

steps= 17


 48%|████▊     | 2408/5000 [08:28<11:53,  3.63it/s]

steps= 203


 48%|████▊     | 2410/5000 [08:28<09:39,  4.47it/s]

steps= 132
steps= 45


 48%|████▊     | 2411/5000 [08:29<12:41,  3.40it/s]

steps= 256


 48%|████▊     | 2413/5000 [08:29<11:50,  3.64it/s]

steps= 296
steps= 40


 48%|████▊     | 2414/5000 [08:29<11:24,  3.78it/s]

steps= 120


 48%|████▊     | 2415/5000 [08:30<16:40,  2.58it/s]

steps= 542


 48%|████▊     | 2416/5000 [08:31<18:25,  2.34it/s]

steps= 383


 48%|████▊     | 2417/5000 [08:31<20:34,  2.09it/s]

steps= 469


 48%|████▊     | 2418/5000 [08:32<23:19,  1.84it/s]

steps= 498


 48%|████▊     | 2420/5000 [08:32<17:40,  2.43it/s]

steps= 370
steps= 38


 48%|████▊     | 2422/5000 [08:33<13:01,  3.30it/s]

steps= 138
steps= 116


 48%|████▊     | 2424/5000 [08:33<10:29,  4.09it/s]

steps= 80
steps= 73


 48%|████▊     | 2425/5000 [08:33<09:38,  4.45it/s]

steps= 28


 49%|████▊     | 2426/5000 [08:34<13:41,  3.13it/s]

steps= 287


 49%|████▊     | 2427/5000 [08:34<14:57,  2.87it/s]

steps= 313


 49%|████▊     | 2428/5000 [08:35<16:26,  2.61it/s]

steps= 211


 49%|████▊     | 2429/5000 [08:35<15:38,  2.74it/s]

steps= 104


 49%|████▊     | 2430/5000 [08:36<18:28,  2.32it/s]

steps= 451


 49%|████▊     | 2431/5000 [08:36<17:04,  2.51it/s]

steps= 155


 49%|████▊     | 2432/5000 [08:37<21:30,  1.99it/s]

steps= 486
steps= 22


 49%|████▊     | 2434/5000 [08:37<17:27,  2.45it/s]

steps= 132


 49%|████▊     | 2436/5000 [08:38<14:20,  2.98it/s]

steps= 258
steps= 135


 49%|████▊     | 2437/5000 [08:38<16:08,  2.65it/s]

steps= 450


 49%|████▉     | 2438/5000 [08:39<15:39,  2.73it/s]

steps= 145


 49%|████▉     | 2439/5000 [08:39<20:15,  2.11it/s]

steps= 608


 49%|████▉     | 2441/5000 [08:40<13:44,  3.10it/s]

steps= 132
steps= 47


 49%|████▉     | 2442/5000 [08:40<11:46,  3.62it/s]

steps= 132


 49%|████▉     | 2444/5000 [08:41<12:57,  3.29it/s]

steps= 463
steps= 68


 49%|████▉     | 2446/5000 [08:41<09:50,  4.33it/s]

steps= 101
steps= 49


 49%|████▉     | 2448/5000 [08:42<11:29,  3.70it/s]

steps= 333
steps= 35


 49%|████▉     | 2449/5000 [08:42<12:08,  3.50it/s]

steps= 116


 49%|████▉     | 2450/5000 [08:43<17:15,  2.46it/s]

steps= 238


 49%|████▉     | 2451/5000 [08:43<14:57,  2.84it/s]

steps= 64


 49%|████▉     | 2452/5000 [08:43<13:19,  3.19it/s]

steps= 76


 49%|████▉     | 2453/5000 [08:43<13:39,  3.11it/s]

steps= 142


 49%|████▉     | 2454/5000 [08:44<12:21,  3.44it/s]

steps= 89
steps= 32


 49%|████▉     | 2456/5000 [08:45<14:42,  2.88it/s]

steps= 424


 49%|████▉     | 2458/5000 [08:45<11:51,  3.57it/s]

steps= 76
steps= 79


 49%|████▉     | 2459/5000 [08:45<10:38,  3.98it/s]

steps= 67


 49%|████▉     | 2460/5000 [08:46<11:37,  3.64it/s]

steps= 239
steps= 52


 49%|████▉     | 2462/5000 [08:47<13:42,  3.09it/s]

steps= 300
steps= 37


 49%|████▉     | 2464/5000 [08:47<11:21,  3.72it/s]

steps= 69
steps= 49


 49%|████▉     | 2466/5000 [08:47<09:17,  4.55it/s]

steps= 74


 49%|████▉     | 2468/5000 [08:48<15:12,  2.78it/s]

steps= 379
steps= 86


 49%|████▉     | 2469/5000 [08:49<21:35,  1.95it/s]

steps= 316


 49%|████▉     | 2470/5000 [08:49<18:46,  2.25it/s]

steps= 94


 49%|████▉     | 2471/5000 [08:50<17:01,  2.48it/s]

steps= 86


 49%|████▉     | 2472/5000 [08:50<17:38,  2.39it/s]

steps= 224


 49%|████▉     | 2473/5000 [08:52<30:27,  1.38it/s]

steps= 515


 49%|████▉     | 2474/5000 [08:52<25:29,  1.65it/s]

steps= 127


 50%|████▉     | 2475/5000 [08:52<21:22,  1.97it/s]

steps= 122


 50%|████▉     | 2476/5000 [08:53<30:43,  1.37it/s]

steps= 763


 50%|████▉     | 2477/5000 [08:54<24:45,  1.70it/s]

steps= 132


 50%|████▉     | 2479/5000 [08:54<16:37,  2.53it/s]

steps= 197
steps= 116
steps= 41


 50%|████▉     | 2481/5000 [08:55<16:17,  2.58it/s]

steps= 234


 50%|████▉     | 2482/5000 [08:55<15:03,  2.79it/s]

steps= 129


 50%|████▉     | 2483/5000 [08:56<17:25,  2.41it/s]

steps= 237


 50%|████▉     | 2484/5000 [08:57<22:41,  1.85it/s]

steps= 413


 50%|████▉     | 2485/5000 [08:57<18:57,  2.21it/s]

steps= 51


 50%|████▉     | 2487/5000 [08:57<14:29,  2.89it/s]

steps= 176
steps= 72


 50%|████▉     | 2488/5000 [08:58<17:24,  2.41it/s]

steps= 385
steps= 134


 50%|████▉     | 2490/5000 [08:58<13:31,  3.09it/s]

steps= 142


 50%|████▉     | 2491/5000 [08:59<15:51,  2.64it/s]

steps= 245


 50%|████▉     | 2493/5000 [09:00<14:09,  2.95it/s]

steps= 261
steps= 57


 50%|████▉     | 2495/5000 [09:01<17:53,  2.33it/s]

steps= 543
steps= 36


 50%|████▉     | 2496/5000 [09:02<22:06,  1.89it/s]

steps= 264


 50%|████▉     | 2497/5000 [09:02<19:32,  2.14it/s]

steps= 125


 50%|████▉     | 2498/5000 [09:02<19:42,  2.12it/s]

steps= 311


 50%|████▉     | 2499/5000 [09:03<23:03,  1.81it/s]

steps= 406


 50%|█████     | 2502/5000 [09:04<14:51,  2.80it/s]

steps= 156
steps= 53
episode: 2500 episode reward: -1061 eps: 0.7709260366850046 avg reward (last 100): -1182.5940594059407 episode loss:  22114.639
avg reward for last 100 episodes: -1182.5940594059407
steps= 65


 50%|█████     | 2503/5000 [09:04<12:48,  3.25it/s]

steps= 140


 50%|█████     | 2505/5000 [09:04<09:59,  4.16it/s]

steps= 132
steps= 57


 50%|█████     | 2508/5000 [09:04<06:50,  6.07it/s]

steps= 47
steps= 51
steps= 55


 50%|█████     | 2510/5000 [09:05<09:03,  4.58it/s]

steps= 344
steps= 119


 50%|█████     | 2512/5000 [09:05<07:28,  5.55it/s]

steps= 129
steps= 75


 50%|█████     | 2514/5000 [09:06<06:12,  6.67it/s]

steps= 83
steps= 51


 50%|█████     | 2515/5000 [09:06<06:47,  6.10it/s]

steps= 140


 50%|█████     | 2517/5000 [09:06<07:07,  5.81it/s]

steps= 140
steps= 59


 50%|█████     | 2519/5000 [09:06<06:38,  6.22it/s]

steps= 69
steps= 137
steps= 16
steps= 118


 50%|█████     | 2522/5000 [09:07<07:41,  5.36it/s]

steps= 139


 50%|█████     | 2523/5000 [09:07<10:24,  3.96it/s]

steps= 173


 50%|█████     | 2524/5000 [09:08<12:17,  3.36it/s]

steps= 211


 50%|█████     | 2525/5000 [09:08<16:24,  2.52it/s]

steps= 311
steps= 28


 51%|█████     | 2527/5000 [09:09<14:32,  2.83it/s]

steps= 187


 51%|█████     | 2528/5000 [09:09<16:05,  2.56it/s]

steps= 131


 51%|█████     | 2529/5000 [09:10<17:32,  2.35it/s]

steps= 266


 51%|█████     | 2531/5000 [09:11<17:50,  2.31it/s]

steps= 330
steps= 71


 51%|█████     | 2533/5000 [09:11<11:58,  3.43it/s]

steps= 171
steps= 106


 51%|█████     | 2534/5000 [09:11<11:33,  3.56it/s]

steps= 198


 51%|█████     | 2536/5000 [09:12<11:11,  3.67it/s]

steps= 357
steps= 107


 51%|█████     | 2537/5000 [09:13<14:46,  2.78it/s]

steps= 257


 51%|█████     | 2538/5000 [09:13<13:49,  2.97it/s]

steps= 158


 51%|█████     | 2539/5000 [09:13<12:40,  3.24it/s]

steps= 131


 51%|█████     | 2541/5000 [09:14<13:30,  3.03it/s]

steps= 468
steps= 122


 51%|█████     | 2543/5000 [09:14<09:54,  4.13it/s]

steps= 52
steps= 104


 51%|█████     | 2544/5000 [09:14<08:27,  4.84it/s]

steps= 40


 51%|█████     | 2546/5000 [09:16<15:28,  2.64it/s]

steps= 620
steps= 53


 51%|█████     | 2547/5000 [09:16<12:50,  3.18it/s]

steps= 100
steps= 24


 51%|█████     | 2550/5000 [09:16<09:29,  4.30it/s]

steps= 210
steps= 55


 51%|█████     | 2551/5000 [09:17<11:35,  3.52it/s]

steps= 236


 51%|█████     | 2553/5000 [09:17<08:54,  4.58it/s]

steps= 179
steps= 65


 51%|█████     | 2555/5000 [09:17<08:01,  5.08it/s]

steps= 195
steps= 77


 51%|█████     | 2556/5000 [09:17<07:01,  5.80it/s]

steps= 52


 51%|█████     | 2558/5000 [09:18<08:08,  5.00it/s]

steps= 150
steps= 47


 51%|█████     | 2559/5000 [09:18<10:47,  3.77it/s]

steps= 153
steps= 61


 51%|█████     | 2561/5000 [09:19<09:54,  4.10it/s]

steps= 292


 51%|█████▏    | 2563/5000 [09:20<12:01,  3.38it/s]

steps= 309
steps= 39


 51%|█████▏    | 2564/5000 [09:20<11:14,  3.61it/s]

steps= 66


 51%|█████▏    | 2565/5000 [09:20<16:57,  2.39it/s]

steps= 227


 51%|█████▏    | 2566/5000 [09:22<24:13,  1.67it/s]

steps= 468
steps= 8


 51%|█████▏    | 2570/5000 [09:22<14:37,  2.77it/s]

steps= 258
steps= 15
steps= 92


 51%|█████▏    | 2572/5000 [09:22<10:44,  3.77it/s]

steps= 94
steps= 108


 51%|█████▏    | 2573/5000 [09:23<09:55,  4.08it/s]

steps= 128


 51%|█████▏    | 2574/5000 [09:23<10:06,  4.00it/s]

steps= 149


 52%|█████▏    | 2576/5000 [09:24<12:03,  3.35it/s]

steps= 222
steps= 68


 52%|█████▏    | 2577/5000 [09:24<09:51,  4.10it/s]

steps= 34


 52%|█████▏    | 2578/5000 [09:24<09:20,  4.32it/s]

steps= 69


 52%|█████▏    | 2579/5000 [09:24<09:48,  4.11it/s]

steps= 149


 52%|█████▏    | 2580/5000 [09:25<12:14,  3.29it/s]

steps= 203


 52%|█████▏    | 2581/5000 [09:25<14:33,  2.77it/s]

steps= 263


 52%|█████▏    | 2583/5000 [09:26<15:29,  2.60it/s]

steps= 378
steps= 54
steps= 70


 52%|█████▏    | 2585/5000 [09:26<12:48,  3.14it/s]

steps= 201


 52%|█████▏    | 2586/5000 [09:27<14:56,  2.69it/s]

steps= 342


 52%|█████▏    | 2587/5000 [09:27<13:38,  2.95it/s]

steps= 227


 52%|█████▏    | 2588/5000 [09:28<22:09,  1.81it/s]

steps= 480


 52%|█████▏    | 2590/5000 [09:29<14:47,  2.72it/s]

steps= 66
steps= 106


 52%|█████▏    | 2592/5000 [09:29<12:09,  3.30it/s]

steps= 236
steps= 103


 52%|█████▏    | 2593/5000 [09:30<12:26,  3.22it/s]

steps= 176


 52%|█████▏    | 2595/5000 [09:30<10:26,  3.84it/s]

steps= 103
steps= 135


 52%|█████▏    | 2596/5000 [09:30<11:06,  3.61it/s]

steps= 195


 52%|█████▏    | 2597/5000 [09:31<12:56,  3.09it/s]

steps= 248


 52%|█████▏    | 2599/5000 [09:31<12:56,  3.09it/s]

steps= 272
steps= 137


 52%|█████▏    | 2600/5000 [09:32<14:49,  2.70it/s]

steps= 312


 52%|█████▏    | 2601/5000 [09:32<13:57,  2.87it/s]

steps= 224
episode: 2600 episode reward: -1232 eps: 0.7632548127999497 avg reward (last 100): -1065.3465346534654 episode loss:  23749.295
avg reward for last 100 episodes: -1065.3465346534654


 52%|█████▏    | 2603/5000 [09:33<13:15,  3.01it/s]

steps= 458
steps= 99


 52%|█████▏    | 2604/5000 [09:33<10:33,  3.78it/s]

steps= 60


 52%|█████▏    | 2606/5000 [09:34<10:07,  3.94it/s]

steps= 183
steps= 129


 52%|█████▏    | 2607/5000 [09:34<13:42,  2.91it/s]

steps= 373


 52%|█████▏    | 2609/5000 [09:35<15:13,  2.62it/s]

steps= 406
steps= 31


 52%|█████▏    | 2610/5000 [09:35<15:49,  2.52it/s]

steps= 173


 52%|█████▏    | 2612/5000 [09:37<17:06,  2.33it/s]

steps= 506
steps= 71


 52%|█████▏    | 2615/5000 [09:37<11:44,  3.38it/s]

steps= 129
steps= 12
steps= 148


 52%|█████▏    | 2618/5000 [09:37<07:27,  5.32it/s]

steps= 40
steps= 16
steps= 53
steps= 6


 52%|█████▏    | 2620/5000 [09:37<06:08,  6.46it/s]

steps= 51
steps= 78


 52%|█████▏    | 2622/5000 [09:38<09:49,  4.03it/s]

steps= 395


 52%|█████▏    | 2623/5000 [09:39<14:48,  2.68it/s]

steps= 294


 52%|█████▏    | 2624/5000 [09:39<13:13,  2.99it/s]

steps= 100
steps= 21


 53%|█████▎    | 2627/5000 [09:40<09:28,  4.17it/s]

steps= 249
steps= 28
steps= 14


 53%|█████▎    | 2629/5000 [09:41<12:06,  3.27it/s]

steps= 619


 53%|█████▎    | 2630/5000 [09:41<14:24,  2.74it/s]

steps= 185


 53%|█████▎    | 2631/5000 [09:42<18:18,  2.16it/s]

steps= 361
steps= 66


 53%|█████▎    | 2633/5000 [09:43<17:18,  2.28it/s]

steps= 272


 53%|█████▎    | 2636/5000 [09:43<11:23,  3.46it/s]

steps= 167
steps= 34
steps= 23


 53%|█████▎    | 2637/5000 [09:44<14:12,  2.77it/s]

steps= 279


 53%|█████▎    | 2639/5000 [09:45<23:07,  1.70it/s]

steps= 1058
steps= 83


 53%|█████▎    | 2641/5000 [09:46<19:25,  2.02it/s]

steps= 444
steps= 77


 53%|█████▎    | 2643/5000 [09:47<12:13,  3.22it/s]

steps= 78
steps= 67


 53%|█████▎    | 2644/5000 [09:47<14:19,  2.74it/s]

steps= 186


 53%|█████▎    | 2645/5000 [09:47<12:55,  3.04it/s]

steps= 129


 53%|█████▎    | 2647/5000 [09:48<11:36,  3.38it/s]

steps= 275
steps= 79


 53%|█████▎    | 2648/5000 [09:48<09:46,  4.01it/s]

steps= 109


 53%|█████▎    | 2649/5000 [09:48<09:56,  3.94it/s]

steps= 122
steps= 9


 53%|█████▎    | 2651/5000 [09:49<09:14,  4.23it/s]

steps= 106


 53%|█████▎    | 2652/5000 [09:50<16:28,  2.38it/s]

steps= 417
steps= 153


 53%|█████▎    | 2654/5000 [09:50<11:13,  3.48it/s]

steps= 107


 53%|█████▎    | 2655/5000 [09:50<10:41,  3.66it/s]

steps= 143


 53%|█████▎    | 2656/5000 [09:51<14:47,  2.64it/s]

steps= 343


 53%|█████▎    | 2657/5000 [09:51<14:04,  2.78it/s]

steps= 189


 53%|█████▎    | 2659/5000 [09:52<11:59,  3.25it/s]

steps= 373
steps= 67


 53%|█████▎    | 2661/5000 [09:52<10:39,  3.66it/s]

steps= 210
steps= 78


 53%|█████▎    | 2664/5000 [09:52<07:09,  5.44it/s]

steps= 101
steps= 12
steps= 70


 53%|█████▎    | 2665/5000 [09:53<11:18,  3.44it/s]

steps= 338
steps= 45


 53%|█████▎    | 2667/5000 [09:54<16:12,  2.40it/s]

steps= 783
steps= 52


 53%|█████▎    | 2669/5000 [09:55<13:39,  2.84it/s]

steps= 252


 53%|█████▎    | 2670/5000 [09:55<13:28,  2.88it/s]

steps= 120


 53%|█████▎    | 2671/5000 [09:55<11:46,  3.30it/s]

steps= 79


 53%|█████▎    | 2672/5000 [09:56<11:30,  3.37it/s]

steps= 173


 53%|█████▎    | 2674/5000 [09:56<09:14,  4.20it/s]

steps= 87
steps= 64


 54%|█████▎    | 2675/5000 [09:56<08:22,  4.63it/s]

steps= 116


 54%|█████▎    | 2676/5000 [09:57<11:41,  3.31it/s]

steps= 281


 54%|█████▎    | 2677/5000 [09:57<12:46,  3.03it/s]

steps= 148


 54%|█████▎    | 2678/5000 [09:58<15:08,  2.55it/s]

steps= 352
steps= 22


 54%|█████▎    | 2681/5000 [09:58<11:59,  3.22it/s]

steps= 340
steps= 131


 54%|█████▎    | 2682/5000 [09:59<11:13,  3.44it/s]

steps= 143


 54%|█████▎    | 2684/5000 [09:59<08:46,  4.40it/s]

steps= 113
steps= 44


 54%|█████▎    | 2685/5000 [10:00<13:07,  2.94it/s]

steps= 305


 54%|█████▎    | 2686/5000 [10:00<12:09,  3.17it/s]

steps= 130


 54%|█████▎    | 2687/5000 [10:00<13:28,  2.86it/s]

steps= 201


 54%|█████▍    | 2689/5000 [10:01<12:17,  3.13it/s]

steps= 254
steps= 71


 54%|█████▍    | 2690/5000 [10:02<15:22,  2.50it/s]

steps= 305


 54%|█████▍    | 2691/5000 [10:02<16:29,  2.33it/s]

steps= 255


 54%|█████▍    | 2692/5000 [10:02<14:04,  2.73it/s]

steps= 99


 54%|█████▍    | 2693/5000 [10:03<13:01,  2.95it/s]

steps= 164


 54%|█████▍    | 2694/5000 [10:03<14:21,  2.68it/s]

steps= 355


 54%|█████▍    | 2695/5000 [10:04<22:05,  1.74it/s]

steps= 537


 54%|█████▍    | 2697/5000 [10:04<14:37,  2.63it/s]

steps= 84
steps= 64


 54%|█████▍    | 2698/5000 [10:05<12:23,  3.09it/s]

steps= 62


 54%|█████▍    | 2699/5000 [10:05<16:38,  2.30it/s]

steps= 308


 54%|█████▍    | 2700/5000 [10:06<19:43,  1.94it/s]

steps= 374


 54%|█████▍    | 2702/5000 [10:07<15:42,  2.44it/s]

steps= 314
episode: 2700 episode reward: -1322 eps: 0.755659922665597 avg reward (last 100): -1136.4752475247524 episode loss:  23531.559
avg reward for last 100 episodes: -1136.4752475247524
steps= 59


 54%|█████▍    | 2703/5000 [10:07<15:53,  2.41it/s]

steps= 204


 54%|█████▍    | 2705/5000 [10:08<13:11,  2.90it/s]

steps= 144
steps= 70


 54%|█████▍    | 2706/5000 [10:08<12:14,  3.12it/s]

steps= 150


 54%|█████▍    | 2708/5000 [10:09<11:41,  3.27it/s]

steps= 265
steps= 143


 54%|█████▍    | 2710/5000 [10:09<10:32,  3.62it/s]

steps= 108
steps= 25


 54%|█████▍    | 2712/5000 [10:10<09:16,  4.11it/s]

steps= 104
steps= 65


 54%|█████▍    | 2714/5000 [10:10<08:36,  4.43it/s]

steps= 79
steps= 28


 54%|█████▍    | 2715/5000 [10:11<14:14,  2.67it/s]

steps= 461


 54%|█████▍    | 2716/5000 [10:11<16:27,  2.31it/s]

steps= 309


 54%|█████▍    | 2717/5000 [10:12<16:25,  2.32it/s]

steps= 191
steps= 68


 54%|█████▍    | 2719/5000 [10:12<12:59,  2.93it/s]

steps= 104


 54%|█████▍    | 2721/5000 [10:12<10:03,  3.78it/s]

steps= 129
steps= 40


 54%|█████▍    | 2723/5000 [10:13<09:43,  3.90it/s]

steps= 187
steps= 163


 54%|█████▍    | 2724/5000 [10:13<09:21,  4.05it/s]

steps= 175
steps= 37


 55%|█████▍    | 2727/5000 [10:14<09:30,  3.98it/s]

steps= 429
steps= 70


 55%|█████▍    | 2728/5000 [10:15<09:58,  3.80it/s]

steps= 79


 55%|█████▍    | 2729/5000 [10:15<10:09,  3.73it/s]

steps= 125


 55%|█████▍    | 2730/5000 [10:15<09:25,  4.01it/s]

steps= 160


 55%|█████▍    | 2731/5000 [10:15<11:43,  3.23it/s]

steps= 208


 55%|█████▍    | 2732/5000 [10:16<15:44,  2.40it/s]

steps= 475


 55%|█████▍    | 2734/5000 [10:17<17:48,  2.12it/s]

steps= 896
steps= 46


 55%|█████▍    | 2735/5000 [10:18<15:09,  2.49it/s]

steps= 93
steps= 27


 55%|█████▍    | 2738/5000 [10:18<10:39,  3.54it/s]

steps= 103
steps= 68


 55%|█████▍    | 2739/5000 [10:19<22:13,  1.70it/s]

steps= 628


 55%|█████▍    | 2740/5000 [10:20<20:05,  1.87it/s]

steps= 176
steps= 53


 55%|█████▍    | 2742/5000 [10:20<17:37,  2.13it/s]

steps= 390


 55%|█████▍    | 2744/5000 [10:21<12:46,  2.94it/s]

steps= 159
steps= 53


 55%|█████▍    | 2745/5000 [10:21<11:05,  3.39it/s]

steps= 77


 55%|█████▍    | 2746/5000 [10:22<17:28,  2.15it/s]

steps= 467


 55%|█████▍    | 2748/5000 [10:22<13:23,  2.80it/s]

steps= 192
steps= 43


 55%|█████▍    | 2749/5000 [10:24<24:24,  1.54it/s]

steps= 762


 55%|█████▌    | 2750/5000 [10:25<26:07,  1.44it/s]

steps= 270


 55%|█████▌    | 2751/5000 [10:25<20:52,  1.80it/s]

steps= 76
steps= 18


 55%|█████▌    | 2754/5000 [10:25<13:27,  2.78it/s]

steps= 121
steps= 56


 55%|█████▌    | 2755/5000 [10:26<11:04,  3.38it/s]

steps= 112


 55%|█████▌    | 2756/5000 [10:26<12:35,  2.97it/s]

steps= 180


 55%|█████▌    | 2757/5000 [10:26<13:04,  2.86it/s]

steps= 199


 55%|█████▌    | 2759/5000 [10:27<10:27,  3.57it/s]

steps= 154
steps= 101


 55%|█████▌    | 2760/5000 [10:27<08:30,  4.39it/s]

steps= 38
steps= 60


 55%|█████▌    | 2763/5000 [10:28<10:11,  3.66it/s]

steps= 790
steps= 99


 55%|█████▌    | 2764/5000 [10:29<12:47,  2.91it/s]

steps= 327


 55%|█████▌    | 2765/5000 [10:29<12:37,  2.95it/s]

steps= 146


 55%|█████▌    | 2766/5000 [10:29<11:23,  3.27it/s]

steps= 42


 55%|█████▌    | 2768/5000 [10:31<19:11,  1.94it/s]

steps= 616
steps= 90


 55%|█████▌    | 2770/5000 [10:31<13:44,  2.71it/s]

steps= 168
steps= 91


 55%|█████▌    | 2772/5000 [10:32<11:42,  3.17it/s]

steps= 181
steps= 84


 56%|█████▌    | 2775/5000 [10:33<14:34,  2.55it/s]

steps= 551
steps= 36
steps= 79


 56%|█████▌    | 2778/5000 [10:33<09:23,  3.94it/s]

steps= 124
steps= 13
steps= 75


 56%|█████▌    | 2779/5000 [10:34<08:27,  4.38it/s]

steps= 112


 56%|█████▌    | 2781/5000 [10:34<07:50,  4.71it/s]

steps= 141
steps= 122


 56%|█████▌    | 2783/5000 [10:34<06:50,  5.40it/s]

steps= 33
steps= 137


 56%|█████▌    | 2785/5000 [10:35<06:01,  6.13it/s]

steps= 41
steps= 126


 56%|█████▌    | 2786/5000 [10:35<09:41,  3.80it/s]

steps= 217


 56%|█████▌    | 2787/5000 [10:36<13:36,  2.71it/s]

steps= 203


 56%|█████▌    | 2789/5000 [10:36<11:42,  3.15it/s]

steps= 214
steps= 74


 56%|█████▌    | 2791/5000 [10:37<09:54,  3.72it/s]

steps= 192
steps= 68


 56%|█████▌    | 2793/5000 [10:37<07:56,  4.64it/s]

steps= 112
steps= 35


 56%|█████▌    | 2794/5000 [10:37<08:06,  4.53it/s]

steps= 125
steps= 36


 56%|█████▌    | 2796/5000 [10:38<06:52,  5.35it/s]

steps= 111


 56%|█████▌    | 2798/5000 [10:38<09:33,  3.84it/s]

steps= 301
steps= 146
steps= 53


 56%|█████▌    | 2800/5000 [10:39<08:56,  4.10it/s]

steps= 136


 56%|█████▌    | 2801/5000 [10:39<10:48,  3.39it/s]

steps= 258
episode: 2800 episode reward: -1266 eps: 0.7481406067106477 avg reward (last 100): -1060.2970297029703 episode loss:  26172.334
avg reward for last 100 episodes: -1060.2970297029703


 56%|█████▌    | 2802/5000 [10:40<16:40,  2.20it/s]

steps= 225


 56%|█████▌    | 2804/5000 [10:40<12:31,  2.92it/s]

steps= 276
steps= 88


 56%|█████▌    | 2805/5000 [10:41<10:49,  3.38it/s]

steps= 92


 56%|█████▌    | 2807/5000 [10:41<11:06,  3.29it/s]

steps= 317
steps= 63


 56%|█████▌    | 2808/5000 [10:41<10:31,  3.47it/s]

steps= 143
steps= 30


 56%|█████▌    | 2810/5000 [10:42<08:56,  4.08it/s]

steps= 112


 56%|█████▌    | 2811/5000 [10:42<08:40,  4.21it/s]

steps= 119


 56%|█████▌    | 2812/5000 [10:42<09:02,  4.03it/s]

steps= 122


 56%|█████▋    | 2814/5000 [10:43<08:29,  4.29it/s]

steps= 204
steps= 186


 56%|█████▋    | 2815/5000 [10:43<10:35,  3.44it/s]

steps= 222


 56%|█████▋    | 2816/5000 [10:43<10:22,  3.51it/s]

steps= 203


 56%|█████▋    | 2818/5000 [10:44<12:38,  2.88it/s]

steps= 392
steps= 55


 56%|█████▋    | 2819/5000 [10:45<12:23,  2.93it/s]

steps= 199


 56%|█████▋    | 2820/5000 [10:45<13:15,  2.74it/s]

steps= 158


 56%|█████▋    | 2821/5000 [10:46<18:29,  1.96it/s]

steps= 412


 56%|█████▋    | 2822/5000 [10:46<17:47,  2.04it/s]

steps= 219


 56%|█████▋    | 2824/5000 [10:47<14:22,  2.52it/s]

steps= 434
steps= 62


 56%|█████▋    | 2825/5000 [10:47<14:07,  2.57it/s]

steps= 199


 57%|█████▋    | 2826/5000 [10:48<13:34,  2.67it/s]

steps= 121


 57%|█████▋    | 2828/5000 [10:49<12:56,  2.80it/s]

steps= 394
steps= 56


 57%|█████▋    | 2829/5000 [10:49<11:33,  3.13it/s]

steps= 164


 57%|█████▋    | 2830/5000 [10:50<18:20,  1.97it/s]

steps= 613


 57%|█████▋    | 2831/5000 [10:50<15:50,  2.28it/s]

steps= 224
steps= 12


 57%|█████▋    | 2834/5000 [10:50<10:25,  3.46it/s]

steps= 120
steps= 102


 57%|█████▋    | 2835/5000 [10:51<10:12,  3.53it/s]

steps= 189


 57%|█████▋    | 2836/5000 [10:51<15:11,  2.37it/s]

steps= 594
steps= 39


 57%|█████▋    | 2839/5000 [10:52<10:06,  3.56it/s]

steps= 139
steps= 103


 57%|█████▋    | 2841/5000 [10:52<08:01,  4.49it/s]

steps= 48
steps= 44


 57%|█████▋    | 2842/5000 [10:52<06:47,  5.29it/s]

steps= 41


 57%|█████▋    | 2843/5000 [10:52<07:02,  5.10it/s]

steps= 101


 57%|█████▋    | 2844/5000 [10:53<09:14,  3.89it/s]

steps= 276


 57%|█████▋    | 2845/5000 [10:53<09:13,  3.89it/s]

steps= 204


 57%|█████▋    | 2847/5000 [10:54<09:36,  3.73it/s]

steps= 288
steps= 100
steps= 21


 57%|█████▋    | 2849/5000 [10:54<07:46,  4.61it/s]

steps= 95


 57%|█████▋    | 2850/5000 [10:54<07:50,  4.57it/s]

steps= 170


 57%|█████▋    | 2851/5000 [10:54<09:47,  3.66it/s]

steps= 208


 57%|█████▋    | 2853/5000 [10:55<10:17,  3.48it/s]

steps= 108
steps= 30


 57%|█████▋    | 2854/5000 [10:55<08:57,  3.99it/s]

steps= 122


 57%|█████▋    | 2857/5000 [10:56<09:00,  3.97it/s]

steps= 400
steps= 45
steps= 63


 57%|█████▋    | 2860/5000 [10:56<07:28,  4.77it/s]

steps= 135
steps= 40
steps= 54


 57%|█████▋    | 2861/5000 [10:57<07:47,  4.57it/s]

steps= 170


 57%|█████▋    | 2863/5000 [10:57<07:17,  4.89it/s]

steps= 235
steps= 57


 57%|█████▋    | 2865/5000 [10:58<09:00,  3.95it/s]

steps= 460
steps= 99


 57%|█████▋    | 2866/5000 [10:58<08:34,  4.15it/s]

steps= 162
steps= 11
steps= 35


 57%|█████▋    | 2870/5000 [10:58<06:09,  5.77it/s]

steps= 206
steps= 58


 57%|█████▋    | 2871/5000 [10:59<05:42,  6.22it/s]

steps= 43


 57%|█████▋    | 2872/5000 [10:59<06:51,  5.17it/s]

steps= 117


 57%|█████▋    | 2873/5000 [10:59<10:57,  3.23it/s]

steps= 333


 57%|█████▊    | 2875/5000 [11:00<10:52,  3.26it/s]

steps= 366
steps= 75


 58%|█████▊    | 2877/5000 [11:00<08:08,  4.35it/s]

steps= 50
steps= 33


 58%|█████▊    | 2878/5000 [11:00<08:02,  4.39it/s]

steps= 102


 58%|█████▊    | 2879/5000 [11:01<08:04,  4.38it/s]

steps= 78


 58%|█████▊    | 2881/5000 [11:01<07:09,  4.94it/s]

steps= 136
steps= 64


 58%|█████▊    | 2884/5000 [11:01<06:26,  5.47it/s]

steps= 116
steps= 61
steps= 73


 58%|█████▊    | 2886/5000 [11:02<07:44,  4.56it/s]

steps= 221
steps= 66


 58%|█████▊    | 2888/5000 [11:02<06:54,  5.10it/s]

steps= 57
steps= 84


 58%|█████▊    | 2889/5000 [11:03<08:25,  4.17it/s]

steps= 259


 58%|█████▊    | 2890/5000 [11:03<09:05,  3.87it/s]

steps= 158


 58%|█████▊    | 2892/5000 [11:03<08:06,  4.33it/s]

steps= 205
steps= 34


 58%|█████▊    | 2893/5000 [11:04<10:12,  3.44it/s]

steps= 143


 58%|█████▊    | 2895/5000 [11:05<10:46,  3.26it/s]

steps= 185
steps= 65
steps= 25


 58%|█████▊    | 2897/5000 [11:05<09:13,  3.80it/s]

steps= 104


 58%|█████▊    | 2898/5000 [11:05<08:36,  4.07it/s]

steps= 116


 58%|█████▊    | 2899/5000 [11:06<12:07,  2.89it/s]

steps= 287


 58%|█████▊    | 2900/5000 [11:06<16:52,  2.07it/s]

steps= 326
steps= 77
episode: 2900 episode reward: -1085 eps: 0.7406961129220389 avg reward (last 100): -1045.6732673267327 episode loss:  23632.592
avg reward for last 100 episodes: -1045.6732673267327


 58%|█████▊    | 2902/5000 [11:07<13:21,  2.62it/s]

steps= 114
steps= 25


 58%|█████▊    | 2905/5000 [11:07<09:12,  3.79it/s]

steps= 137
steps= 142


 58%|█████▊    | 2906/5000 [11:08<09:39,  3.61it/s]

steps= 246
steps= 24


 58%|█████▊    | 2908/5000 [11:08<09:19,  3.74it/s]

steps= 174


 58%|█████▊    | 2909/5000 [11:08<10:19,  3.38it/s]

steps= 142


 58%|█████▊    | 2910/5000 [11:09<09:34,  3.64it/s]

steps= 63


 58%|█████▊    | 2911/5000 [11:09<11:01,  3.16it/s]

steps= 126


 58%|█████▊    | 2912/5000 [11:10<15:33,  2.24it/s]

steps= 343


 58%|█████▊    | 2913/5000 [11:10<13:30,  2.57it/s]

steps= 155


 58%|█████▊    | 2915/5000 [11:10<09:22,  3.71it/s]

steps= 84
steps= 73
steps= 35


 58%|█████▊    | 2917/5000 [11:11<08:50,  3.93it/s]

steps= 233
steps= 51


 58%|█████▊    | 2919/5000 [11:11<07:42,  4.50it/s]

steps= 124
steps= 30


 58%|█████▊    | 2921/5000 [11:12<08:35,  4.03it/s]

steps= 406


 58%|█████▊    | 2922/5000 [11:12<08:35,  4.03it/s]

steps= 109


 58%|█████▊    | 2923/5000 [11:13<12:45,  2.71it/s]

steps= 317


 58%|█████▊    | 2924/5000 [11:13<12:42,  2.72it/s]

steps= 183


 58%|█████▊    | 2925/5000 [11:13<12:19,  2.81it/s]

steps= 184


 59%|█████▊    | 2926/5000 [11:14<11:05,  3.11it/s]

steps= 111


 59%|█████▊    | 2927/5000 [11:14<10:12,  3.38it/s]

steps= 149


 59%|█████▊    | 2928/5000 [11:15<16:44,  2.06it/s]

steps= 398


 59%|█████▊    | 2929/5000 [11:15<14:56,  2.31it/s]

steps= 115


 59%|█████▊    | 2930/5000 [11:16<17:32,  1.97it/s]

steps= 296


 59%|█████▊    | 2931/5000 [11:16<15:36,  2.21it/s]

steps= 311


 59%|█████▊    | 2934/5000 [11:16<10:52,  3.17it/s]

steps= 129
steps= 63
steps= 18


 59%|█████▊    | 2935/5000 [11:17<14:16,  2.41it/s]

steps= 226
steps= 69


 59%|█████▊    | 2937/5000 [11:17<11:23,  3.02it/s]

steps= 175


 59%|█████▉    | 2938/5000 [11:18<12:04,  2.85it/s]

steps= 299
steps= 4


 59%|█████▉    | 2940/5000 [11:18<10:44,  3.20it/s]

steps= 103


 59%|█████▉    | 2941/5000 [11:19<15:36,  2.20it/s]

steps= 233


 59%|█████▉    | 2942/5000 [11:19<15:16,  2.25it/s]

steps= 249


 59%|█████▉    | 2944/5000 [11:21<17:46,  1.93it/s]

steps= 483
steps= 107
steps= 33


 59%|█████▉    | 2947/5000 [11:21<11:54,  2.87it/s]

steps= 269
steps= 133


 59%|█████▉    | 2949/5000 [11:22<08:31,  4.01it/s]

steps= 88
steps= 75


 59%|█████▉    | 2950/5000 [11:22<11:09,  3.06it/s]

steps= 173


 59%|█████▉    | 2951/5000 [11:22<10:47,  3.16it/s]

steps= 154


 59%|█████▉    | 2953/5000 [11:23<09:20,  3.65it/s]

steps= 97
steps= 42


 59%|█████▉    | 2954/5000 [11:23<08:24,  4.05it/s]

steps= 60


 59%|█████▉    | 2955/5000 [11:23<08:34,  3.97it/s]

steps= 118


 59%|█████▉    | 2956/5000 [11:25<19:05,  1.78it/s]

steps= 559


 59%|█████▉    | 2957/5000 [11:25<16:56,  2.01it/s]

steps= 214


 59%|█████▉    | 2959/5000 [11:25<11:30,  2.95it/s]

steps= 130
steps= 63


 59%|█████▉    | 2960/5000 [11:26<10:27,  3.25it/s]

steps= 167
steps= 34


 59%|█████▉    | 2963/5000 [11:26<08:25,  4.03it/s]

steps= 276
steps= 153


 59%|█████▉    | 2964/5000 [11:27<09:09,  3.71it/s]

steps= 210


 59%|█████▉    | 2965/5000 [11:27<08:42,  3.89it/s]

steps= 85


 59%|█████▉    | 2966/5000 [11:27<08:33,  3.96it/s]

steps= 105


 59%|█████▉    | 2967/5000 [11:27<08:55,  3.79it/s]

steps= 196


 59%|█████▉    | 2970/5000 [11:28<08:23,  4.03it/s]

steps= 189
steps= 16
steps= 111


 59%|█████▉    | 2972/5000 [11:28<06:40,  5.06it/s]

steps= 37
steps= 147


 59%|█████▉    | 2973/5000 [11:28<06:43,  5.02it/s]

steps= 106


 59%|█████▉    | 2974/5000 [11:30<15:29,  2.18it/s]

steps= 442


 60%|█████▉    | 2975/5000 [11:30<17:18,  1.95it/s]

steps= 285


 60%|█████▉    | 2977/5000 [11:31<12:17,  2.74it/s]

steps= 152
steps= 73


 60%|█████▉    | 2978/5000 [11:31<15:00,  2.25it/s]

steps= 349


 60%|█████▉    | 2979/5000 [11:32<13:34,  2.48it/s]

steps= 114


 60%|█████▉    | 2980/5000 [11:32<12:36,  2.67it/s]

steps= 179


 60%|█████▉    | 2981/5000 [11:32<11:52,  2.83it/s]

steps= 201
steps= 72


 60%|█████▉    | 2984/5000 [11:33<08:07,  4.13it/s]

steps= 114
steps= 106


 60%|█████▉    | 2986/5000 [11:33<08:08,  4.13it/s]

steps= 203
steps= 36


 60%|█████▉    | 2988/5000 [11:34<07:32,  4.45it/s]

steps= 117
steps= 41


 60%|█████▉    | 2989/5000 [11:34<08:14,  4.06it/s]

steps= 68


 60%|█████▉    | 2990/5000 [11:34<10:55,  3.07it/s]

steps= 152


 60%|█████▉    | 2991/5000 [11:35<09:56,  3.37it/s]

steps= 130


 60%|█████▉    | 2992/5000 [11:35<11:28,  2.92it/s]

steps= 314


 60%|█████▉    | 2993/5000 [11:35<12:27,  2.69it/s]

steps= 229


 60%|█████▉    | 2994/5000 [11:36<10:49,  3.09it/s]

steps= 72
steps= 37


 60%|█████▉    | 2996/5000 [11:37<12:49,  2.60it/s]

steps= 411


 60%|█████▉    | 2997/5000 [11:37<11:56,  2.79it/s]

steps= 115


 60%|█████▉    | 2998/5000 [11:37<10:49,  3.08it/s]

steps= 105
steps= 7


 60%|██████    | 3000/5000 [11:38<09:07,  3.65it/s]

steps= 196


 60%|██████    | 3001/5000 [11:38<10:25,  3.20it/s]

steps= 181
episode: 3000 episode reward: -1189 eps: 0.7333256967697342 avg reward (last 100): -1083.9108910891089 episode loss:  26199.697
avg reward for last 100 episodes: -1083.9108910891089


 60%|██████    | 3002/5000 [11:38<10:33,  3.15it/s]

steps= 139


 60%|██████    | 3005/5000 [11:39<08:06,  4.10it/s]

steps= 136
steps= 31
steps= 43


 60%|██████    | 3006/5000 [11:39<09:05,  3.65it/s]

steps= 169


 60%|██████    | 3007/5000 [11:39<09:25,  3.53it/s]

steps= 81


 60%|██████    | 3009/5000 [11:40<08:37,  3.85it/s]

steps= 258
steps= 104


 60%|██████    | 3010/5000 [11:40<11:06,  2.99it/s]

steps= 296


 60%|██████    | 3012/5000 [11:41<07:54,  4.19it/s]

steps= 72
steps= 33


 60%|██████    | 3013/5000 [11:41<07:10,  4.62it/s]

steps= 143


 60%|██████    | 3014/5000 [11:41<09:37,  3.44it/s]

steps= 232


 60%|██████    | 3015/5000 [11:42<13:32,  2.44it/s]

steps= 422


 60%|██████    | 3018/5000 [11:43<10:50,  3.04it/s]

steps= 400
steps= 55
steps= 37


 60%|██████    | 3019/5000 [11:43<10:37,  3.11it/s]

steps= 155
steps= 49


 60%|██████    | 3021/5000 [11:44<10:06,  3.26it/s]

steps= 185


 60%|██████    | 3024/5000 [11:44<08:24,  3.91it/s]

steps= 123
steps= 21
steps= 12


 60%|██████    | 3025/5000 [11:44<08:46,  3.75it/s]

steps= 55


 61%|██████    | 3026/5000 [11:45<13:50,  2.38it/s]

steps= 276


 61%|██████    | 3027/5000 [11:45<11:48,  2.78it/s]

steps= 76
steps= 23
steps= 95


 61%|██████    | 3030/5000 [11:46<07:51,  4.18it/s]

steps= 89


 61%|██████    | 3031/5000 [11:46<07:40,  4.28it/s]

steps= 155


 61%|██████    | 3032/5000 [11:46<08:02,  4.08it/s]

steps= 131


 61%|██████    | 3034/5000 [11:47<07:50,  4.18it/s]

steps= 198
steps= 24


 61%|██████    | 3035/5000 [11:47<07:36,  4.30it/s]

steps= 159


 61%|██████    | 3036/5000 [11:47<08:55,  3.67it/s]

steps= 165


 61%|██████    | 3038/5000 [11:48<10:03,  3.25it/s]

steps= 301
steps= 72


 61%|██████    | 3039/5000 [11:49<11:00,  2.97it/s]

steps= 261


 61%|██████    | 3040/5000 [11:49<10:25,  3.13it/s]

steps= 67


 61%|██████    | 3041/5000 [11:49<10:58,  2.97it/s]

steps= 151


 61%|██████    | 3042/5000 [11:50<16:27,  1.98it/s]

steps= 590


 61%|██████    | 3044/5000 [11:51<11:30,  2.83it/s]

steps= 126
steps= 44


 61%|██████    | 3046/5000 [11:51<11:17,  2.88it/s]

steps= 487
steps= 94
steps= 21


 61%|██████    | 3048/5000 [11:52<08:57,  3.63it/s]

steps= 113


 61%|██████    | 3049/5000 [11:52<09:59,  3.25it/s]

steps= 163


 61%|██████    | 3051/5000 [11:52<07:53,  4.12it/s]

steps= 143
steps= 54


 61%|██████    | 3052/5000 [11:53<13:26,  2.41it/s]

steps= 594


 61%|██████    | 3053/5000 [11:54<18:00,  1.80it/s]

steps= 493


 61%|██████    | 3054/5000 [11:55<20:45,  1.56it/s]

steps= 333


 61%|██████    | 3055/5000 [11:55<16:48,  1.93it/s]

steps= 168


 61%|██████    | 3056/5000 [11:55<14:40,  2.21it/s]

steps= 225
steps= 33


 61%|██████    | 3059/5000 [11:56<09:36,  3.37it/s]

steps= 180
steps= 103
steps= 32


 61%|██████    | 3061/5000 [11:56<08:19,  3.88it/s]

steps= 173


 61%|██████▏   | 3063/5000 [11:56<07:06,  4.55it/s]

steps= 108
steps= 53


 61%|██████▏   | 3065/5000 [11:57<05:49,  5.53it/s]

steps= 27
steps= 20


 61%|██████▏   | 3066/5000 [11:57<08:11,  3.93it/s]

steps= 257


 61%|██████▏   | 3067/5000 [11:57<08:20,  3.87it/s]

steps= 177


 61%|██████▏   | 3068/5000 [11:58<08:14,  3.90it/s]

steps= 146


 61%|██████▏   | 3069/5000 [11:58<09:22,  3.43it/s]

steps= 122


 61%|██████▏   | 3070/5000 [11:58<08:40,  3.71it/s]

steps= 75


 61%|██████▏   | 3071/5000 [11:59<11:57,  2.69it/s]

steps= 201


 61%|██████▏   | 3072/5000 [11:59<12:58,  2.48it/s]

steps= 175


 61%|██████▏   | 3073/5000 [12:00<13:37,  2.36it/s]

steps= 178


 62%|██████▏   | 3075/5000 [12:01<17:36,  1.82it/s]

steps= 840
steps= 76
steps= 55


 62%|██████▏   | 3079/5000 [12:02<09:57,  3.22it/s]

steps= 111
steps= 28
steps= 33


 62%|██████▏   | 3081/5000 [12:02<07:24,  4.32it/s]

steps= 103
steps= 92


 62%|██████▏   | 3083/5000 [12:02<06:28,  4.94it/s]

steps= 46
steps= 93


 62%|██████▏   | 3085/5000 [12:03<08:51,  3.61it/s]

steps= 332
steps= 44


 62%|██████▏   | 3086/5000 [12:03<08:54,  3.58it/s]

steps= 118


 62%|██████▏   | 3087/5000 [12:04<09:59,  3.19it/s]

steps= 181


 62%|██████▏   | 3089/5000 [12:05<10:51,  2.93it/s]

steps= 314
steps= 68


 62%|██████▏   | 3090/5000 [12:05<10:19,  3.08it/s]

steps= 118
steps= 29
steps= 77


 62%|██████▏   | 3093/5000 [12:05<07:03,  4.51it/s]

steps= 51


 62%|██████▏   | 3094/5000 [12:06<11:11,  2.84it/s]

steps= 397
steps= 21


 62%|██████▏   | 3096/5000 [12:06<10:56,  2.90it/s]

steps= 267


 62%|██████▏   | 3097/5000 [12:07<13:07,  2.42it/s]

steps= 260
steps=

 62%|██████▏   | 3099/5000 [12:07<09:07,  3.47it/s]

 80
steps= 43


 62%|██████▏   | 3100/5000 [12:08<12:48,  2.47it/s]

steps= 252


 62%|██████▏   | 3102/5000 [12:09<11:19,  2.79it/s]

steps= 125
episode: 3100 episode reward: -1133 eps: 0.7260286211322649 avg reward (last 100): -1043.920792079208 episode loss:  24998.002
avg reward for last 100 episodes: -1043.920792079208
steps= 101


 62%|██████▏   | 3103/5000 [12:10<15:47,  2.00it/s]

steps= 360


 62%|██████▏   | 3104/5000 [12:11<21:01,  1.50it/s]

steps= 550
steps= 10


 62%|██████▏   | 3106/5000 [12:11<17:27,  1.81it/s]

steps= 262


 62%|██████▏   | 3107/5000 [12:12<16:45,  1.88it/s]

steps= 198


 62%|██████▏   | 3108/5000 [12:12<14:25,  2.19it/s]

steps= 113


 62%|██████▏   | 3109/5000 [12:12<14:28,  2.18it/s]

steps= 150


 62%|██████▏   | 3110/5000 [12:13<13:51,  2.27it/s]

steps= 243


 62%|██████▏   | 3112/5000 [12:13<09:55,  3.17it/s]

steps= 164
steps= 63


 62%|██████▏   | 3113/5000 [12:13<08:30,  3.70it/s]

steps= 59
steps= 22


 62%|██████▏   | 3116/5000 [12:14<06:33,  4.79it/s]

steps= 84
steps= 83


 62%|██████▏   | 3117/5000 [12:14<06:38,  4.73it/s]

steps= 103


 62%|██████▏   | 3118/5000 [12:14<07:20,  4.27it/s]

steps= 182


 62%|██████▏   | 3119/5000 [12:15<09:22,  3.35it/s]

steps= 198


 62%|██████▏   | 3121/5000 [12:15<08:37,  3.63it/s]

steps= 118
steps= 48


 62%|██████▏   | 3122/5000 [12:16<08:20,  3.75it/s]

steps= 80


 62%|██████▏   | 3123/5000 [12:16<13:23,  2.34it/s]

steps= 403


 62%|██████▏   | 3124/5000 [12:17<12:07,  2.58it/s]

steps= 162


 62%|██████▎   | 3125/5000 [12:17<11:27,  2.73it/s]

steps= 179


 63%|██████▎   | 3126/5000 [12:17<10:33,  2.96it/s]

steps= 88


 63%|██████▎   | 3127/5000 [12:18<11:15,  2.77it/s]

steps= 171


 63%|██████▎   | 3128/5000 [12:18<14:04,  2.22it/s]

steps= 372


 63%|██████▎   | 3129/5000 [12:19<14:11,  2.20it/s]

steps= 181


 63%|██████▎   | 3131/5000 [12:20<13:38,  2.28it/s]

steps= 498
steps= 59


 63%|██████▎   | 3132/5000 [12:20<13:58,  2.23it/s]

steps= 387
steps= 34


 63%|██████▎   | 3134/5000 [12:21<13:17,  2.34it/s]

steps= 375


 63%|██████▎   | 3136/5000 [12:21<09:42,  3.20it/s]

steps= 82
steps= 28


 63%|██████▎   | 3137/5000 [12:22<10:43,  2.89it/s]

steps= 189


 63%|██████▎   | 3138/5000 [12:22<09:28,  3.27it/s]

steps= 84


 63%|██████▎   | 3141/5000 [12:23<12:05,  2.56it/s]

steps= 552
steps= 66
steps= 46


 63%|██████▎   | 3144/5000 [12:24<08:29,  3.64it/s]

steps= 197
steps= 21
steps= 83


 63%|██████▎   | 3145/5000 [12:24<06:55,  4.46it/s]

steps= 57
steps= 79


 63%|██████▎   | 3147/5000 [12:25<08:54,  3.47it/s]

steps= 431


 63%|██████▎   | 3149/5000 [12:25<09:34,  3.22it/s]

steps= 244
steps= 69


 63%|██████▎   | 3151/5000 [12:26<08:55,  3.45it/s]

steps= 154
steps= 117


 63%|██████▎   | 3152/5000 [12:26<10:18,  2.99it/s]

steps= 245


 63%|██████▎   | 3153/5000 [12:27<11:47,  2.61it/s]

steps= 273


 63%|██████▎   | 3154/5000 [12:27<11:26,  2.69it/s]

steps= 190


 63%|██████▎   | 3156/5000 [12:28<08:36,  3.57it/s]

steps= 203
steps= 72


 63%|██████▎   | 3157/5000 [12:29<19:59,  1.54it/s]

steps= 625


 63%|██████▎   | 3158/5000 [12:30<16:39,  1.84it/s]

steps= 179


 63%|██████▎   | 3159/5000 [12:31<22:14,  1.38it/s]

steps= 538


 63%|██████▎   | 3160/5000 [12:31<18:15,  1.68it/s]

steps= 156


 63%|██████▎   | 3162/5000 [12:31<11:55,  2.57it/s]

steps= 133
steps= 89


 63%|██████▎   | 3163/5000 [12:32<11:42,  2.61it/s]

steps= 253


 63%|██████▎   | 3164/5000 [12:32<12:32,  2.44it/s]

steps= 310


 63%|██████▎   | 3165/5000 [12:33<16:11,  1.89it/s]

steps= 487


 63%|██████▎   | 3167/5000 [12:33<10:53,  2.81it/s]

steps= 253
steps= 103


 63%|██████▎   | 3168/5000 [12:34<08:40,  3.52it/s]

steps= 86


 63%|██████▎   | 3169/5000 [12:34<09:30,  3.21it/s]

steps= 338


 63%|██████▎   | 3170/5000 [12:34<11:26,  2.67it/s]

steps= 245


 63%|██████▎   | 3171/5000 [12:35<14:37,  2.08it/s]

steps= 424


 63%|██████▎   | 3172/5000 [12:35<12:20,  2.47it/s]

steps= 122
steps= 56


 64%|██████▎   | 3175/5000 [12:36<08:30,  3.58it/s]

steps= 194
steps= 64


 64%|██████▎   | 3176/5000 [12:36<07:52,  3.86it/s]

steps= 97


 64%|██████▎   | 3177/5000 [12:37<10:18,  2.95it/s]

steps= 350


 64%|██████▎   | 3178/5000 [12:37<10:59,  2.76it/s]

steps= 255


 64%|██████▎   | 3179/5000 [12:38<16:16,  1.87it/s]

steps= 338


 64%|██████▎   | 3180/5000 [12:38<13:49,  2.19it/s]

steps= 182


 64%|██████▎   | 3182/5000 [12:39<09:32,  3.18it/s]

steps= 205
steps= 26


 64%|██████▎   | 3184/5000 [12:39<07:00,  4.32it/s]

steps= 57
steps= 52


 64%|██████▎   | 3185/5000 [12:40<11:26,  2.64it/s]

steps= 313


 64%|██████▎   | 3186/5000 [12:40<11:10,  2.71it/s]

steps= 224


 64%|██████▍   | 3188/5000 [12:41<09:37,  3.14it/s]

steps= 174
steps= 119


 64%|██████▍   | 3189/5000 [12:41<09:32,  3.17it/s]

steps= 206


 64%|██████▍   | 3190/5000 [12:41<10:26,  2.89it/s]

steps= 224


 64%|██████▍   | 3191/5000 [12:42<12:16,  2.46it/s]

steps= 238
steps= 11


 64%|██████▍   | 3193/5000 [12:42<10:34,  2.85it/s]

steps= 229
steps= 36


 64%|██████▍   | 3195/5000 [12:43<11:34,  2.60it/s]

steps= 382


 64%|██████▍   | 3196/5000 [12:44<14:33,  2.07it/s]

steps= 455


 64%|██████▍   | 3198/5000 [12:44<10:09,  2.96it/s]

steps= 153
steps= 45


 64%|██████▍   | 3199/5000 [12:45<10:04,  2.98it/s]

steps= 207
steps= 24


 64%|██████▍   | 3201/5000 [12:45<08:03,  3.72it/s]

steps= 75
episode: 3200 episode reward: -1083 eps: 0.7188041562230073 avg reward (last 100): -1137.009900990099 episode loss:  21996.615
avg reward for last 100 episodes: -1137.009900990099


 64%|██████▍   | 3202/5000 [12:46<13:31,  2.22it/s]

steps= 422


 64%|██████▍   | 3203/5000 [12:46<13:30,  2.22it/s]

steps= 95


 64%|██████▍   | 3205/5000 [12:47<11:43,  2.55it/s]

steps= 233
steps= 66
steps= 25


 64%|██████▍   | 3208/5000 [12:47<07:52,  3.79it/s]

steps= 51
steps= 35


 64%|██████▍   | 3209/5000 [12:48<07:30,  3.97it/s]

steps= 76


 64%|██████▍   | 3210/5000 [12:48<07:28,  3.99it/s]

steps= 93


 64%|██████▍   | 3212/5000 [12:49<08:22,  3.56it/s]

steps= 227
steps= 55


 64%|██████▍   | 3214/5000 [12:49<06:46,  4.39it/s]

steps= 37
steps= 72


 64%|██████▍   | 3216/5000 [12:49<06:17,  4.72it/s]

steps= 68
steps= 105


 64%|██████▍   | 3217/5000 [12:50<11:40,  2.55it/s]

steps= 470


 64%|██████▍   | 3218/5000 [12:50<10:31,  2.82it/s]

steps= 119
steps= 13


 64%|██████▍   | 3221/5000 [12:51<07:31,  3.94it/s]

steps= 167
steps= 33


 64%|██████▍   | 3223/5000 [12:51<06:50,  4.33it/s]

steps= 177
steps= 61


 64%|██████▍   | 3224/5000 [12:52<11:20,  2.61it/s]

steps= 424


 65%|██████▍   | 3226/5000 [12:53<11:00,  2.69it/s]

steps= 276
steps= 64


 65%|██████▍   | 3227/5000 [12:53<11:46,  2.51it/s]

steps= 190


 65%|██████▍   | 3228/5000 [12:54<11:32,  2.56it/s]

steps= 210


 65%|██████▍   | 3230/5000 [12:54<09:28,  3.11it/s]

steps= 233
steps= 109


 65%|██████▍   | 3231/5000 [12:54<07:38,  3.86it/s]

steps= 27


 65%|██████▍   | 3233/5000 [12:55<09:14,  3.19it/s]

steps= 439
steps= 55


 65%|██████▍   | 3235/5000 [12:56<07:19,  4.02it/s]

steps= 80
steps= 28


 65%|██████▍   | 3237/5000 [12:56<07:09,  4.10it/s]

steps= 84
steps= 59


 65%|██████▍   | 3239/5000 [12:57<08:20,  3.52it/s]

steps= 307
steps= 78


 65%|██████▍   | 3241/5000 [12:57<06:13,  4.71it/s]

steps= 153
steps= 64


 65%|██████▍   | 3243/5000 [12:58<07:05,  4.13it/s]

steps= 222
steps= 94


 65%|██████▍   | 3244/5000 [12:58<06:38,  4.41it/s]

steps= 69


 65%|██████▍   | 3245/5000 [12:58<07:00,  4.17it/s]

steps= 143
steps= 6


 65%|██████▍   | 3249/5000 [12:59<06:06,  4.77it/s]

steps= 376
steps= 39
steps= 51


 65%|██████▌   | 3250/5000 [12:59<06:53,  4.24it/s]

steps= 253


 65%|██████▌   | 3251/5000 [13:00<10:38,  2.74it/s]

steps= 304


 65%|██████▌   | 3253/5000 [13:00<08:53,  3.27it/s]

steps= 207
steps= 105


 65%|██████▌   | 3254/5000 [13:01<10:22,  2.80it/s]

steps= 306


 65%|██████▌   | 3255/5000 [13:01<09:35,  3.03it/s]

steps= 104


 65%|██████▌   | 3258/5000 [13:02<07:03,  4.11it/s]

steps= 197
steps= 7
steps= 97


 65%|██████▌   | 3259/5000 [13:02<06:01,  4.81it/s]

steps= 53
steps= 25


 65%|██████▌   | 3261/5000 [13:02<05:07,  5.66it/s]

steps= 92


 65%|██████▌   | 3262/5000 [13:02<06:47,  4.26it/s]

steps= 176


 65%|██████▌   | 3263/5000 [13:03<07:25,  3.90it/s]

steps= 113


 65%|██████▌   | 3265/5000 [13:03<07:41,  3.76it/s]

steps= 337
steps= 45


 65%|██████▌   | 3266/5000 [13:04<10:56,  2.64it/s]

steps= 445
steps= 63


 65%|██████▌   | 3268/5000 [13:04<08:32,  3.38it/s]

steps= 80


 65%|██████▌   | 3269/5000 [13:04<08:29,  3.39it/s]

steps= 78


 65%|██████▌   | 3270/5000 [13:05<15:36,  1.85it/s]

steps= 443


 65%|██████▌   | 3273/5000 [13:06<12:06,  2.38it/s]

steps= 326
steps= 38
steps= 69


 65%|██████▌   | 3274/5000 [13:06<10:21,  2.78it/s]

steps= 86


 66%|██████▌   | 3276/5000 [13:07<07:24,  3.88it/s]

steps= 142
steps= 53


 66%|██████▌   | 3277/5000 [13:07<09:25,  3.05it/s]

steps= 294


 66%|██████▌   | 3278/5000 [13:08<12:04,  2.38it/s]

steps= 368


 66%|██████▌   | 3279/5000 [13:08<10:44,  2.67it/s]

steps= 174


 66%|██████▌   | 3280/5000 [13:09<14:55,  1.92it/s]

steps= 372


 66%|██████▌   | 3281/5000 [13:10<21:41,  1.32it/s]

steps= 598


 66%|██████▌   | 3282/5000 [13:11<17:02,  1.68it/s]

steps= 92


 66%|██████▌   | 3283/5000 [13:11<13:44,  2.08it/s]

steps= 78


 66%|██████▌   | 3284/5000 [13:11<12:52,  2.22it/s]

steps= 186


 66%|██████▌   | 3286/5000 [13:12<10:34,  2.70it/s]

steps= 301
steps= 72


 66%|██████▌   | 3287/5000 [13:12<10:10,  2.80it/s]

steps= 223


 66%|██████▌   | 3289/5000 [13:13<07:51,  3.63it/s]

steps= 104
steps= 69


 66%|██████▌   | 3290/5000 [13:13<08:22,  3.40it/s]

steps= 106


 66%|██████▌   | 3292/5000 [13:13<07:00,  4.06it/s]

steps= 90
steps= 41


 66%|██████▌   | 3293/5000 [13:14<07:17,  3.90it/s]

steps= 93


 66%|██████▌   | 3294/5000 [13:15<13:06,  2.17it/s]

steps= 480


 66%|██████▌   | 3295/5000 [13:15<15:35,  1.82it/s]

steps= 259


 66%|██████▌   | 3296/5000 [13:16<18:54,  1.50it/s]

steps= 202


 66%|██████▌   | 3298/5000 [13:17<12:18,  2.30it/s]

steps= 83
steps= 79


 66%|██████▌   | 3300/5000 [13:17<09:50,  2.88it/s]

steps= 156
steps= 81
steps= 52
episode: 3300 episode reward: -1060 eps: 0.7116515795171979 avg reward (last 100): -1043.079207920792 episode loss:  24994.328
avg reward for last 100 episodes: -1043.079207920792


 66%|██████▌   | 3302/5000 [13:17<07:34,  3.73it/s]

steps= 64


 66%|██████▌   | 3303/5000 [13:18<10:58,  2.58it/s]

steps= 251


 66%|██████▌   | 3304/5000 [13:18<10:37,  2.66it/s]

steps= 140


 66%|██████▌   | 3305/5000 [13:19<13:21,  2.11it/s]

steps= 210


 66%|██████▌   | 3307/5000 [13:20<10:38,  2.65it/s]

steps= 56
steps= 92


 66%|██████▌   | 3308/5000 [13:21<15:15,  1.85it/s]

steps= 287


 66%|██████▌   | 3309/5000 [13:21<12:47,  2.20it/s]

steps= 62


 66%|██████▌   | 3311/5000 [13:21<10:05,  2.79it/s]

steps= 99
steps= 18


 66%|██████▋   | 3313/5000 [13:22<09:09,  3.07it/s]

steps= 131
steps= 66


 66%|██████▋   | 3315/5000 [13:22<07:16,  3.86it/s]

steps= 9
steps= 115


 66%|██████▋   | 3316/5000 [13:22<06:29,  4.32it/s]

steps= 101


 66%|██████▋   | 3318/5000 [13:23<06:14,  4.49it/s]

steps= 116
steps= 135


 66%|██████▋   | 3320/5000 [13:23<06:53,  4.07it/s]

steps= 199
steps= 66


 66%|██████▋   | 3321/5000 [13:24<06:12,  4.50it/s]

steps= 41


 66%|██████▋   | 3323/5000 [13:25<08:18,  3.37it/s]

steps= 269
steps= 34


 66%|██████▋   | 3324/5000 [13:25<08:01,  3.48it/s]

steps= 140


 66%|██████▋   | 3325/5000 [13:25<07:25,  3.76it/s]

steps= 70


 67%|██████▋   | 3326/5000 [13:25<08:42,  3.20it/s]

steps= 223


 67%|██████▋   | 3327/5000 [13:26<09:18,  3.00it/s]

steps= 107


 67%|██████▋   | 3328/5000 [13:26<11:52,  2.35it/s]

steps= 285


 67%|██████▋   | 3329/5000 [13:27<11:16,  2.47it/s]

steps= 157


 67%|██████▋   | 3330/5000 [13:27<09:58,  2.79it/s]

steps= 87


 67%|██████▋   | 3331/5000 [13:28<11:33,  2.41it/s]

steps= 301


 67%|██████▋   | 3332/5000 [13:28<15:35,  1.78it/s]

steps= 363


 67%|██████▋   | 3333/5000 [13:29<17:46,  1.56it/s]

steps= 349


 67%|██████▋   | 3335/5000 [13:30<13:00,  2.13it/s]

steps= 337
steps= 140


 67%|██████▋   | 3336/5000 [13:31<14:07,  1.96it/s]

steps= 649
steps= 38
steps= 18


 67%|██████▋   | 3339/5000 [13:31<11:00,  2.51it/s]

steps= 258


 67%|██████▋   | 3341/5000 [13:31<07:50,  3.53it/s]

steps= 137
steps= 125


 67%|██████▋   | 3342/5000 [13:32<08:55,  3.10it/s]

steps= 351


 67%|██████▋   | 3343/5000 [13:32<08:43,  3.17it/s]

steps= 133


 67%|██████▋   | 3344/5000 [13:32<08:05,  3.41it/s]

steps= 48


 67%|██████▋   | 3346/5000 [13:33<08:49,  3.13it/s]

steps= 175
steps= 96


 67%|██████▋   | 3347/5000 [13:33<07:33,  3.65it/s]

steps= 146


 67%|██████▋   | 3349/5000 [13:34<07:06,  3.87it/s]

steps= 261
steps= 57


 67%|██████▋   | 3350/5000 [13:34<07:28,  3.68it/s]

steps= 277


 67%|██████▋   | 3351/5000 [13:35<10:21,  2.65it/s]

steps= 601


 67%|██████▋   | 3353/5000 [13:35<09:38,  2.85it/s]

steps= 516
steps= 123


 67%|██████▋   | 3354/5000 [13:36<07:42,  3.56it/s]

steps= 20


 67%|██████▋   | 3355/5000 [13:36<08:18,  3.30it/s]

steps= 47


 67%|██████▋   | 3356/5000 [13:36<08:28,  3.23it/s]

steps= 51


 67%|██████▋   | 3357/5000 [13:37<08:07,  3.37it/s]

steps= 38


 67%|██████▋   | 3358/5000 [13:37<10:22,  2.64it/s]

steps= 204


 67%|██████▋   | 3359/5000 [13:37<09:07,  3.00it/s]

steps= 149
steps= 17
steps= 51


 67%|██████▋   | 3363/5000 [13:38<07:05,  3.84it/s]

steps= 419
steps= 128


 67%|██████▋   | 3364/5000 [13:38<05:59,  4.55it/s]

steps= 107
steps= 81


 67%|██████▋   | 3366/5000 [13:39<05:40,  4.81it/s]

steps= 351


 67%|██████▋   | 3367/5000 [13:39<05:55,  4.60it/s]

steps= 132


 67%|██████▋   | 3369/5000 [13:39<05:43,  4.74it/s]

steps= 141
steps= 144


 67%|██████▋   | 3370/5000 [13:39<05:04,  5.36it/s]

steps= 48
steps= 38


 67%|██████▋   | 3372/5000 [13:40<04:58,  5.46it/s]

steps= 125


 68%|██████▊   | 3375/5000 [13:41<06:23,  4.23it/s]

steps= 453
steps= 89
steps= 42


 68%|██████▊   | 3376/5000 [13:42<12:54,  2.10it/s]

steps= 399
steps= 22


 68%|██████▊   | 3379/5000 [13:42<08:32,  3.16it/s]

steps= 42
steps= 148
steps= 22


 68%|██████▊   | 3381/5000 [13:43<07:57,  3.39it/s]

steps= 234


 68%|██████▊   | 3383/5000 [13:43<06:14,  4.31it/s]

steps= 239
steps= 53


 68%|██████▊   | 3384/5000 [13:43<06:17,  4.28it/s]

steps= 148


 68%|██████▊   | 3385/5000 [13:43<06:20,  4.25it/s]

steps= 93


 68%|██████▊   | 3386/5000 [13:44<06:31,  4.12it/s]

steps= 121


 68%|██████▊   | 3389/5000 [13:44<05:21,  5.01it/s]

steps= 190
steps= 33
steps= 57


 68%|██████▊   | 3391/5000 [13:44<04:25,  6.06it/s]

steps= 74
steps= 62
steps= 18


 68%|██████▊   | 3393/5000 [13:44<03:59,  6.70it/s]

steps= 221


 68%|██████▊   | 3394/5000 [13:45<04:44,  5.64it/s]

steps= 257


 68%|██████▊   | 3395/5000 [13:45<05:14,  5.11it/s]

steps= 112


 68%|██████▊   | 3396/5000 [13:45<06:16,  4.26it/s]

steps= 152


 68%|██████▊   | 3397/5000 [13:46<06:12,  4.30it/s]

steps= 51
steps= 55
steps= 75


 68%|██████▊   | 3400/5000 [13:46<05:10,  5.15it/s]

steps= 75


 68%|██████▊   | 3401/5000 [13:47<09:07,  2.92it/s]

steps= 295
episode: 3400 episode reward: -1303 eps: 0.7045701756796721 avg reward (last 100): -1100.029702970297 episode loss:  25084.69
avg reward for last 100 episodes: -1100.029702970297


 68%|██████▊   | 3402/5000 [13:47<09:12,  2.89it/s]

steps= 226


 68%|██████▊   | 3403/5000 [13:47<08:32,  3.12it/s]

steps= 237


 68%|██████▊   | 3404/5000 [13:48<11:58,  2.22it/s]

steps= 437


 68%|██████▊   | 3406/5000 [13:49<10:55,  2.43it/s]

steps= 559
steps= 117


 68%|██████▊   | 3408/5000 [13:49<07:27,  3.56it/s]

steps= 82
steps= 57
steps= 34


 68%|██████▊   | 3411/5000 [13:50<05:39,  4.68it/s]

steps= 97
steps= 39


 68%|██████▊   | 3412/5000 [13:50<07:01,  3.77it/s]

steps= 146


 68%|██████▊   | 3413/5000 [13:51<09:56,  2.66it/s]

steps= 392


 68%|██████▊   | 3414/5000 [13:51<11:32,  2.29it/s]

steps= 505


 68%|██████▊   | 3415/5000 [13:51<10:15,  2.57it/s]

steps= 255


 68%|██████▊   | 3417/5000 [13:52<07:45,  3.40it/s]

steps= 194
steps= 221


 68%|██████▊   | 3419/5000 [13:52<06:01,  4.37it/s]

steps= 12
steps= 132


 68%|██████▊   | 3420/5000 [13:52<06:11,  4.26it/s]

steps= 167


 68%|██████▊   | 3421/5000 [13:53<05:55,  4.44it/s]

steps= 216


 68%|██████▊   | 3422/5000 [13:53<08:05,  3.25it/s]

steps= 472


 68%|██████▊   | 3424/5000 [13:53<06:51,  3.83it/s]

steps= 268
steps= 65


 68%|██████▊   | 3425/5000 [13:54<05:43,  4.59it/s]

steps= 91
steps= 21


 69%|██████▊   | 3427/5000 [13:54<05:33,  4.72it/s]

steps= 159
steps= 65


 69%|██████▊   | 3430/5000 [13:54<04:41,  5.58it/s]

steps= 211
steps= 121


 69%|██████▊   | 3432/5000 [13:55<05:17,  4.94it/s]

steps= 171
steps= 89
steps= 59


 69%|██████▊   | 3435/5000 [13:55<05:06,  5.11it/s]

steps= 100
steps= 42
steps= 25


 69%|██████▊   | 3437/5000 [13:56<04:37,  5.62it/s]

steps= 120


 69%|██████▉   | 3438/5000 [13:56<05:09,  5.05it/s]

steps= 109


 69%|██████▉   | 3439/5000 [13:57<07:48,  3.33it/s]

steps= 375


 69%|██████▉   | 3440/5000 [13:57<10:47,  2.41it/s]

steps= 356
steps= 34


 69%|██████▉   | 3442/5000 [13:58<10:14,  2.54it/s]

steps= 208


 69%|██████▉   | 3443/5000 [13:58<09:22,  2.77it/s]

steps= 89


 69%|██████▉   | 3444/5000 [13:58<08:19,  3.11it/s]

steps= 98


 69%|██████▉   | 3445/5000 [13:59<07:25,  3.49it/s]

steps= 80


 69%|██████▉   | 3446/5000 [13:59<08:13,  3.15it/s]

steps= 263
steps= 41


 69%|██████▉   | 3449/5000 [14:00<06:35,  3.92it/s]

steps= 243
steps= 87


 69%|██████▉   | 3450/5000 [14:00<05:50,  4.42it/s]

steps= 150


 69%|██████▉   | 3451/5000 [14:00<06:03,  4.26it/s]

steps= 169


 69%|██████▉   | 3452/5000 [14:01<07:39,  3.37it/s]

steps= 227
steps= 25


 69%|██████▉   | 3454/5000 [14:01<06:46,  3.80it/s]

steps= 108


 69%|██████▉   | 3455/5000 [14:01<06:41,  3.85it/s]

steps= 80


 69%|██████▉   | 3457/5000 [14:03<13:27,  1.91it/s]

steps= 821
steps= 111


 69%|██████▉   | 3458/5000 [14:04<13:03,  1.97it/s]

steps= 246


 69%|██████▉   | 3460/5000 [14:04<09:22,  2.74it/s]

steps= 65
steps= 79


 69%|██████▉   | 3463/5000 [14:05<07:31,  3.40it/s]

steps= 170
steps= 19
steps= 25


 69%|██████▉   | 3465/5000 [14:05<06:52,  3.72it/s]

steps= 105
steps= 123


 69%|██████▉   | 3467/5000 [14:05<05:37,  4.55it/s]

steps= 80
steps= 142


 69%|██████▉   | 3468/5000 [14:06<04:48,  5.31it/s]

steps= 80
steps= 74


 69%|██████▉   | 3472/5000 [14:06<03:41,  6.89it/s]

steps= 53
steps= 38
steps= 92


 69%|██████▉   | 3473/5000 [14:07<09:01,  2.82it/s]

steps= 696
steps= 28


 70%|██████▉   | 3475/5000 [14:07<07:21,  3.45it/s]

steps= 113


 70%|██████▉   | 3476/5000 [14:07<06:43,  3.77it/s]

steps= 110
steps= 17


 70%|██████▉   | 3478/5000 [14:08<05:59,  4.23it/s]

steps= 246
steps= 49


 70%|██████▉   | 3480/5000 [14:08<05:32,  4.57it/s]

steps= 375


 70%|██████▉   | 3481/5000 [14:08<06:52,  3.69it/s]

steps= 289


 70%|██████▉   | 3482/5000 [14:09<09:06,  2.78it/s]

steps= 491


 70%|██████▉   | 3483/5000 [14:10<11:30,  2.20it/s]

steps= 408


 70%|██████▉   | 3484/5000 [14:10<10:09,  2.49it/s]

steps= 197


 70%|██████▉   | 3486/5000 [14:10<07:49,  3.22it/s]

steps= 277
steps= 35
steps= 51


 70%|██████▉   | 3489/5000 [14:11<05:17,  4.75it/s]

steps= 44
steps= 121


 70%|██████▉   | 3491/5000 [14:11<04:08,  6.08it/s]

steps= 65
steps= 50
steps= 143


 70%|██████▉   | 3493/5000 [14:11<04:55,  5.11it/s]

steps= 233
steps= 27


 70%|██████▉   | 3496/5000 [14:12<03:59,  6.29it/s]

steps= 166
steps= 63


 70%|██████▉   | 3498/5000 [14:12<04:11,  5.98it/s]

steps= 218
steps= 158


 70%|██████▉   | 3499/5000 [14:12<05:17,  4.73it/s]

steps= 208
steps= 32


 70%|███████   | 3502/5000 [14:13<04:34,  5.45it/s]

steps= 371
episode: 3500 episode reward: -1379 eps: 0.6975592364933232 avg reward (last 100): -1115.2871287128712 episode loss:  23445.854
avg reward for last 100 episodes: -1115.2871287128712
steps= 125
steps= 67


 70%|███████   | 3504/5000 [14:13<03:40,  6.78it/s]

steps= 52


 70%|███████   | 3505/5000 [14:13<04:19,  5.75it/s]

steps= 35


 70%|███████   | 3507/5000 [14:14<05:05,  4.89it/s]

steps= 154
steps= 118


 70%|███████   | 3508/5000 [14:14<06:23,  3.89it/s]

steps= 190


 70%|███████   | 3509/5000 [14:15<09:51,  2.52it/s]

steps= 365


 70%|███████   | 3510/5000 [14:15<08:24,  2.95it/s]

steps= 75


 70%|███████   | 3511/5000 [14:15<07:57,  3.12it/s]

steps= 80


 70%|███████   | 3512/5000 [14:16<08:59,  2.76it/s]

steps= 147


 70%|███████   | 3515/5000 [14:16<07:18,  3.38it/s]

steps= 378
steps= 68
steps= 89


 70%|███████   | 3516/5000 [14:17<08:11,  3.02it/s]

steps= 334


 70%|███████   | 3517/5000 [14:17<07:54,  3.12it/s]

steps= 135


 70%|███████   | 3518/5000 [14:17<07:10,  3.44it/s]

steps= 196
steps= 38


 70%|███████   | 3520/5000 [14:18<05:51,  4.21it/s]

steps= 74
steps= 41


 70%|███████   | 3523/5000 [14:18<04:54,  5.02it/s]

steps= 232
steps= 119


 70%|███████   | 3524/5000 [14:18<04:24,  5.59it/s]

steps= 67
steps= 76


 71%|███████   | 3526/5000 [14:19<03:55,  6.26it/s]

steps= 93


 71%|███████   | 3527/5000 [14:19<04:25,  5.56it/s]

steps= 193


 71%|███████   | 3529/5000 [14:19<04:56,  4.96it/s]

steps= 188
steps= 120


 71%|███████   | 3530/5000 [14:20<09:18,  2.63it/s]

steps= 594


 71%|███████   | 3531/5000 [14:21<10:28,  2.34it/s]

steps= 428


 71%|███████   | 3533/5000 [14:21<09:09,  2.67it/s]

steps= 346
steps= 78


 71%|███████   | 3534/5000 [14:21<07:53,  3.09it/s]

steps= 176


 71%|███████   | 3535/5000 [14:22<07:07,  3.42it/s]

steps= 141


 71%|███████   | 3536/5000 [14:22<07:41,  3.17it/s]

steps= 159


 71%|███████   | 3537/5000 [14:22<07:26,  3.28it/s]

steps= 282
steps= 26


 71%|███████   | 3539/5000 [14:23<06:46,  3.59it/s]

steps= 349


 71%|███████   | 3540/5000 [14:23<06:38,  3.66it/s]

steps= 269


 71%|███████   | 3541/5000 [14:24<12:20,  1.97it/s]

steps= 677


 71%|███████   | 3542/5000 [14:24<11:20,  2.14it/s]

steps= 325


 71%|███████   | 3543/5000 [14:25<14:15,  1.70it/s]

steps= 387


 71%|███████   | 3546/5000 [14:26<09:06,  2.66it/s]

steps= 172
steps= 6
steps= 76


 71%|███████   | 3547/5000 [14:26<08:41,  2.78it/s]

steps= 161
steps= 76


 71%|███████   | 3550/5000 [14:26<05:42,  4.23it/s]

steps= 76
steps= 141


 71%|███████   | 3552/5000 [14:27<04:45,  5.07it/s]

steps= 123
steps= 31


 71%|███████   | 3554/5000 [14:27<05:30,  4.38it/s]

steps= 261
steps= 55


 71%|███████   | 3556/5000 [14:28<04:09,  5.79it/s]

steps= 42
steps= 69


 71%|███████   | 3557/5000 [14:28<04:22,  5.49it/s]

steps= 137


 71%|███████   | 3558/5000 [14:28<04:44,  5.07it/s]

steps= 245


 71%|███████   | 3559/5000 [14:28<05:21,  4.49it/s]

steps= 228
steps= 24


 71%|███████   | 3562/5000 [14:29<04:57,  4.84it/s]

steps= 273
steps= 119


 71%|███████▏  | 3565/5000 [14:29<03:34,  6.70it/s]

steps= 90
steps= 34
steps= 101
steps= 30


 71%|███████▏  | 3567/5000 [14:29<03:35,  6.65it/s]

steps= 100
steps= 25


 71%|███████▏  | 3569/5000 [14:30<05:09,  4.62it/s]

steps= 451


 71%|███████▏  | 3570/5000 [14:31<06:33,  3.64it/s]

steps= 192
steps= 15


 71%|███████▏  | 3572/5000 [14:31<06:57,  3.42it/s]

steps= 216


 71%|███████▏  | 3573/5000 [14:32<07:33,  3.14it/s]

steps= 149


 72%|███████▏  | 3575/5000 [14:32<07:43,  3.08it/s]

steps= 473
steps= 33


 72%|███████▏  | 3576/5000 [14:33<06:45,  3.52it/s]

steps= 47


 72%|███████▏  | 3577/5000 [14:33<08:06,  2.92it/s]

steps= 313


 72%|███████▏  | 3579/5000 [14:34<06:55,  3.42it/s]

steps= 271
steps= 44
steps= 44


 72%|███████▏  | 3581/5000 [14:34<07:08,  3.31it/s]

steps= 338


 72%|███████▏  | 3582/5000 [14:35<06:59,  3.38it/s]

steps= 137


 72%|███████▏  | 3583/5000 [14:35<09:10,  2.58it/s]

steps= 339


 72%|███████▏  | 3584/5000 [14:36<09:55,  2.38it/s]

steps= 159
steps= 49


 72%|███████▏  | 3588/5000 [14:36<06:26,  3.65it/s]

steps= 159
steps= 281
steps= 60
steps= 29


 72%|███████▏  | 3590/5000 [14:37<05:58,  3.93it/s]

steps= 83


 72%|███████▏  | 3591/5000 [14:37<06:27,  3.64it/s]

steps= 124


 72%|███████▏  | 3592/5000 [14:37<06:59,  3.36it/s]

steps= 102


 72%|███████▏  | 3594/5000 [14:38<07:06,  3.29it/s]

steps= 165
steps= 39


 72%|███████▏  | 3595/5000 [14:38<07:18,  3.20it/s]

steps= 178


 72%|███████▏  | 3596/5000 [14:39<07:15,  3.23it/s]

steps= 106


 72%|███████▏  | 3597/5000 [14:39<06:41,  3.49it/s]

steps= 87


 72%|███████▏  | 3598/5000 [14:40<09:50,  2.38it/s]

steps= 287


 72%|███████▏  | 3600/5000 [14:41<10:46,  2.17it/s]

steps= 439
steps= 26


 72%|███████▏  | 3602/5000 [14:41<08:20,  2.79it/s]

steps= 134
episode: 3600 episode reward: -1142 eps: 0.6906180607882736 avg reward (last 100): -1113.970297029703 episode loss:  26454.072
avg reward for last 100 episodes: -1113.970297029703
steps= 71


 72%|███████▏  | 3603/5000 [14:42<09:19,  2.50it/s]

steps= 183


 72%|███████▏  | 3604/5000 [14:42<10:29,  2.22it/s]

steps= 225


 72%|███████▏  | 3606/5000 [14:43<08:38,  2.69it/s]

steps= 102
steps= 75


 72%|███████▏  | 3607/5000 [14:43<07:51,  2.95it/s]

steps= 57


 72%|███████▏  | 3608/5000 [14:44<07:32,  3.08it/s]

steps= 100


 72%|███████▏  | 3609/5000 [14:44<09:28,  2.45it/s]

steps= 267
steps= 21


 72%|███████▏  | 3611/5000 [14:45<08:23,  2.76it/s]

steps= 163


 72%|███████▏  | 3612/5000 [14:45<09:53,  2.34it/s]

steps= 352


 72%|███████▏  | 3613/5000 [14:46<10:18,  2.24it/s]

steps= 353


 72%|███████▏  | 3614/5000 [14:46<10:11,  2.27it/s]

steps= 149


 72%|███████▏  | 3616/5000 [14:47<09:26,  2.44it/s]

steps= 306
steps= 54


 72%|███████▏  | 3617/5000 [14:47<08:12,  2.81it/s]

steps= 69


 72%|███████▏  | 3618/5000 [14:48<09:26,  2.44it/s]

steps= 183


 72%|███████▏  | 3619/5000 [14:48<08:11,  2.81it/s]

steps= 156


 72%|███████▏  | 3621/5000 [14:49<07:09,  3.21it/s]

steps= 145
steps= 70


 72%|███████▏  | 3622/5000 [14:49<09:18,  2.47it/s]

steps= 387


 72%|███████▏  | 3623/5000 [14:50<09:05,  2.52it/s]

steps= 144


 72%|███████▎  | 3625/5000 [14:50<07:35,  3.02it/s]

steps= 293
steps= 58


 73%|███████▎  | 3627/5000 [14:51<07:00,  3.26it/s]

steps= 352
steps= 49


 73%|███████▎  | 3628/5000 [14:52<11:25,  2.00it/s]

steps= 330


 73%|███████▎  | 3629/5000 [14:52<10:31,  2.17it/s]

steps= 70


 73%|███████▎  | 3630/5000 [14:53<10:10,  2.25it/s]

steps= 70


 73%|███████▎  | 3632/5000 [14:53<07:24,  3.08it/s]

steps= 101
steps= 85


 73%|███████▎  | 3635/5000 [14:54<05:29,  4.15it/s]

steps= 148
steps= 6
steps= 75


 73%|███████▎  | 3636/5000 [14:54<05:25,  4.19it/s]

steps= 208


 73%|███████▎  | 3637/5000 [14:54<06:07,  3.71it/s]

steps= 137


 73%|███████▎  | 3638/5000 [14:54<06:29,  3.50it/s]

steps= 84


 73%|███████▎  | 3639/5000 [14:55<07:39,  2.96it/s]

steps= 88


 73%|███████▎  | 3640/5000 [14:56<10:01,  2.26it/s]

steps= 147


 73%|███████▎  | 3641/5000 [14:57<14:03,  1.61it/s]

steps= 385


 73%|███████▎  | 3642/5000 [14:57<15:10,  1.49it/s]

steps= 262


 73%|███████▎  | 3643/5000 [14:58<12:40,  1.78it/s]

steps= 147


 73%|███████▎  | 3644/5000 [14:58<14:11,  1.59it/s]

steps= 294


 73%|███████▎  | 3645/5000 [14:59<12:16,  1.84it/s]

steps= 125


 73%|███████▎  | 3646/5000 [14:59<10:49,  2.08it/s]

steps= 154


 73%|███████▎  | 3647/5000 [15:00<11:49,  1.91it/s]

steps= 172


 73%|███████▎  | 3648/5000 [15:00<11:38,  1.93it/s]

steps= 206


 73%|███████▎  | 3650/5000 [15:01<11:30,  1.95it/s]

steps= 370
steps= 26


 73%|███████▎  | 3651/5000 [15:02<10:51,  2.07it/s]

steps= 290


 73%|███████▎  | 3653/5000 [15:02<08:27,  2.65it/s]

steps= 155
steps= 76


 73%|███████▎  | 3654/5000 [15:03<07:12,  3.11it/s]

steps= 55


 73%|███████▎  | 3656/5000 [15:03<07:24,  3.02it/s]

steps= 339
steps= 99


 73%|███████▎  | 3657/5000 [15:04<06:31,  3.43it/s]

steps= 68


 73%|███████▎  | 3659/5000 [15:04<06:26,  3.47it/s]

steps= 209
steps= 72


 73%|███████▎  | 3660/5000 [15:04<05:17,  4.22it/s]

steps= 45
steps= 28


 73%|███████▎  | 3662/5000 [15:05<06:02,  3.69it/s]

steps= 473


 73%|███████▎  | 3663/5000 [15:05<06:54,  3.23it/s]

steps= 205


 73%|███████▎  | 3664/5000 [15:06<08:30,  2.62it/s]

steps= 220
steps= 9


 73%|███████▎  | 3667/5000 [15:06<06:10,  3.60it/s]

steps= 90
steps= 119


 73%|███████▎  | 3668/5000 [15:07<05:06,  4.35it/s]

steps= 39


 73%|███████▎  | 3669/5000 [15:07<05:06,  4.34it/s]

steps= 98


 73%|███████▎  | 3670/5000 [15:08<08:41,  2.55it/s]

steps= 323


 73%|███████▎  | 3671/5000 [15:08<09:19,  2.37it/s]

steps= 241


 73%|███████▎  | 3672/5000 [15:09<09:44,  2.27it/s]

steps= 225


 73%|███████▎  | 3673/5000 [15:09<09:24,  2.35it/s]

steps= 209


 74%|███████▎  | 3675/5000 [15:10<07:35,  2.91it/s]

steps= 261
steps= 78


 74%|███████▎  | 3676/5000 [15:10<06:26,  3.42it/s]

steps= 110
steps= 17


 74%|███████▎  | 3679/5000 [15:10<05:07,  4.29it/s]

steps= 229
steps= 117


 74%|███████▎  | 3680/5000 [15:11<07:39,  2.87it/s]

steps= 412


 74%|███████▎  | 3681/5000 [15:11<08:52,  2.48it/s]

steps= 332


 74%|███████▎  | 3683/5000 [15:12<06:50,  3.21it/s]

steps= 166
steps= 68


 74%|███████▎  | 3685/5000 [15:12<06:11,  3.54it/s]

steps= 143
steps= 82


 74%|███████▎  | 3686/5000 [15:14<11:38,  1.88it/s]

steps= 413


 74%|███████▎  | 3687/5000 [15:14<13:02,  1.68it/s]

steps= 241


 74%|███████▍  | 3688/5000 [15:15<12:00,  1.82it/s]

steps= 93


 74%|███████▍  | 3689/5000 [15:15<11:31,  1.90it/s]

steps= 116


 74%|███████▍  | 3692/5000 [15:16<10:06,  2.16it/s]

steps= 516
steps= 27
steps= 85


 74%|███████▍  | 3693/5000 [15:16<08:02,  2.71it/s]

steps= 42


 74%|███████▍  | 3695/5000 [15:17<07:02,  3.09it/s]

steps= 252
steps= 148


 74%|███████▍  | 3696/5000 [15:17<07:06,  3.05it/s]

steps= 162


 74%|███████▍  | 3697/5000 [15:18<07:49,  2.77it/s]

steps= 212


 74%|███████▍  | 3698/5000 [15:18<09:43,  2.23it/s]

steps= 442
steps= 63


 74%|███████▍  | 3700/5000 [15:19<09:44,  2.22it/s]

steps= 308


 74%|███████▍  | 3701/5000 [15:20<08:25,  2.57it/s]

steps= 124
episode: 3700 episode reward: -1132 eps: 0.6837459543717475 avg reward (last 100): -1098.4554455445545 episode loss:  20527.562
avg reward for last 100 episodes: -1098.4554455445545


 74%|███████▍  | 3702/5000 [15:21<12:03,  1.79it/s]

steps= 359


 74%|███████▍  | 3703/5000 [15:21<14:15,  1.52it/s]

steps= 481


 74%|███████▍  | 3704/5000 [15:22<11:22,  1.90it/s]

steps= 72


 74%|███████▍  | 3705/5000 [15:22<12:09,  1.77it/s]

steps= 563


 74%|███████▍  | 3706/5000 [15:23<11:01,  1.95it/s]

steps= 156


 74%|███████▍  | 3707/5000 [15:23<09:57,  2.17it/s]

steps= 157


 74%|███████▍  | 3709/5000 [15:24<08:39,  2.49it/s]

steps= 369
steps= 43


 74%|███████▍  | 3710/5000 [15:24<06:55,  3.10it/s]

steps= 70


 74%|███████▍  | 3711/5000 [15:24<08:18,  2.59it/s]

steps= 514


 74%|███████▍  | 3712/5000 [15:25<08:06,  2.65it/s]

steps= 195


 74%|███████▍  | 3713/5000 [15:25<08:08,  2.63it/s]

steps= 119


 74%|███████▍  | 3715/5000 [15:26<06:39,  3.22it/s]

steps= 213
steps= 61


 74%|███████▍  | 3717/5000 [15:26<05:12,  4.11it/s]

steps= 36
steps= 32


 74%|███████▍  | 3719/5000 [15:27<04:40,  4.57it/s]

steps= 166
steps= 86


 74%|███████▍  | 3720/5000 [15:27<04:39,  4.59it/s]

steps= 75


 74%|███████▍  | 3721/5000 [15:28<08:29,  2.51it/s]

steps= 679


 74%|███████▍  | 3722/5000 [15:28<10:56,  1.95it/s]

steps= 513


 74%|███████▍  | 3723/5000 [15:29<09:44,  2.18it/s]

steps= 224


 74%|███████▍  | 3724/5000 [15:29<09:06,  2.34it/s]

steps= 129


 74%|███████▍  | 3725/5000 [15:30<09:34,  2.22it/s]

steps= 293


 75%|███████▍  | 3726/5000 [15:30<08:50,  2.40it/s]

steps= 199


 75%|███████▍  | 3729/5000 [15:30<06:26,  3.28it/s]

steps= 183
steps= 28
steps= 20


 75%|███████▍  | 3731/5000 [15:32<09:53,  2.14it/s]

steps= 739
steps= 98


 75%|███████▍  | 3732/5000 [15:32<09:55,  2.13it/s]

steps= 247


 75%|███████▍  | 3733/5000 [15:33<09:41,  2.18it/s]

steps= 121


 75%|███████▍  | 3734/5000 [15:33<10:02,  2.10it/s]

steps= 206


 75%|███████▍  | 3735/5000 [15:34<08:34,  2.46it/s]

steps= 91


 75%|███████▍  | 3736/5000 [15:34<09:52,  2.13it/s]

steps= 506


 75%|███████▍  | 3738/5000 [15:35<08:04,  2.60it/s]

steps= 301
steps= 60


 75%|███████▍  | 3740/5000 [15:35<06:34,  3.19it/s]

steps= 383
steps= 48


 75%|███████▍  | 3741/5000 [15:36<06:20,  3.30it/s]

steps= 131


 75%|███████▍  | 3743/5000 [15:37<07:50,  2.67it/s]

steps= 288
steps= 61


 75%|███████▍  | 3745/5000 [15:37<07:00,  2.99it/s]

steps= 326
steps= 68


 75%|███████▍  | 3746/5000 [15:37<06:10,  3.39it/s]

steps= 85


 75%|███████▍  | 3747/5000 [15:39<10:43,  1.95it/s]

steps= 515


 75%|███████▍  | 3749/5000 [15:39<07:33,  2.76it/s]

steps= 148
steps= 114


 75%|███████▌  | 3751/5000 [15:40<06:16,  3.31it/s]

steps= 305
steps= 27


 75%|███████▌  | 3752/5000 [15:40<07:24,  2.81it/s]

steps= 295


 75%|███████▌  | 3753/5000 [15:41<09:00,  2.31it/s]

steps= 367
steps= 27


 75%|███████▌  | 3755/5000 [15:42<09:23,  2.21it/s]

steps= 396


 75%|███████▌  | 3756/5000 [15:42<08:01,  2.58it/s]

steps= 127


 75%|███████▌  | 3758/5000 [15:42<06:45,  3.06it/s]

steps= 172
steps= 71


 75%|███████▌  | 3759/5000 [15:43<06:05,  3.40it/s]

steps= 82


 75%|███████▌  | 3760/5000 [15:43<06:14,  3.31it/s]

steps= 225
steps= 27


 75%|███████▌  | 3763/5000 [15:43<04:28,  4.61it/s]

steps= 55
steps= 68


 75%|███████▌  | 3764/5000 [15:44<06:30,  3.17it/s]

steps= 409


 75%|███████▌  | 3765/5000 [15:44<06:18,  3.27it/s]

steps= 94


 75%|███████▌  | 3766/5000 [15:44<06:18,  3.26it/s]

steps= 88


 75%|███████▌  | 3768/5000 [15:45<04:49,  4.26it/s]

steps= 86
steps= 43


 75%|███████▌  | 3769/5000 [15:45<05:45,  3.56it/s]

steps= 263


 75%|███████▌  | 3770/5000 [15:46<06:06,  3.36it/s]

steps= 156


 75%|███████▌  | 3772/5000 [15:46<05:56,  3.45it/s]

steps= 217
steps= 27


 75%|███████▌  | 3773/5000 [15:47<06:35,  3.11it/s]

steps= 203


 75%|███████▌  | 3774/5000 [15:47<07:16,  2.81it/s]

steps= 132


 76%|███████▌  | 3776/5000 [15:48<07:26,  2.74it/s]

steps= 337
steps= 29


 76%|███████▌  | 3777/5000 [15:48<05:54,  3.45it/s]

steps= 20


 76%|███████▌  | 3778/5000 [15:48<06:12,  3.28it/s]

steps= 164


 76%|███████▌  | 3779/5000 [15:49<08:27,  2.40it/s]

steps= 378


 76%|███████▌  | 3780/5000 [15:49<08:16,  2.46it/s]

steps= 155


 76%|███████▌  | 3782/5000 [15:51<09:01,  2.25it/s]

steps= 632
steps= 47
steps= 35


 76%|███████▌  | 3784/5000 [15:51<07:46,  2.61it/s]

steps= 293


 76%|███████▌  | 3786/5000 [15:52<06:35,  3.07it/s]

steps= 196
steps= 25


 76%|███████▌  | 3787/5000 [15:52<07:03,  2.87it/s]

steps= 232


 76%|███████▌  | 3788/5000 [15:53<08:41,  2.32it/s]

steps= 326
steps= 

 76%|███████▌  | 3789/5000 [15:53<07:20,  2.75it/s]

83


 76%|███████▌  | 3790/5000 [15:54<09:17,  2.17it/s]

steps= 331
steps= 12


 76%|███████▌  | 3793/5000 [15:54<06:44,  2.99it/s]

steps= 308
steps= 91


 76%|███████▌  | 3794/5000 [15:55<09:33,  2.10it/s]

steps= 281


 76%|███████▌  | 3795/5000 [15:55<08:45,  2.29it/s]

steps= 113


 76%|███████▌  | 3797/5000 [15:56<09:23,  2.13it/s]

steps= 484
steps= 60


 76%|███████▌  | 3798/5000 [15:57<09:48,  2.04it/s]

steps= 236


 76%|███████▌  | 3799/5000 [15:58<11:59,  1.67it/s]

steps= 442


 76%|███████▌  | 3801/5000 [15:59<08:51,  2.26it/s]

steps= 192
steps= 134
episode: 3800 episode reward: -1142 eps: 0.6769422299586498 avg reward (last 100): -1188.9405940594058 episode loss:  25065.508
avg reward for last 100 episodes: -1188.9405940594058


 76%|███████▌  | 3802/5000 [15:59<06:59,  2.86it/s]

steps= 41


 76%|███████▌  | 3803/5000 [15:59<06:21,  3.14it/s]

steps= 113


 76%|███████▌  | 3804/5000 [15:59<07:34,  2.63it/s]

steps= 377


 76%|███████▌  | 3805/5000 [16:00<07:55,  2.51it/s]

steps= 174


 76%|███████▌  | 3806/5000 [16:00<07:26,  2.67it/s]

steps= 183


 76%|███████▌  | 3807/5000 [16:01<08:08,  2.44it/s]

steps= 440


 76%|███████▌  | 3810/5000 [16:02<09:01,  2.20it/s]

steps= 783
steps= 46
steps= 62


 76%|███████▌  | 3811/5000 [16:02<09:05,  2.18it/s]

steps= 415


 76%|███████▋  | 3813/5000 [16:03<07:00,  2.82it/s]

steps= 132
steps= 88


 76%|███████▋  | 3814/5000 [16:04<08:21,  2.37it/s]

steps= 397
steps= 28


 76%|███████▋  | 3816/5000 [16:05<08:48,  2.24it/s]

steps= 729


 76%|███████▋  | 3817/5000 [16:05<08:50,  2.23it/s]

steps= 265


 76%|███████▋  | 3818/5000 [16:05<08:35,  2.29it/s]

steps= 250


 76%|███████▋  | 3819/5000 [16:06<11:45,  1.67it/s]

steps= 621


 76%|███████▋  | 3820/5000 [16:07<13:02,  1.51it/s]

steps= 564


 76%|███████▋  | 3821/5000 [16:08<11:33,  1.70it/s]

steps= 227


 76%|███████▋  | 3823/5000 [16:08<07:55,  2.48it/s]

steps= 100
steps= 159


 76%|███████▋  | 3824/5000 [16:08<06:22,  3.07it/s]

steps= 76


 77%|███████▋  | 3826/5000 [16:09<05:25,  3.61it/s]

steps= 108
steps= 131


 77%|███████▋  | 3827/5000 [16:09<05:35,  3.50it/s]

steps= 118


 77%|███████▋  | 3828/5000 [16:10<06:54,  2.83it/s]

steps= 298


 77%|███████▋  | 3829/5000 [16:10<07:02,  2.77it/s]

steps= 196


 77%|███████▋  | 3830/5000 [16:10<07:30,  2.60it/s]

steps= 235


 77%|███████▋  | 3831/5000 [16:11<07:32,  2.59it/s]

steps= 113


 77%|███████▋  | 3833/5000 [16:13<13:24,  1.45it/s]

steps= 1271
steps= 45


 77%|███████▋  | 3835/5000 [16:13<08:26,  2.30it/s]

steps= 108
steps= 50


 77%|███████▋  | 3836/5000 [16:14<08:08,  2.38it/s]

steps= 91


 77%|███████▋  | 3837/5000 [16:14<09:36,  2.02it/s]

steps= 257


 77%|███████▋  | 3838/5000 [16:15<11:16,  1.72it/s]

steps= 385


 77%|███████▋  | 3839/5000 [16:16<09:59,  1.94it/s]

steps= 133


 77%|███████▋  | 3840/5000 [16:16<08:30,  2.27it/s]

steps= 77


 77%|███████▋  | 3841/5000 [16:17<09:55,  1.95it/s]

steps= 448


 77%|███████▋  | 3843/5000 [16:17<06:39,  2.90it/s]

steps= 67
steps= 42


 77%|███████▋  | 3845/5000 [16:17<05:05,  3.78it/s]

steps= 192
steps= 100


 77%|███████▋  | 3846/5000 [16:18<05:52,  3.27it/s]

steps= 105


 77%|███████▋  | 3847/5000 [16:18<06:17,  3.05it/s]

steps= 142


 77%|███████▋  | 3848/5000 [16:18<06:21,  3.02it/s]

steps= 123


 77%|███████▋  | 3849/5000 [16:19<08:34,  2.24it/s]

steps= 430


 77%|███████▋  | 3851/5000 [16:20<06:36,  2.90it/s]

steps= 253
steps= 109


 77%|███████▋  | 3853/5000 [16:20<05:27,  3.50it/s]

steps= 179
steps= 64


 77%|███████▋  | 3854/5000 [16:21<05:38,  3.39it/s]

steps= 200


 77%|███████▋  | 3855/5000 [16:21<05:13,  3.65it/s]

steps= 63


 77%|███████▋  | 3856/5000 [16:22<08:31,  2.24it/s]

steps= 258


 77%|███████▋  | 3857/5000 [16:22<08:08,  2.34it/s]

steps= 251


 77%|███████▋  | 3858/5000 [16:22<07:27,  2.55it/s]

steps= 79


 77%|███████▋  | 3859/5000 [16:23<08:02,  2.36it/s]

steps= 215


 77%|███████▋  | 3861/5000 [16:23<06:55,  2.74it/s]

steps= 237
steps= 117


 77%|███████▋  | 3863/5000 [16:24<06:34,  2.88it/s]

steps= 418
steps= 45


 77%|███████▋  | 3864/5000 [16:24<06:04,  3.11it/s]

steps= 100


 77%|███████▋  | 3866/5000 [16:25<05:47,  3.26it/s]

steps= 231
steps= 28


 77%|███████▋  | 3867/5000 [16:25<05:11,  3.63it/s]

steps= 98


 77%|███████▋  | 3868/5000 [16:26<05:14,  3.60it/s]

steps= 111


 77%|███████▋  | 3869/5000 [16:26<05:41,  3.31it/s]

steps= 110


 77%|███████▋  | 3871/5000 [16:27<05:31,  3.40it/s]

steps= 252
steps= 81


 77%|███████▋  | 3873/5000 [16:27<06:00,  3.12it/s]

steps= 421
steps= 125


 78%|███████▊  | 3876/5000 [16:28<04:23,  4.27it/s]

steps= 122
steps= 25
steps= 52


 78%|███████▊  | 3877/5000 [16:28<03:48,  4.92it/s]

steps= 54


 78%|███████▊  | 3878/5000 [16:28<04:31,  4.13it/s]

steps= 219


 78%|███████▊  | 3879/5000 [16:29<06:22,  2.93it/s]

steps= 434


 78%|███████▊  | 3880/5000 [16:29<06:23,  2.92it/s]

steps= 249


 78%|███████▊  | 3881/5000 [16:30<07:15,  2.57it/s]

steps= 399


 78%|███████▊  | 3882/5000 [16:30<07:45,  2.40it/s]

steps= 203
steps= 16


 78%|███████▊  | 3885/5000 [16:31<05:19,  3.49it/s]

steps= 77
steps= 66


 78%|███████▊  | 3887/5000 [16:31<04:15,  4.35it/s]

steps= 177
steps= 29


 78%|███████▊  | 3888/5000 [16:32<06:06,  3.03it/s]

steps= 373


 78%|███████▊  | 3889/5000 [16:32<05:51,  3.16it/s]

steps= 102


 78%|███████▊  | 3890/5000 [16:32<07:29,  2.47it/s]

steps= 481


 78%|███████▊  | 3892/5000 [16:33<06:27,  2.86it/s]

steps= 275
steps= 68


 78%|███████▊  | 3893/5000 [16:33<06:36,  2.79it/s]

steps= 139


 78%|███████▊  | 3894/5000 [16:34<07:45,  2.37it/s]

steps= 269


 78%|███████▊  | 3895/5000 [16:34<07:04,  2.60it/s]

steps= 190


 78%|███████▊  | 3896/5000 [16:35<08:52,  2.07it/s]

steps= 555


 78%|███████▊  | 3897/5000 [16:35<07:44,  2.38it/s]

steps= 57


 78%|███████▊  | 3898/5000 [16:36<07:16,  2.52it/s]

steps= 41


 78%|███████▊  | 3900/5000 [16:36<06:29,  2.82it/s]

steps= 236
steps= 122


 78%|███████▊  | 3901/5000 [16:37<07:01,  2.61it/s]

steps= 258
episode: 3900 episode reward: -1266 eps: 0.6702062071028237 avg reward (last 100): -1136.5544554455446 episode loss:  26591.527
avg reward for last 100 episodes: -1136.5544554455446


 78%|███████▊  | 3903/5000 [16:37<06:07,  2.99it/s]

steps= 321
steps= 72


 78%|███████▊  | 3904/5000 [16:38<07:11,  2.54it/s]

steps= 359


 78%|███████▊  | 3905/5000 [16:38<06:55,  2.64it/s]

steps= 132


 78%|███████▊  | 3906/5000 [16:39<06:28,  2.82it/s]

steps= 173


 78%|███████▊  | 3907/5000 [16:39<09:04,  2.01it/s]

steps= 594


 78%|███████▊  | 3908/5000 [16:40<07:47,  2.34it/s]

steps= 89


 78%|███████▊  | 3909/5000 [16:41<11:17,  1.61it/s]

steps= 462


 78%|███████▊  | 3910/5000 [16:41<09:14,  1.97it/s]

steps= 166


 78%|███████▊  | 3911/5000 [16:41<08:38,  2.10it/s]

steps= 186


 78%|███████▊  | 3913/5000 [16:42<06:16,  2.89it/s]

steps= 189
steps= 109


 78%|███████▊  | 3914/5000 [16:42<06:34,  2.76it/s]

steps= 180


 78%|███████▊  | 3915/5000 [16:43<06:50,  2.65it/s]

steps= 250
steps= 28


 78%|███████▊  | 3917/5000 [16:43<05:58,  3.02it/s]

steps= 214


 78%|███████▊  | 3918/5000 [16:44<06:47,  2.66it/s]

steps= 172


 78%|███████▊  | 3920/5000 [16:44<06:29,  2.77it/s]

steps= 225
steps= 56


 78%|███████▊  | 3921/5000 [16:44<05:10,  3.47it/s]

steps= 33


 78%|███████▊  | 3922/5000 [16:45<05:46,  3.11it/s]

steps= 184


 78%|███████▊  | 3924/5000 [16:45<04:46,  3.75it/s]

steps= 176
steps= 164


 78%|███████▊  | 3925/5000 [16:46<04:19,  4.14it/s]

steps= 49


 79%|███████▊  | 3926/5000 [16:46<05:29,  3.26it/s]

steps= 243


 79%|███████▊  | 3927/5000 [16:46<06:04,  2.94it/s]

steps= 249


 79%|███████▊  | 3928/5000 [16:47<05:31,  3.23it/s]

steps= 55


 79%|███████▊  | 3929/5000 [16:47<07:51,  2.27it/s]

steps= 446
steps= 37


 79%|███████▊  | 3932/5000 [16:48<05:22,  3.31it/s]

steps= 124
steps= 95


 79%|███████▊  | 3933/5000 [16:48<04:25,  4.01it/s]

steps= 46


 79%|███████▊  | 3934/5000 [16:48<04:54,  3.62it/s]

steps= 121


 79%|███████▊  | 3935/5000 [16:49<05:55,  2.99it/s]

steps= 244


 79%|███████▊  | 3937/5000 [16:50<06:19,  2.80it/s]

steps= 307
steps= 80


 79%|███████▉  | 3938/5000 [16:50<06:05,  2.91it/s]

steps= 127


 79%|███████▉  | 3939/5000 [16:50<06:20,  2.79it/s]

steps= 227


 79%|███████▉  | 3941/5000 [16:51<04:58,  3.55it/s]

steps= 102
steps= 35


 79%|███████▉  | 3942/5000 [16:51<04:51,  3.63it/s]

steps= 62


 79%|███████▉  | 3943/5000 [16:53<11:15,  1.57it/s]

steps= 474


 79%|███████▉  | 3944/5000 [16:53<10:46,  1.63it/s]

steps= 229


 79%|███████▉  | 3945/5000 [16:54<10:24,  1.69it/s]

steps= 74


 79%|███████▉  | 3946/5000 [16:54<10:20,  1.70it/s]

steps= 155


 79%|███████▉  | 3947/5000 [16:55<09:49,  1.79it/s]

steps= 320


 79%|███████▉  | 3948/5000 [16:55<09:46,  1.79it/s]

steps= 268


 79%|███████▉  | 3949/5000 [16:56<08:22,  2.09it/s]

steps= 130


 79%|███████▉  | 3951/5000 [16:56<07:03,  2.48it/s]

steps= 311
steps= 65


 79%|███████▉  | 3953/5000 [16:57<05:44,  3.04it/s]

steps= 265
steps= 50


 79%|███████▉  | 3954/5000 [16:57<07:02,  2.47it/s]

steps= 298


 79%|███████▉  | 3955/5000 [16:58<08:25,  2.07it/s]

steps= 383


 79%|███████▉  | 3956/5000 [16:59<08:34,  2.03it/s]

steps= 319


 79%|███████▉  | 3957/5000 [16:59<07:36,  2.28it/s]

steps= 171


 79%|███████▉  | 3959/5000 [17:00<06:35,  2.63it/s]

steps= 344
steps= 54


 79%|███████▉  | 3960/5000 [17:00<06:14,  2.78it/s]

steps= 117


 79%|███████▉  | 3961/5000 [17:01<08:01,  2.16it/s]

steps= 477


 79%|███████▉  | 3962/5000 [17:01<08:39,  2.00it/s]

steps= 365


 79%|███████▉  | 3964/5000 [17:02<07:15,  2.38it/s]

steps= 351
steps= 37


 79%|███████▉  | 3965/5000 [17:03<08:27,  2.04it/s]

steps= 416


 79%|███████▉  | 3966/5000 [17:04<12:22,  1.39it/s]

steps= 762


 79%|███████▉  | 3967/5000 [17:04<10:58,  1.57it/s]

steps= 275


 79%|███████▉  | 3968/5000 [17:05<09:26,  1.82it/s]

steps= 204


 79%|███████▉  | 3969/5000 [17:06<10:34,  1.62it/s]

steps= 609


 79%|███████▉  | 3970/5000 [17:06<08:55,  1.92it/s]

steps= 132


 79%|███████▉  | 3971/5000 [17:06<07:47,  2.20it/s]

steps= 125


 79%|███████▉  | 3973/5000 [17:07<05:45,  2.97it/s]

steps= 149
steps= 221


 79%|███████▉  | 3974/5000 [17:07<05:20,  3.20it/s]

steps= 102


 80%|███████▉  | 3975/5000 [17:07<05:19,  3.21it/s]

steps= 97


 80%|███████▉  | 3976/5000 [17:07<05:06,  3.34it/s]

steps= 131


 80%|███████▉  | 3977/5000 [17:08<08:34,  1.99it/s]

steps= 517


 80%|███████▉  | 3979/5000 [17:09<05:59,  2.84it/s]

steps= 129
steps= 95


 80%|███████▉  | 3981/5000 [17:11<09:46,  1.74it/s]

steps= 836
steps= 72


 80%|███████▉  | 3983/5000 [17:12<08:13,  2.06it/s]

steps= 317
steps= 75


 80%|███████▉  | 3985/5000 [17:12<05:48,  2.91it/s]

steps= 66
steps= 112


 80%|███████▉  | 3986/5000 [17:12<05:39,  2.99it/s]

steps= 224


 80%|███████▉  | 3988/5000 [17:13<05:17,  3.18it/s]

steps= 453
steps= 66


 80%|███████▉  | 3989/5000 [17:13<05:18,  3.18it/s]

steps= 168


 80%|███████▉  | 3990/5000 [17:14<05:02,  3.33it/s]

steps= 148


 80%|███████▉  | 3991/5000 [17:14<04:50,  3.48it/s]

steps= 141


 80%|███████▉  | 3992/5000 [17:14<05:16,  3.18it/s]

steps= 283


 80%|███████▉  | 3994/5000 [17:15<05:24,  3.10it/s]

steps= 583
steps= 108


 80%|███████▉  | 3995/5000 [17:15<05:59,  2.79it/s]

steps= 253


 80%|███████▉  | 3997/5000 [17:16<04:50,  3.45it/s]

steps= 135
steps= 81


 80%|███████▉  | 3998/5000 [17:17<07:02,  2.37it/s]

steps= 529


 80%|███████▉  | 3999/5000 [17:17<06:43,  2.48it/s]

steps= 191


 80%|████████  | 4000/5000 [17:17<06:30,  2.56it/s]

steps= 83


 80%|████████  | 4001/5000 [17:18<05:57,  2.79it/s]

steps= 100
episode: 4000 episode reward: -1108 eps: 0.6635372121290035 avg reward (last 100): -1064.2970297029703 episode loss:  21275.162
avg reward for last 100 episodes: -1064.2970297029703


 80%|████████  | 4003/5000 [17:18<05:11,  3.21it/s]

steps= 162
steps= 240


 80%|████████  | 4005/5000 [17:18<03:55,  4.23it/s]

steps= 117
steps= 14


 80%|████████  | 4006/5000 [17:18<03:48,  4.36it/s]

steps= 227


 80%|████████  | 4007/5000 [17:19<03:50,  4.31it/s]

steps= 271


 80%|████████  | 4008/5000 [17:19<05:53,  2.81it/s]

steps= 352


 80%|████████  | 4009/5000 [17:20<07:38,  2.16it/s]

steps= 344


 80%|████████  | 4010/5000 [17:20<06:20,  2.60it/s]

steps= 199


 80%|████████  | 4011/5000 [17:21<07:21,  2.24it/s]

steps= 620
steps= 5


 80%|████████  | 4013/5000 [17:21<06:03,  2.72it/s]

steps= 226


 80%|████████  | 4016/5000 [17:22<04:34,  3.58it/s]

steps= 415
steps= 73
steps= 82


 80%|████████  | 4018/5000 [17:22<03:57,  4.13it/s]

steps= 242
steps= 134


 80%|████████  | 4020/5000 [17:22<02:58,  5.48it/s]

steps= 118
steps= 76


 80%|████████  | 4021/5000 [17:23<03:12,  5.10it/s]

steps= 178


 80%|████████  | 4023/5000 [17:24<05:05,  3.20it/s]

steps= 358
steps= 67


 80%|████████  | 4024/5000 [17:24<05:18,  3.06it/s]

steps= 245


 80%|████████  | 4025/5000 [17:24<05:27,  2.98it/s]

steps= 243


 81%|████████  | 4027/5000 [17:25<04:53,  3.31it/s]

steps= 127
steps= 84


 81%|████████  | 4028/5000 [17:25<05:29,  2.95it/s]

steps= 245


 81%|████████  | 4029/5000 [17:26<06:41,  2.42it/s]

steps= 463
steps= 12


 81%|████████  | 4031/5000 [17:27<06:29,  2.49it/s]

steps= 356


 81%|████████  | 4032/5000 [17:27<05:57,  2.71it/s]

steps= 141


 81%|████████  | 4033/5000 [17:28<09:28,  1.70it/s]

steps= 656


 81%|████████  | 4034/5000 [17:29<10:24,  1.55it/s]

steps= 562


 81%|████████  | 4035/5000 [17:29<09:48,  1.64it/s]

steps= 246


 81%|████████  | 4036/5000 [17:30<08:11,  1.96it/s]

steps= 102


 81%|████████  | 4037/5000 [17:30<07:46,  2.07it/s]

steps= 139


 81%|████████  | 4039/5000 [17:31<06:57,  2.30it/s]

steps= 237
steps= 75


 81%|████████  | 4040/5000 [17:31<06:43,  2.38it/s]

steps= 104


 81%|████████  | 4041/5000 [17:32<06:42,  2.38it/s]

steps= 129


 81%|████████  | 4043/5000 [17:33<06:23,  2.49it/s]

steps= 320
steps= 38


 81%|████████  | 4045/5000 [17:33<04:28,  3.56it/s]

steps= 39
steps= 67


 81%|████████  | 4046/5000 [17:33<04:03,  3.92it/s]

steps= 51


 81%|████████  | 4047/5000 [17:34<05:03,  3.15it/s]

steps= 180


 81%|████████  | 4048/5000 [17:34<06:07,  2.59it/s]

steps= 293


 81%|████████  | 4049/5000 [17:35<06:02,  2.62it/s]

steps= 199


 81%|████████  | 4050/5000 [17:35<05:58,  2.65it/s]

steps= 232


 81%|████████  | 4051/5000 [17:36<07:11,  2.20it/s]

steps= 394


 81%|████████  | 4052/5000 [17:36<06:54,  2.29it/s]

steps= 143


 81%|████████  | 4053/5000 [17:36<06:09,  2.56it/s]

steps= 103


 81%|████████  | 4054/5000 [17:37<06:20,  2.49it/s]

steps= 199


 81%|████████  | 4055/5000 [17:37<06:33,  2.40it/s]

steps= 220


 81%|████████  | 4056/5000 [17:38<06:39,  2.36it/s]

steps= 169


 81%|████████  | 4057/5000 [17:38<06:37,  2.37it/s]

steps= 148


 81%|████████  | 4058/5000 [17:38<06:19,  2.48it/s]

steps= 100
steps= 17


 81%|████████  | 4060/5000 [17:39<04:59,  3.14it/s]

steps= 105


 81%|████████  | 4062/5000 [17:39<05:42,  2.74it/s]

steps= 393
steps= 41


 81%|████████▏ | 4063/5000 [17:40<04:39,  3.36it/s]

steps= 76


 81%|████████▏ | 4064/5000 [17:40<04:25,  3.53it/s]

steps= 114


 81%|████████▏ | 4065/5000 [17:40<04:14,  3.68it/s]

steps= 83


 81%|████████▏ | 4066/5000 [17:41<07:18,  2.13it/s]

steps= 418


 81%|████████▏ | 4068/5000 [17:41<05:02,  3.08it/s]

steps= 108
steps= 37


 81%|████████▏ | 4069/5000 [17:42<04:22,  3.54it/s]

steps= 64


 81%|████████▏ | 4070/5000 [17:42<05:31,  2.81it/s]

steps= 247


 81%|████████▏ | 4073/5000 [17:43<05:01,  3.07it/s]

steps= 235
steps= 34
steps= 16


 81%|████████▏ | 4074/5000 [17:43<06:12,  2.49it/s]

steps= 500


 82%|████████▏ | 4075/5000 [17:44<06:41,  2.30it/s]

steps= 256


 82%|████████▏ | 4078/5000 [17:44<04:32,  3.38it/s]

steps= 126
steps= 35
steps= 78


 82%|████████▏ | 4080/5000 [17:45<04:02,  3.79it/s]

steps= 169
steps= 75


 82%|████████▏ | 4081/5000 [17:45<04:38,  3.30it/s]

steps= 292


 82%|████████▏ | 4082/5000 [17:46<05:31,  2.77it/s]

steps= 217


 82%|████████▏ | 4083/5000 [17:46<05:24,  2.82it/s]

steps= 159


 82%|████████▏ | 4085/5000 [17:47<04:16,  3.57it/s]

steps= 151
steps= 91


 82%|████████▏ | 4086/5000 [17:47<04:54,  3.10it/s]

steps= 239


 82%|████████▏ | 4087/5000 [17:47<04:43,  3.22it/s]

steps= 95


 82%|████████▏ | 4088/5000 [17:48<04:15,  3.57it/s]

steps= 62


 82%|████████▏ | 4090/5000 [17:48<03:31,  4.30it/s]

steps= 66
steps= 195


 82%|████████▏ | 4091/5000 [17:48<03:30,  4.33it/s]

steps= 158


 82%|████████▏ | 4093/5000 [17:49<03:30,  4.31it/s]

steps= 140
steps= 52


 82%|████████▏ | 4095/5000 [17:49<03:14,  4.65it/s]

steps= 83
steps= 38


 82%|████████▏ | 4096/5000 [17:49<03:40,  4.10it/s]

steps= 58


 82%|████████▏ | 4098/5000 [17:50<03:45,  4.00it/s]

steps= 157
steps= 149
steps= 4


 82%|████████▏ | 4100/5000 [17:51<04:48,  3.12it/s]

steps= 549


 82%|████████▏ | 4101/5000 [17:52<06:02,  2.48it/s]

steps= 222
episode: 4100 episode reward: -1230 eps: 0.656934578065436 avg reward (last 100): -1147.3663366336634 episode loss:  28226.45
avg reward for last 100 episodes: -1147.3663366336634


 82%|████████▏ | 4102/5000 [17:52<07:18,  2.05it/s]

steps= 344


 82%|████████▏ | 4103/5000 [17:53<07:26,  2.01it/s]

steps= 195


 82%|████████▏ | 4104/5000 [17:53<06:10,  2.42it/s]

steps= 78


 82%|████████▏ | 4106/5000 [17:54<05:49,  2.56it/s]

steps= 341
steps= 115


 82%|████████▏ | 4107/5000 [17:54<04:57,  3.00it/s]

steps= 73


 82%|████████▏ | 4109/5000 [17:55<04:43,  3.14it/s]

steps= 167
steps= 61


 82%|████████▏ | 4110/5000 [17:55<05:14,  2.83it/s]

steps= 169


 82%|████████▏ | 4111/5000 [17:55<04:54,  3.02it/s]

steps= 96


 82%|████████▏ | 4112/5000 [17:56<04:57,  2.98it/s]

steps= 172


 82%|████████▏ | 4113/5000 [17:56<05:58,  2.47it/s]

steps= 306


 82%|████████▏ | 4115/5000 [17:57<05:16,  2.79it/s]

steps= 267
steps= 73


 82%|████████▏ | 4116/5000 [17:58<07:32,  1.95it/s]

steps= 559


 82%|████████▏ | 4117/5000 [17:58<06:39,  2.21it/s]

steps= 229
steps= 27


 82%|████████▏ | 4119/5000 [17:58<05:15,  2.79it/s]

steps= 121


 82%|████████▏ | 4120/5000 [17:59<04:52,  3.01it/s]

steps= 124


 82%|████████▏ | 4121/5000 [17:59<04:57,  2.96it/s]

steps= 99


 82%|████████▏ | 4122/5000 [18:00<05:56,  2.46it/s]

steps= 279


 82%|████████▏ | 4123/5000 [18:00<05:48,  2.52it/s]

steps= 147
steps= 16


 82%|████████▎ | 4125/5000 [18:00<04:42,  3.10it/s]

steps= 113


 83%|████████▎ | 4126/5000 [18:01<07:04,  2.06it/s]

steps= 299


 83%|████████▎ | 4127/5000 [18:02<07:01,  2.07it/s]

steps= 261


 83%|████████▎ | 4128/5000 [18:02<05:56,  2.45it/s]

steps= 76


 83%|████████▎ | 4129/5000 [18:02<05:04,  2.86it/s]

steps= 103


 83%|████████▎ | 4130/5000 [18:02<05:26,  2.66it/s]

steps= 278


 83%|████████▎ | 4132/5000 [18:03<04:23,  3.30it/s]

steps= 137
steps= 39


 83%|████████▎ | 4134/5000 [18:03<03:12,  4.50it/s]

steps= 89
steps= 145


 83%|████████▎ | 4135/5000 [18:04<03:44,  3.86it/s]

steps= 57


 83%|████████▎ | 4136/5000 [18:04<05:41,  2.53it/s]

steps= 265


 83%|████████▎ | 4137/5000 [18:05<06:11,  2.32it/s]

steps= 267
steps= 12


 83%|████████▎ | 4139/5000 [18:05<05:43,  2.51it/s]

steps= 139
steps= 56


 83%|████████▎ | 4141/5000 [18:06<04:57,  2.89it/s]

steps= 115


 83%|████████▎ | 4142/5000 [18:07<08:33,  1.67it/s]

steps= 350


 83%|████████▎ | 4144/5000 [18:08<05:38,  2.53it/s]

steps= 56
steps= 33


 83%|████████▎ | 4145/5000 [18:08<07:16,  1.96it/s]

steps= 209


 83%|████████▎ | 4146/5000 [18:09<07:00,  2.03it/s]

steps= 181
steps= 31


 83%|████████▎ | 4149/5000 [18:09<04:24,  3.22it/s]

steps= 76
steps= 31


 83%|████████▎ | 4150/5000 [18:10<05:06,  2.78it/s]

steps= 102


 83%|████████▎ | 4151/5000 [18:12<11:13,  1.26it/s]

steps= 527


 83%|████████▎ | 4152/5000 [18:12<10:11,  1.39it/s]

steps= 118


 83%|████████▎ | 4153/5000 [18:13<08:49,  1.60it/s]

steps= 84


 83%|████████▎ | 4154/5000 [18:13<08:53,  1.59it/s]

steps= 219


 83%|████████▎ | 4155/5000 [18:14<07:51,  1.79it/s]

steps= 183


 83%|████████▎ | 4156/5000 [18:15<10:01,  1.40it/s]

steps= 492


 83%|████████▎ | 4157/5000 [18:15<07:53,  1.78it/s]

steps= 72
steps= 42


 83%|████████▎ | 4160/5000 [18:15<05:13,  2.68it/s]

steps= 200
steps= 120


 83%|████████▎ | 4161/5000 [18:16<05:59,  2.33it/s]

steps= 193


 83%|████████▎ | 4163/5000 [18:16<04:08,  3.37it/s]

steps= 82
steps= 30


 83%|████████▎ | 4164/5000 [18:17<03:32,  3.94it/s]

steps= 80


 83%|████████▎ | 4165/5000 [18:17<04:50,  2.87it/s]

steps= 336
steps= 47


 83%|████████▎ | 4168/5000 [18:18<04:08,  3.34it/s]

steps= 349
steps= 104


 83%|████████▎ | 4169/5000 [18:19<05:14,  2.64it/s]

steps= 424


 83%|████████▎ | 4170/5000 [18:19<07:26,  1.86it/s]

steps= 253


 83%|████████▎ | 4172/5000 [18:20<06:00,  2.30it/s]

steps= 251
steps= 54


 83%|████████▎ | 4173/5000 [18:21<07:13,  1.91it/s]

steps= 237
steps= 22
steps= 102


 84%|████████▎ | 4177/5000 [18:22<04:40,  2.94it/s]

steps= 324
steps= 82


 84%|████████▎ | 4179/5000 [18:22<03:18,  4.14it/s]

steps= 106
steps= 37


 84%|████████▎ | 4180/5000 [18:23<05:27,  2.51it/s]

steps= 235


 84%|████████▎ | 4181/5000 [18:23<05:28,  2.49it/s]

steps= 110


 84%|████████▎ | 4182/5000 [18:24<07:22,  1.85it/s]

steps= 338


 84%|████████▎ | 4184/5000 [18:25<05:38,  2.41it/s]

steps= 342
steps= 75


 84%|████████▎ | 4186/5000 [18:25<04:09,  3.27it/s]

steps= 232
steps= 80
steps= 36


 84%|████████▍ | 4188/5000 [18:25<03:06,  4.34it/s]

steps= 10


 84%|████████▍ | 4189/5000 [18:26<03:40,  3.68it/s]

steps= 184
steps= 113


 84%|████████▍ | 4192/5000 [18:26<02:34,  5.23it/s]

steps= 96
steps= 50


 84%|████████▍ | 4193/5000 [18:26<02:24,  5.58it/s]

steps= 87


 84%|████████▍ | 4194/5000 [18:27<04:36,  2.91it/s]

steps= 377


 84%|████████▍ | 4195/5000 [18:27<04:16,  3.14it/s]

steps= 121


 84%|████████▍ | 4196/5000 [18:28<03:59,  3.36it/s]

steps= 101
steps= 9


 84%|████████▍ | 4198/5000 [18:28<03:13,  4.15it/s]

steps= 50


 84%|████████▍ | 4199/5000 [18:29<06:21,  2.10it/s]

steps= 574


 84%|████████▍ | 4200/5000 [18:29<05:38,  2.36it/s]

steps= 130
steps= 85
episode:

 84%|████████▍ | 4201/5000 [18:29<04:44,  2.80it/s]

 4200 episode reward: -1093 eps: 0.6503976445771799 avg reward (last 100): -1091.2178217821781 episode loss:  23448.105
avg reward for last 100 episodes: -1091.2178217821781


 84%|████████▍ | 4202/5000 [18:31<09:11,  1.45it/s]

steps= 709


 84%|████████▍ | 4203/5000 [18:31<07:55,  1.68it/s]

steps= 156


 84%|████████▍ | 4204/5000 [18:32<08:14,  1.61it/s]

steps= 451


 84%|████████▍ | 4205/5000 [18:33<08:57,  1.48it/s]

steps= 478


 84%|████████▍ | 4206/5000 [18:33<07:31,  1.76it/s]

steps= 80


 84%|████████▍ | 4207/5000 [18:33<07:27,  1.77it/s]

steps= 215
steps= 38


 84%|████████▍ | 4209/5000 [18:34<05:44,  2.30it/s]

steps= 72


 84%|████████▍ | 4211/5000 [18:34<04:10,  3.15it/s]

steps= 171
steps= 76


 84%|████████▍ | 4212/5000 [18:35<05:30,  2.39it/s]

steps= 257


 84%|████████▍ | 4213/5000 [18:35<06:04,  2.16it/s]

steps= 300


 84%|████████▍ | 4214/5000 [18:36<06:04,  2.16it/s]

steps= 203


 84%|████████▍ | 4215/5000 [18:37<07:55,  1.65it/s]

steps= 523
steps= 43


 84%|████████▍ | 4218/5000 [18:37<04:57,  2.63it/s]

steps= 219
steps= 96


 84%|████████▍ | 4219/5000 [18:38<04:52,  2.67it/s]

steps= 107


 84%|████████▍ | 4220/5000 [18:38<04:29,  2.90it/s]

steps= 50


 84%|████████▍ | 4221/5000 [18:38<04:27,  2.91it/s]

steps= 168
steps= 48


 84%|████████▍ | 4223/5000 [18:39<03:43,  3.48it/s]

steps= 40


 84%|████████▍ | 4224/5000 [18:39<04:36,  2.81it/s]

steps= 306


 84%|████████▍ | 4225/5000 [18:39<04:18,  3.00it/s]

steps= 106


 85%|████████▍ | 4226/5000 [18:40<04:35,  2.81it/s]

steps= 165


 85%|████████▍ | 4227/5000 [18:41<06:32,  1.97it/s]

steps= 584


 85%|████████▍ | 4228/5000 [18:41<05:27,  2.36it/s]

steps= 162


 85%|████████▍ | 4229/5000 [18:41<05:03,  2.54it/s]

steps= 207


 85%|████████▍ | 4230/5000 [18:42<04:48,  2.67it/s]

steps= 102
steps= 28


 85%|████████▍ | 4232/5000 [18:42<04:22,  2.93it/s]

steps= 177


 85%|████████▍ | 4234/5000 [18:43<04:46,  2.67it/s]

steps= 244
steps= 89


 85%|████████▍ | 4235/5000 [18:43<05:17,  2.41it/s]

steps= 302


 85%|████████▍ | 4237/5000 [18:44<03:46,  3.37it/s]

steps= 140
steps= 36


 85%|████████▍ | 4238/5000 [18:45<05:54,  2.15it/s]

steps= 334


 85%|████████▍ | 4239/5000 [18:45<05:36,  2.26it/s]

steps= 159


 85%|████████▍ | 4240/5000 [18:46<05:31,  2.29it/s]

steps= 237


 85%|████████▍ | 4241/5000 [18:46<05:05,  2.49it/s]

steps= 134
steps= 32


 85%|████████▍ | 4244/5000 [18:46<03:36,  3.50it/s]

steps= 130
steps= 36


 85%|████████▍ | 4245/5000 [18:47<04:49,  2.60it/s]

steps= 368


 85%|████████▍ | 4246/5000 [18:48<05:03,  2.48it/s]

steps= 304


 85%|████████▍ | 4247/5000 [18:48<05:07,  2.45it/s]

steps= 261
steps= 46


 85%|████████▍ | 4249/5000 [18:48<04:04,  3.08it/s]

steps= 113


 85%|████████▌ | 4250/5000 [18:49<03:53,  3.21it/s]

steps= 107
steps= 11


 85%|████████▌ | 4252/5000 [18:49<03:08,  3.97it/s]

steps= 37


 85%|████████▌ | 4253/5000 [18:49<04:52,  2.56it/s]

steps= 308


 85%|████████▌ | 4254/5000 [18:50<04:22,  2.84it/s]

steps= 123


 85%|████████▌ | 4257/5000 [18:50<02:57,  4.18it/s]

steps= 115
steps= 24
steps= 51


 85%|████████▌ | 4258/5000 [18:50<02:59,  4.14it/s]

steps= 63


 85%|████████▌ | 4260/5000 [18:51<03:00,  4.11it/s]

steps= 114
steps= 68


 85%|████████▌ | 4261/5000 [18:51<03:24,  3.61it/s]

steps= 178
steps= 26


 85%|████████▌ | 4263/5000 [18:53<05:20,  2.30it/s]

steps= 775


 85%|████████▌ | 4264/5000 [18:53<05:46,  2.12it/s]

steps= 246


 85%|████████▌ | 4265/5000 [18:54<06:21,  1.92it/s]

steps= 339
steps= 17


 85%|████████▌ | 4267/5000 [18:55<05:31,  2.21it/s]

steps= 281
steps= 21


 85%|████████▌ | 4270/5000 [18:55<03:42,  3.27it/s]

steps= 73
steps= 95


 85%|████████▌ | 4271/5000 [18:56<04:31,  2.68it/s]

steps= 338


 85%|████████▌ | 4272/5000 [18:56<04:01,  3.01it/s]

steps= 143


 85%|████████▌ | 4274/5000 [18:56<03:38,  3.32it/s]

steps= 302
steps= 81


 86%|████████▌ | 4275/5000 [18:57<03:11,  3.79it/s]

steps= 87


 86%|████████▌ | 4276/5000 [18:57<03:37,  3.33it/s]

steps= 139


 86%|████████▌ | 4278/5000 [18:57<02:43,  4.42it/s]

steps= 89
steps= 83


 86%|████████▌ | 4280/5000 [18:58<03:57,  3.03it/s]

steps= 455
steps= 33


 86%|████████▌ | 4281/5000 [18:59<04:10,  2.87it/s]

steps= 181


 86%|████████▌ | 4282/5000 [18:59<04:03,  2.95it/s]

steps= 120


 86%|████████▌ | 4283/5000 [18:59<04:03,  2.95it/s]

steps= 134


 86%|████████▌ | 4284/5000 [19:00<03:41,  3.23it/s]

steps= 96


 86%|████████▌ | 4285/5000 [19:01<06:28,  1.84it/s]

steps= 469


 86%|████████▌ | 4286/5000 [19:01<05:52,  2.02it/s]

steps= 185


 86%|████████▌ | 4288/5000 [19:01<03:54,  3.03it/s]

steps= 103
steps= 46


 86%|████████▌ | 4289/5000 [19:02<03:49,  3.10it/s]

steps= 173


 86%|████████▌ | 4290/5000 [19:03<05:54,  2.00it/s]

steps= 437
steps= 39


 86%|████████▌ | 4292/5000 [19:03<05:29,  2.15it/s]

steps= 310


 86%|████████▌ | 4293/5000 [19:04<04:43,  2.49it/s]

steps= 76


 86%|████████▌ | 4294/5000 [19:04<05:40,  2.07it/s]

steps= 401


 86%|████████▌ | 4295/5000 [19:05<05:17,  2.22it/s]

steps= 165


 86%|████████▌ | 4296/5000 [19:05<05:34,  2.10it/s]

steps= 305
steps= 55


 86%|████████▌ | 4298/5000 [19:07<06:31,  1.79it/s]

steps= 648


 86%|████████▌ | 4300/5000 [19:08<05:48,  2.01it/s]

steps= 492
steps= 99


 86%|████████▌ | 4301/5000 [19:08<04:58,  2.34it/s]

steps= 187
episode: 4300 episode reward: -1195 eps: 0.6439257579000631 avg reward (last 100): -1096.3267326732673 episode loss:  21704.477
avg reward for last 100 episodes: -1096.3267326732673
steps= 21


 86%|████████▌ | 4303/5000 [19:08<04:09,  2.80it/s]

steps= 172
steps= 13


 86%|████████▌ | 4306/5000 [19:09<02:57,  3.91it/s]

steps= 111
steps= 51


 86%|████████▌ | 4307/5000 [19:09<02:49,  4.10it/s]

steps= 76


 86%|████████▌ | 4308/5000 [19:09<03:18,  3.49it/s]

steps= 213


 86%|████████▌ | 4309/5000 [19:10<04:53,  2.35it/s]

steps= 359


 86%|████████▌ | 4310/5000 [19:11<04:54,  2.35it/s]

steps= 132


 86%|████████▌ | 4312/5000 [19:12<05:43,  2.00it/s]

steps= 400
steps= 33


 86%|████████▋ | 4314/5000 [19:13<04:27,  2.56it/s]

steps= 93
steps= 26


 86%|████████▋ | 4315/5000 [19:13<05:27,  2.09it/s]

steps= 184


 86%|████████▋ | 4316/5000 [19:15<08:00,  1.42it/s]

steps= 351


 86%|████████▋ | 4318/5000 [19:15<05:27,  2.08it/s]

steps= 121
steps= 11


 86%|████████▋ | 4319/5000 [19:16<05:20,  2.13it/s]

steps= 147
steps= 65


 86%|████████▋ | 4321/5000 [19:17<05:49,  1.94it/s]

steps= 204


 86%|████████▋ | 4322/5000 [19:17<06:21,  1.78it/s]

steps= 207


 86%|████████▋ | 4323/5000 [19:18<05:57,  1.90it/s]

steps= 108
steps= 28


 86%|████████▋ | 4325/5000 [19:18<04:54,  2.29it/s]

steps= 221


 87%|████████▋ | 4326/5000 [19:19<04:59,  2.25it/s]

steps= 274
steps= 50


 87%|████████▋ | 4328/5000 [19:19<04:39,  2.40it/s]

steps= 362


 87%|████████▋ | 4329/5000 [19:20<05:03,  2.21it/s]

steps= 182


 87%|████████▋ | 4330/5000 [19:20<04:27,  2.50it/s]

steps= 50


 87%|████████▋ | 4331/5000 [19:21<06:04,  1.83it/s]

steps= 275


 87%|████████▋ | 4332/5000 [19:23<10:26,  1.07it/s]

steps= 445


 87%|████████▋ | 4333/5000 [19:24<11:40,  1.05s/it]

steps= 357


 87%|████████▋ | 4334/5000 [19:25<10:03,  1.10it/s]

steps= 178


 87%|████████▋ | 4335/5000 [19:25<07:59,  1.39it/s]

steps= 97


 87%|████████▋ | 4336/5000 [19:26<07:19,  1.51it/s]

steps= 160


 87%|████████▋ | 4338/5000 [19:26<05:29,  2.01it/s]

steps= 350
steps= 26


 87%|████████▋ | 4339/5000 [19:27<07:31,  1.47it/s]

steps= 349


 87%|████████▋ | 4340/5000 [19:28<07:55,  1.39it/s]

steps= 441


 87%|████████▋ | 4341/5000 [19:29<07:58,  1.38it/s]

steps= 353


 87%|████████▋ | 4342/5000 [19:31<11:10,  1.02s/it]

steps= 757


 87%|████████▋ | 4343/5000 [19:31<09:32,  1.15it/s]

steps= 192


 87%|████████▋ | 4344/5000 [19:33<11:52,  1.09s/it]

steps= 692


 87%|████████▋ | 4345/5000 [19:33<09:45,  1.12it/s]

steps= 205


 87%|████████▋ | 4346/5000 [19:34<07:54,  1.38it/s]

steps= 73


 87%|████████▋ | 4347/5000 [19:34<06:57,  1.56it/s]

steps= 208
steps= 18


 87%|████████▋ | 4349/5000 [19:34<05:18,  2.04it/s]

steps= 65


 87%|████████▋ | 4350/5000 [19:35<05:51,  1.85it/s]

steps= 229


 87%|████████▋ | 4351/5000 [19:35<05:41,  1.90it/s]

steps= 181


 87%|████████▋ | 4352/5000 [19:36<05:39,  1.91it/s]

steps= 184


 87%|████████▋ | 4353/5000 [19:36<05:11,  2.08it/s]

steps= 108


 87%|████████▋ | 4354/5000 [19:37<05:24,  1.99it/s]

steps= 246


 87%|████████▋ | 4355/5000 [19:37<05:08,  2.09it/s]

steps= 209


 87%|████████▋ | 4356/5000 [19:38<04:56,  2.17it/s]

steps= 210


 87%|████████▋ | 4357/5000 [19:38<04:50,  2.21it/s]

steps= 335


 87%|████████▋ | 4358/5000 [19:39<04:35,  2.33it/s]

steps= 165


 87%|████████▋ | 4359/5000 [19:39<04:49,  2.21it/s]

steps= 196
steps= 34


 87%|████████▋ | 4361/5000 [19:39<03:52,  2.75it/s]

steps= 105


 87%|████████▋ | 4362/5000 [19:40<04:55,  2.16it/s]

steps= 256


 87%|████████▋ | 4363/5000 [19:41<04:58,  2.13it/s]

steps= 259


 87%|████████▋ | 4364/5000 [19:41<04:41,  2.26it/s]

steps= 181


 87%|████████▋ | 4365/5000 [19:41<04:03,  2.61it/s]

steps= 61


 87%|████████▋ | 4367/5000 [19:42<04:20,  2.43it/s]

steps= 448
steps= 46


 87%|████████▋ | 4368/5000 [19:43<04:28,  2.35it/s]

steps= 157
steps= 17


 87%|████████▋ | 4371/5000 [19:43<03:20,  3.13it/s]

steps= 252
steps= 109


 87%|████████▋ | 4372/5000 [19:44<02:50,  3.68it/s]

steps= 18


 87%|████████▋ | 4373/5000 [19:44<03:50,  2.72it/s]

steps= 207


 88%|████████▊ | 4375/5000 [19:45<03:17,  3.16it/s]

steps= 180
steps= 129


 88%|████████▊ | 4376/5000 [19:45<03:05,  3.36it/s]

steps= 104


 88%|████████▊ | 4377/5000 [19:45<03:31,  2.95it/s]

steps= 123
steps= 9


 88%|████████▊ | 4379/5000 [19:46<03:13,  3.21it/s]

steps= 148


 88%|████████▊ | 4380/5000 [19:46<03:35,  2.88it/s]

steps= 216


 88%|████████▊ | 4382/5000 [19:47<02:54,  3.55it/s]

steps= 197
steps= 20


 88%|████████▊ | 4383/5000 [19:47<03:14,  3.16it/s]

steps= 180


 88%|████████▊ | 4384/5000 [19:48<04:30,  2.27it/s]

steps= 251


 88%|████████▊ | 4385/5000 [19:49<06:21,  1.61it/s]

steps= 590


 88%|████████▊ | 4386/5000 [19:49<05:30,  1.86it/s]

steps= 158


 88%|████████▊ | 4387/5000 [19:50<05:48,  1.76it/s]

steps= 344


 88%|████████▊ | 4388/5000 [19:50<05:25,  1.88it/s]

steps= 272


 88%|████████▊ | 4389/5000 [19:51<04:56,  2.06it/s]

steps= 142


 88%|████████▊ | 4390/5000 [19:51<04:48,  2.11it/s]

steps= 165


 88%|████████▊ | 4391/5000 [19:52<04:52,  2.08it/s]

steps= 327


 88%|████████▊ | 4392/5000 [19:52<05:00,  2.02it/s]

steps= 152


 88%|████████▊ | 4393/5000 [19:53<05:26,  1.86it/s]

steps= 247


 88%|████████▊ | 4394/5000 [19:53<04:47,  2.11it/s]

steps= 128


 88%|████████▊ | 4395/5000 [19:54<04:24,  2.29it/s]

steps= 149
steps= 61


 88%|████████▊ | 4397/5000 [19:54<03:50,  2.61it/s]

steps= 261


 88%|████████▊ | 4399/5000 [19:54<02:52,  3.49it/s]

steps= 143
steps= 34


 88%|████████▊ | 4400/5000 [19:55<02:19,  4.29it/s]

steps= 14


 88%|████████▊ | 4401/5000 [19:55<02:23,  4.16it/s]

steps= 104
episode: 4400 episode reward: -1112 eps: 0.6375182707752978 avg reward (last 100): -1133.920792079208 episode loss:  27311.258
avg reward for last 100 episodes: -1133.920792079208


 88%|████████▊ | 4402/5000 [19:55<03:10,  3.14it/s]

steps= 232


 88%|████████▊ | 4403/5000 [19:56<03:17,  3.02it/s]

steps= 136


 88%|████████▊ | 4404/5000 [19:57<06:25,  1.55it/s]

steps= 616


 88%|████████▊ | 4405/5000 [19:57<05:05,  1.95it/s]

steps= 103


 88%|████████▊ | 4406/5000 [19:58<05:25,  1.82it/s]

steps= 269


 88%|████████▊ | 4408/5000 [19:58<03:37,  2.72it/s]

steps= 82
steps= 54


 88%|████████▊ | 4411/5000 [19:59<02:29,  3.93it/s]

steps= 52
steps= 27
steps= 48


 88%|████████▊ | 4412/5000 [19:59<03:19,  2.94it/s]

steps= 220


 88%|████████▊ | 4413/5000 [20:00<04:27,  2.20it/s]

steps= 181


 88%|████████▊ | 4414/5000 [20:00<03:52,  2.52it/s]

steps= 71


 88%|████████▊ | 4415/5000 [20:01<03:31,  2.76it/s]

steps= 84


 88%|████████▊ | 4416/5000 [20:01<03:09,  3.09it/s]

steps= 55


 88%|████████▊ | 4417/5000 [20:01<03:34,  2.72it/s]

steps= 109


 88%|████████▊ | 4418/5000 [20:02<03:48,  2.55it/s]

steps= 104


 88%|████████▊ | 4419/5000 [20:02<04:14,  2.29it/s]

steps= 184


 88%|████████▊ | 4420/5000 [20:03<04:05,  2.36it/s]

steps= 158


 88%|████████▊ | 4421/5000 [20:03<03:27,  2.79it/s]

steps= 95


 88%|████████▊ | 4422/5000 [20:03<03:51,  2.49it/s]

steps= 139


 88%|████████▊ | 4423/5000 [20:04<03:36,  2.67it/s]

steps= 76


 88%|████████▊ | 4424/5000 [20:05<05:50,  1.64it/s]

steps= 647


 88%|████████▊ | 4425/5000 [20:06<08:08,  1.18it/s]

steps= 579


 89%|████████▊ | 4426/5000 [20:07<06:29,  1.47it/s]

steps= 70
steps= 32


 89%|████████▊ | 4428/5000 [20:07<05:46,  1.65it/s]

steps= 361


 89%|████████▊ | 4429/5000 [20:08<04:51,  1.96it/s]

steps= 140


 89%|████████▊ | 4430/5000 [20:08<04:35,  2.07it/s]

steps= 218
steps= 23


 89%|████████▊ | 4432/5000 [20:09<03:54,  2.42it/s]

steps= 216


 89%|████████▊ | 4433/5000 [20:09<03:31,  2.68it/s]

steps= 164


 89%|████████▊ | 4434/5000 [20:09<03:44,  2.52it/s]

steps= 228
steps= 27


 89%|████████▊ | 4436/5000 [20:10<03:03,  3.08it/s]

steps= 151


 89%|████████▊ | 4437/5000 [20:10<03:21,  2.80it/s]

steps= 160


 89%|████████▉ | 4438/5000 [20:11<04:48,  1.95it/s]

steps= 275


 89%|████████▉ | 4439/5000 [20:11<04:02,  2.31it/s]

steps= 126


 89%|████████▉ | 4440/5000 [20:11<03:24,  2.74it/s]

steps= 64


 89%|████████▉ | 4441/5000 [20:12<03:19,  2.81it/s]

steps= 175


 89%|████████▉ | 4442/5000 [20:12<03:18,  2.81it/s]

steps= 171


 89%|████████▉ | 4444/5000 [20:13<03:01,  3.06it/s]

steps= 262
steps= 108


 89%|████████▉ | 4445/5000 [20:14<05:55,  1.56it/s]

steps= 614


 89%|████████▉ | 4446/5000 [20:15<06:41,  1.38it/s]

steps= 509


 89%|████████▉ | 4447/5000 [20:15<05:24,  1.70it/s]

steps= 167
steps= 17


 89%|████████▉ | 4449/5000 [20:16<04:13,  2.17it/s]

steps= 120
steps= 10


 89%|████████▉ | 4451/5000 [20:16<03:59,  2.30it/s]

steps= 159


 89%|████████▉ | 4452/5000 [20:17<05:40,  1.61it/s]

steps= 413


 89%|████████▉ | 4453/5000 [20:18<05:13,  1.75it/s]

steps= 223


 89%|████████▉ | 4454/5000 [20:18<04:22,  2.08it/s]

steps= 104


 89%|████████▉ | 4456/5000 [20:19<03:14,  2.79it/s]

steps= 128
steps= 95


 89%|████████▉ | 4457/5000 [20:19<02:38,  3.43it/s]

steps= 48


 89%|████████▉ | 4458/5000 [20:19<02:38,  3.42it/s]

steps= 105


 89%|████████▉ | 4459/5000 [20:19<02:26,  3.68it/s]

steps= 72


 89%|████████▉ | 4460/5000 [20:22<07:49,  1.15it/s]

steps= 841


 89%|████████▉ | 4461/5000 [20:22<06:41,  1.34it/s]

steps= 126


 89%|████████▉ | 4462/5000 [20:22<05:31,  1.62it/s]

steps= 106


 89%|████████▉ | 4464/5000 [20:23<04:32,  1.97it/s]

steps= 358
steps= 42


 89%|████████▉ | 4465/5000 [20:24<03:46,  2.36it/s]

steps= 150


 89%|████████▉ | 4466/5000 [20:24<03:47,  2.35it/s]

steps= 344


 89%|████████▉ | 4467/5000 [20:25<04:35,  1.94it/s]

steps= 330


 89%|████████▉ | 4468/5000 [20:25<03:55,  2.26it/s]

steps= 113


 89%|████████▉ | 4469/5000 [20:25<03:54,  2.27it/s]

steps= 246


 89%|████████▉ | 4470/5000 [20:26<03:18,  2.67it/s]

steps= 126


 89%|████████▉ | 4471/5000 [20:26<04:07,  2.14it/s]

steps= 426
steps= 73


 89%|████████▉ | 4473/5000 [20:27<04:01,  2.19it/s]

steps= 282


 89%|████████▉ | 4474/5000 [20:28<04:05,  2.14it/s]

steps= 211
steps= 40


 90%|████████▉ | 4476/5000 [20:29<03:59,  2.19it/s]

steps= 315


 90%|████████▉ | 4477/5000 [20:29<04:39,  1.87it/s]

steps= 314


 90%|████████▉ | 4478/5000 [20:30<04:50,  1.80it/s]

steps= 344


 90%|████████▉ | 4479/5000 [20:31<05:43,  1.52it/s]

steps= 423


 90%|████████▉ | 4480/5000 [20:31<05:39,  1.53it/s]

steps= 277


 90%|████████▉ | 4481/5000 [20:32<05:00,  1.73it/s]

steps= 135
steps= 117


 90%|████████▉ | 4483/5000 [20:32<03:24,  2.53it/s]

steps= 93


 90%|████████▉ | 4484/5000 [20:33<03:39,  2.35it/s]

steps= 150


 90%|████████▉ | 4485/5000 [20:33<03:34,  2.40it/s]

steps= 200


 90%|████████▉ | 4487/5000 [20:34<03:21,  2.55it/s]

steps= 404
steps= 47


 90%|████████▉ | 4488/5000 [20:35<04:38,  1.84it/s]

steps= 445


 90%|████████▉ | 4489/5000 [20:35<04:24,  1.93it/s]

steps= 144


 90%|████████▉ | 4491/5000 [20:36<03:25,  2.48it/s]

steps= 219
steps= 34


 90%|████████▉ | 4493/5000 [20:36<02:20,  3.61it/s]

steps= 46
steps= 103
steps= 50


 90%|████████▉ | 4495/5000 [20:37<02:23,  3.52it/s]

steps= 291


 90%|████████▉ | 4496/5000 [20:38<05:38,  1.49it/s]

steps= 652


 90%|████████▉ | 4497/5000 [20:39<04:48,  1.74it/s]

steps= 106


 90%|████████▉ | 4498/5000 [20:39<03:58,  2.11it/s]

steps= 70


 90%|████████▉ | 4499/5000 [20:39<03:26,  2.42it/s]

steps= 75


 90%|█████████ | 4500/5000 [20:40<03:22,  2.46it/s]

steps= 94


 90%|█████████ | 4501/5000 [20:41<04:35,  1.81it/s]

steps= 300
episode: 4500 episode reward: -1308 eps: 0.6311745423847503 avg reward (last 100): -1101.3960396039604 episode loss:  23455.605
avg reward for last 100 episodes: -1101.3960396039604


 90%|█████████ | 4503/5000 [20:41<03:11,  2.60it/s]

steps= 71
steps= 31


 90%|█████████ | 4504/5000 [20:41<03:02,  2.72it/s]

steps= 73


 90%|█████████ | 4505/5000 [20:42<04:12,  1.96it/s]

steps= 536


 90%|█████████ | 4506/5000 [20:43<04:19,  1.90it/s]

steps= 230


 90%|█████████ | 4507/5000 [20:43<04:11,  1.96it/s]

steps= 191
steps= 41


 90%|█████████ | 4509/5000 [20:44<03:56,  2.08it/s]

steps= 394


 90%|█████████ | 4510/5000 [20:44<03:43,  2.19it/s]

steps= 179


 90%|█████████ | 4511/5000 [20:45<03:51,  2.11it/s]

steps= 171


 90%|█████████ | 4512/5000 [20:46<04:25,  1.84it/s]

steps= 156


 90%|█████████ | 4513/5000 [20:46<04:47,  1.70it/s]

steps= 274


 90%|█████████ | 4514/5000 [20:47<05:07,  1.58it/s]

steps= 300


 90%|█████████ | 4515/5000 [20:48<04:46,  1.69it/s]

steps= 191


 90%|█████████ | 4516/5000 [20:48<04:19,  1.87it/s]

steps= 168


 90%|█████████ | 4517/5000 [20:49<04:23,  1.83it/s]

steps= 146


 90%|█████████ | 4519/5000 [20:50<04:36,  1.74it/s]

steps= 542
steps= 95


 90%|█████████ | 4521/5000 [20:50<02:54,  2.74it/s]

steps= 73
steps= 58


 90%|█████████ | 4522/5000 [20:52<05:38,  1.41it/s]

steps= 660


 90%|█████████ | 4524/5000 [20:53<04:19,  1.84it/s]

steps= 324
steps= 58


 90%|█████████ | 4525/5000 [20:53<04:03,  1.95it/s]

steps= 149


 91%|█████████ | 4526/5000 [20:53<03:21,  2.35it/s]

steps= 122


 91%|█████████ | 4527/5000 [20:54<03:34,  2.20it/s]

steps= 179


 91%|█████████ | 4528/5000 [20:55<05:23,  1.46it/s]

steps= 473


 91%|█████████ | 4529/5000 [20:55<04:34,  1.72it/s]

steps= 104


 91%|█████████ | 4530/5000 [20:56<04:04,  1.92it/s]

steps= 246


 91%|█████████ | 4531/5000 [20:57<05:14,  1.49it/s]

steps= 477


 91%|█████████ | 4532/5000 [20:57<04:25,  1.77it/s]

steps= 89
steps= 12


 91%|█████████ | 4534/5000 [20:57<03:20,  2.33it/s]

steps= 93


 91%|█████████ | 4535/5000 [20:58<03:03,  2.54it/s]

steps= 126


 91%|█████████ | 4536/5000 [20:58<02:49,  2.74it/s]

steps= 78


 91%|█████████ | 4537/5000 [20:59<03:10,  2.44it/s]

steps= 163


 91%|█████████ | 4538/5000 [20:59<03:18,  2.33it/s]

steps= 164
steps= 25


 91%|█████████ | 4541/5000 [21:00<02:33,  2.98it/s]

steps= 437
steps= 100


 91%|█████████ | 4542/5000 [21:01<03:55,  1.94it/s]

steps= 497


 91%|█████████ | 4543/5000 [21:01<03:39,  2.08it/s]

steps= 218


 91%|█████████ | 4545/5000 [21:02<02:48,  2.69it/s]

steps= 243
steps= 64


 91%|█████████ | 4546/5000 [21:02<02:23,  3.17it/s]

steps= 136


 91%|█████████ | 4548/5000 [21:02<01:52,  4.01it/s]

steps= 117
steps= 63
steps= 34


 91%|█████████ | 4550/5000 [21:03<01:37,  4.61it/s]

steps= 100


 91%|█████████ | 4551/5000 [21:03<01:51,  4.02it/s]

steps= 146


 91%|█████████ | 4552/5000 [21:04<02:46,  2.69it/s]

steps= 244


 91%|█████████ | 4553/5000 [21:04<02:55,  2.55it/s]

steps= 198


 91%|█████████ | 4554/5000 [21:05<02:51,  2.59it/s]

steps= 108
steps= 15


 91%|█████████ | 4556/5000 [21:05<02:39,  2.78it/s]

steps= 304


 91%|█████████ | 4557/5000 [21:06<02:59,  2.46it/s]

steps= 170


 91%|█████████ | 4558/5000 [21:06<02:41,  2.73it/s]

steps= 110


 91%|█████████ | 4559/5000 [21:07<03:23,  2.17it/s]

steps= 308


 91%|█████████ | 4560/5000 [21:07<03:34,  2.05it/s]

steps= 242


 91%|█████████ | 4561/5000 [21:08<03:31,  2.08it/s]

steps= 129


 91%|█████████ | 4562/5000 [21:08<04:16,  1.71it/s]

steps= 408


 91%|█████████▏| 4563/5000 [21:09<04:25,  1.65it/s]

steps= 286


 91%|█████████▏| 4564/5000 [21:11<06:18,  1.15it/s]

steps= 708


 91%|█████████▏| 4565/5000 [21:12<07:05,  1.02it/s]

steps= 446


 91%|█████████▏| 4567/5000 [21:13<04:38,  1.55it/s]

steps= 357
steps= 57


 91%|█████████▏| 4568/5000 [21:13<03:51,  1.87it/s]

steps= 156


 91%|█████████▏| 4569/5000 [21:13<04:05,  1.76it/s]

steps= 242


 91%|█████████▏| 4571/5000 [21:14<02:41,  2.66it/s]

steps= 142
steps= 82


 91%|█████████▏| 4573/5000 [21:14<01:44,  4.07it/s]

steps= 51
steps= 85


 92%|█████████▏| 4575/5000 [21:15<02:08,  3.30it/s]

steps= 370
steps= 28


 92%|█████████▏| 4576/5000 [21:15<02:32,  2.77it/s]

steps= 120


 92%|█████████▏| 4577/5000 [21:16<02:14,  3.15it/s]

steps= 70


 92%|█████████▏| 4578/5000 [21:16<02:40,  2.62it/s]

steps= 186


 92%|█████████▏| 4579/5000 [21:17<03:00,  2.33it/s]

steps= 265


 92%|█████████▏| 4580/5000 [21:17<02:34,  2.71it/s]

steps= 135


 92%|█████████▏| 4581/5000 [21:17<02:48,  2.49it/s]

steps= 237


 92%|█████████▏| 4583/5000 [21:18<02:11,  3.18it/s]

steps= 180
steps= 38


 92%|█████████▏| 4584/5000 [21:18<02:28,  2.80it/s]

steps= 129


 92%|█████████▏| 4585/5000 [21:19<02:29,  2.77it/s]

steps= 82


 92%|█████████▏| 4586/5000 [21:19<02:18,  3.00it/s]

steps= 100


 92%|█████████▏| 4587/5000 [21:20<02:46,  2.49it/s]

steps= 215


 92%|█████████▏| 4588/5000 [21:20<02:35,  2.64it/s]

steps= 80


 92%|█████████▏| 4590/5000 [21:21<02:41,  2.54it/s]

steps= 159
steps= 155


 92%|█████████▏| 4591/5000 [21:21<02:06,  3.23it/s]

steps= 67
steps= 77


 92%|█████████▏| 4594/5000 [21:22<01:46,  3.80it/s]

steps= 74
steps= 63


 92%|█████████▏| 4595/5000 [21:22<01:49,  3.71it/s]

steps= 164


 92%|█████████▏| 4596/5000 [21:22<01:51,  3.63it/s]

steps= 54


 92%|█████████▏| 4597/5000 [21:23<03:17,  2.05it/s]

steps= 245


 92%|█████████▏| 4598/5000 [21:24<04:00,  1.67it/s]

steps= 314


 92%|█████████▏| 4599/5000 [21:26<05:54,  1.13it/s]

steps= 564


 92%|█████████▏| 4601/5000 [21:26<03:45,  1.77it/s]

steps= 102
steps= 65
episode: 4600 episode reward: -1073 eps: 0.6248939382868509 avg reward (last 100): -1098.039603960396 episode loss:  22094.584
avg reward for last 100 episodes: -1098.039603960396


 92%|█████████▏| 4602/5000 [21:27<03:55,  1.69it/s]

steps= 201


 92%|█████████▏| 4603/5000 [21:27<03:26,  1.92it/s]

steps= 172


 92%|█████████▏| 4604/5000 [21:27<02:52,  2.30it/s]

steps= 58


 92%|█████████▏| 4606/5000 [21:28<02:27,  2.67it/s]

steps= 154
steps= 44


 92%|█████████▏| 4607/5000 [21:28<02:12,  2.96it/s]

steps= 71


 92%|█████████▏| 4608/5000 [21:29<02:34,  2.53it/s]

steps= 261


 92%|█████████▏| 4609/5000 [21:29<02:42,  2.40it/s]

steps= 278


 92%|█████████▏| 4610/5000 [21:31<04:56,  1.32it/s]

steps= 769


 92%|█████████▏| 4611/5000 [21:32<04:41,  1.38it/s]

steps= 213


 92%|█████████▏| 4612/5000 [21:32<05:01,  1.29it/s]

steps= 334


 92%|█████████▏| 4613/5000 [21:33<05:24,  1.19it/s]

steps= 272


 92%|█████████▏| 4615/5000 [21:34<03:20,  1.92it/s]

steps= 72
steps= 85


 92%|█████████▏| 4616/5000 [21:35<04:13,  1.51it/s]

steps= 425


 92%|█████████▏| 4617/5000 [21:35<03:25,  1.86it/s]

steps= 102


 92%|█████████▏| 4618/5000 [21:35<03:04,  2.07it/s]

steps= 105
steps= 106


 92%|█████████▏| 4620/5000 [21:36<03:11,  1.99it/s]

steps= 279
steps= 51


 92%|█████████▏| 4622/5000 [21:37<02:34,  2.44it/s]

steps= 171


 92%|█████████▏| 4623/5000 [21:38<03:06,  2.02it/s]

steps= 331


 93%|█████████▎| 4626/5000 [21:38<02:23,  2.61it/s]

steps= 311
steps= 51
steps= 60


 93%|█████████▎| 4628/5000 [21:39<01:34,  3.95it/s]

steps= 58
steps= 89


 93%|█████████▎| 4629/5000 [21:39<01:20,  4.62it/s]

steps= 74


 93%|█████████▎| 4631/5000 [21:39<01:08,  5.37it/s]

steps= 105
steps= 73


 93%|█████████▎| 4632/5000 [21:39<01:31,  4.00it/s]

steps= 252


 93%|█████████▎| 4633/5000 [21:40<02:06,  2.91it/s]

steps= 178


 93%|█████████▎| 4634/5000 [21:40<02:11,  2.79it/s]

steps= 153


 93%|█████████▎| 4635/5000 [21:41<02:05,  2.91it/s]

steps= 201


 93%|█████████▎| 4637/5000 [21:41<01:41,  3.58it/s]

steps= 125
steps= 33


 93%|█████████▎| 4638/5000 [21:41<01:29,  4.04it/s]

steps= 94


 93%|█████████▎| 4639/5000 [21:42<01:51,  3.25it/s]

steps= 206


 93%|█████████▎| 4640/5000 [21:42<01:47,  3.36it/s]

steps= 77


 93%|█████████▎| 4641/5000 [21:43<02:10,  2.75it/s]

steps= 240


 93%|█████████▎| 4642/5000 [21:43<02:58,  2.00it/s]

steps= 363


 93%|█████████▎| 4644/5000 [21:44<02:26,  2.44it/s]

steps= 325
steps= 67
steps= 15


 93%|█████████▎| 4646/5000 [21:46<02:57,  1.99it/s]

steps= 571


 93%|█████████▎| 4648/5000 [21:47<03:25,  1.72it/s]

steps= 780
steps= 127


 93%|█████████▎| 4649/5000 [21:47<02:42,  2.15it/s]

steps= 69


 93%|█████████▎| 4650/5000 [21:48<03:01,  1.93it/s]

steps= 242


 93%|█████████▎| 4651/5000 [21:48<02:55,  1.99it/s]

steps= 221


 93%|█████████▎| 4652/5000 [21:50<05:31,  1.05it/s]

steps= 651


 93%|█████████▎| 4653/5000 [21:52<06:47,  1.18s/it]

steps= 775


 93%|█████████▎| 4655/5000 [21:53<04:01,  1.43it/s]

steps= 127
steps= 13


 93%|█████████▎| 4657/5000 [21:53<02:30,  2.29it/s]

steps= 66
steps= 36
steps= 59


 93%|█████████▎| 4659/5000 [21:53<02:09,  2.64it/s]

steps= 257


 93%|█████████▎| 4660/5000 [21:54<03:11,  1.78it/s]

steps= 515


 93%|█████████▎| 4661/5000 [21:55<02:47,  2.02it/s]

steps= 101


 93%|█████████▎| 4662/5000 [21:56<03:34,  1.57it/s]

steps= 308


 93%|█████████▎| 4663/5000 [21:56<02:53,  1.94it/s]

steps= 101


 93%|█████████▎| 4665/5000 [21:57<02:13,  2.51it/s]

steps= 216
steps= 63


 93%|█████████▎| 4667/5000 [21:57<01:36,  3.45it/s]

steps= 190
steps= 28


 93%|█████████▎| 4669/5000 [21:58<01:43,  3.18it/s]

steps= 326
steps= 78
steps= 45


 93%|█████████▎| 4671/5000 [21:58<01:33,  3.51it/s]

steps= 187


 93%|█████████▎| 4672/5000 [21:59<02:10,  2.52it/s]

steps= 364


 93%|█████████▎| 4673/5000 [21:59<01:56,  2.81it/s]

steps= 161


 94%|█████████▎| 4675/5000 [22:00<02:03,  2.64it/s]

steps= 537
steps= 81


 94%|█████████▎| 4676/5000 [22:01<03:06,  1.74it/s]

steps= 348


 94%|█████████▎| 4678/5000 [22:01<02:02,  2.64it/s]

steps= 88
steps= 32


 94%|█████████▎| 4680/5000 [22:02<01:38,  3.24it/s]

steps= 153
steps= 62


 94%|█████████▎| 4681/5000 [22:02<01:53,  2.82it/s]

steps= 243


 94%|█████████▎| 4682/5000 [22:03<02:39,  1.99it/s]

steps= 522


 94%|█████████▎| 4684/5000 [22:04<02:23,  2.21it/s]

steps= 395
steps= 30


 94%|█████████▎| 4686/5000 [22:05<02:08,  2.44it/s]

steps= 285
steps= 61
steps= 43


 94%|█████████▍| 4688/5000 [22:06<02:04,  2.51it/s]

steps= 184


 94%|█████████▍| 4689/5000 [22:06<01:53,  2.75it/s]

steps= 116


 94%|█████████▍| 4690/5000 [22:06<01:55,  2.70it/s]

steps= 192


 94%|█████████▍| 4692/5000 [22:07<01:37,  3.15it/s]

steps= 152
steps= 36


 94%|█████████▍| 4693/5000 [22:08<01:53,  2.71it/s]

steps= 187


 94%|█████████▍| 4694/5000 [22:08<01:41,  3.02it/s]

steps= 112


 94%|█████████▍| 4695/5000 [22:10<05:17,  1.04s/it]

steps= 1155


 94%|█████████▍| 4696/5000 [22:11<04:31,  1.12it/s]

steps= 198


 94%|█████████▍| 4697/5000 [22:11<03:41,  1.37it/s]

steps= 113


 94%|█████████▍| 4698/5000 [22:13<04:19,  1.16it/s]

steps= 476


 94%|█████████▍| 4699/5000 [22:13<03:36,  1.39it/s]

steps= 170


 94%|█████████▍| 4700/5000 [22:13<02:53,  1.73it/s]

steps= 109


 94%|█████████▍| 4702/5000 [22:14<01:57,  2.54it/s]

steps= 164
episode: 4700 episode reward: -1172 eps: 0.6186758303531436 avg reward (last 100): -1093.950495049505 episode loss:  25069.055
avg reward for last 100 episodes: -1093.950495049505
steps= 45


 94%|█████████▍| 4703/5000 [22:14<02:32,  1.95it/s]

steps= 426


 94%|█████████▍| 4704/5000 [22:15<02:55,  1.69it/s]

steps= 291


 94%|█████████▍| 4705/5000 [22:15<02:24,  2.05it/s]

steps= 145


 94%|█████████▍| 4706/5000 [22:16<03:04,  1.59it/s]

steps= 374
steps= 34


 94%|█████████▍| 4708/5000 [22:17<02:18,  2.12it/s]

steps= 54
steps= 56


 94%|█████████▍| 4710/5000 [22:17<02:09,  2.24it/s]

steps= 265


 94%|█████████▍| 4711/5000 [22:18<01:50,  2.62it/s]

steps= 103


 94%|█████████▍| 4712/5000 [22:18<01:51,  2.59it/s]

steps= 182


 94%|█████████▍| 4713/5000 [22:18<01:43,  2.77it/s]

steps= 169


 94%|█████████▍| 4714/5000 [22:19<01:54,  2.50it/s]

steps= 297


 94%|█████████▍| 4715/5000 [22:19<01:42,  2.78it/s]

steps= 141


 94%|█████████▍| 4716/5000 [22:20<01:46,  2.67it/s]

steps= 248


 94%|█████████▍| 4717/5000 [22:21<03:31,  1.34it/s]

steps= 927


 94%|█████████▍| 4718/5000 [22:22<03:29,  1.34it/s]

steps= 258


 94%|█████████▍| 4719/5000 [22:22<02:55,  1.60it/s]

steps= 190


 94%|█████████▍| 4720/5000 [22:23<02:54,  1.60it/s]

steps= 226


 94%|█████████▍| 4721/5000 [22:24<03:21,  1.38it/s]

steps= 300


 94%|█████████▍| 4722/5000 [22:24<02:50,  1.63it/s]

steps= 105


 94%|█████████▍| 4723/5000 [22:25<02:30,  1.84it/s]

steps= 99


 94%|█████████▍| 4724/5000 [22:25<02:18,  1.99it/s]

steps= 113


 94%|█████████▍| 4725/5000 [22:25<02:20,  1.96it/s]

steps= 180


 95%|█████████▍| 4726/5000 [22:26<02:46,  1.65it/s]

steps= 233


 95%|█████████▍| 4727/5000 [22:27<02:58,  1.53it/s]

steps= 339


 95%|█████████▍| 4728/5000 [22:27<02:36,  1.74it/s]

steps= 94


 95%|█████████▍| 4729/5000 [22:28<02:29,  1.82it/s]

steps= 209


 95%|█████████▍| 4730/5000 [22:29<02:28,  1.81it/s]

steps= 179


 95%|█████████▍| 4731/5000 [22:29<02:16,  1.97it/s]

steps= 123


 95%|█████████▍| 4732/5000 [22:29<02:00,  2.23it/s]

steps= 109


 95%|█████████▍| 4733/5000 [22:30<02:46,  1.60it/s]

steps= 379


 95%|█████████▍| 4734/5000 [22:31<02:16,  1.95it/s]

steps= 81


 95%|█████████▍| 4735/5000 [22:31<02:14,  1.97it/s]

steps= 231


 95%|█████████▍| 4736/5000 [22:32<02:43,  1.61it/s]

steps= 274


 95%|█████████▍| 4737/5000 [22:32<02:21,  1.86it/s]

steps= 174


 95%|█████████▍| 4738/5000 [22:33<02:24,  1.81it/s]

steps= 148


 95%|█████████▍| 4739/5000 [22:33<02:20,  1.86it/s]

steps= 149


 95%|█████████▍| 4740/5000 [22:34<02:39,  1.63it/s]

steps= 367


 95%|█████████▍| 4741/5000 [22:35<02:48,  1.53it/s]

steps= 260


 95%|█████████▍| 4742/5000 [22:35<02:27,  1.75it/s]

steps= 84


 95%|█████████▍| 4743/5000 [22:36<02:29,  1.72it/s]

steps= 191


 95%|█████████▍| 4744/5000 [22:36<02:27,  1.73it/s]

steps= 212


 95%|█████████▍| 4745/5000 [22:37<02:27,  1.73it/s]

steps= 263


 95%|█████████▍| 4746/5000 [22:37<02:11,  1.93it/s]

steps= 107


 95%|█████████▍| 4747/5000 [22:38<02:05,  2.02it/s]

steps= 142


 95%|█████████▍| 4748/5000 [22:38<01:56,  2.16it/s]

steps= 108


 95%|█████████▍| 4749/5000 [22:39<02:05,  2.00it/s]

steps= 196


 95%|█████████▌| 4750/5000 [22:39<02:16,  1.83it/s]

steps= 192


 95%|█████████▌| 4751/5000 [22:40<02:09,  1.92it/s]

steps= 241


 95%|█████████▌| 4752/5000 [22:40<02:04,  1.99it/s]

steps= 184


 95%|█████████▌| 4753/5000 [22:41<01:43,  2.39it/s]

steps= 63


 95%|█████████▌| 4755/5000 [22:41<01:11,  3.43it/s]

steps= 86
steps= 95


 95%|█████████▌| 4756/5000 [22:42<01:31,  2.68it/s]

steps= 184


 95%|█████████▌| 4757/5000 [22:42<01:25,  2.85it/s]

steps= 120
steps= 14


 95%|█████████▌| 4759/5000 [22:42<01:19,  3.03it/s]

steps= 295


 95%|█████████▌| 4760/5000 [22:43<01:26,  2.76it/s]

steps= 202


 95%|█████████▌| 4761/5000 [22:43<01:32,  2.59it/s]

steps= 144


 95%|█████████▌| 4762/5000 [22:44<02:28,  1.60it/s]

steps= 534


 95%|█████████▌| 4763/5000 [22:45<02:06,  1.87it/s]

steps= 95


 95%|█████████▌| 4764/5000 [22:45<02:09,  1.82it/s]

steps= 186


 95%|█████████▌| 4765/5000 [22:46<02:23,  1.64it/s]

steps= 369


 95%|█████████▌| 4767/5000 [22:47<01:48,  2.14it/s]

steps= 300
steps= 37


 95%|█████████▌| 4768/5000 [22:48<03:12,  1.20it/s]

steps= 680


 95%|█████████▌| 4769/5000 [22:49<02:56,  1.31it/s]

steps= 388


 95%|█████████▌| 4770/5000 [22:50<02:41,  1.43it/s]

steps= 228


 95%|█████████▌| 4771/5000 [22:50<02:26,  1.56it/s]

steps= 113


 95%|█████████▌| 4772/5000 [22:51<02:10,  1.75it/s]

steps= 88


 95%|█████████▌| 4773/5000 [22:52<02:42,  1.40it/s]

steps= 270


 95%|█████████▌| 4774/5000 [22:52<02:30,  1.51it/s]

steps= 278
steps= 10


 96%|█████████▌| 4776/5000 [22:53<01:58,  1.89it/s]

steps= 224


 96%|█████████▌| 4778/5000 [22:53<01:23,  2.64it/s]

steps= 311
steps= 42


 96%|█████████▌| 4779/5000 [22:53<01:07,  3.26it/s]

steps= 58


 96%|█████████▌| 4781/5000 [22:54<00:54,  3.98it/s]

steps= 236
steps= 83


 96%|█████████▌| 4782/5000 [22:54<01:15,  2.88it/s]

steps= 319


 96%|█████████▌| 4783/5000 [22:55<01:19,  2.72it/s]

steps= 187


 96%|█████████▌| 4785/5000 [22:55<00:59,  3.62it/s]

steps= 93
steps= 83


 96%|█████████▌| 4786/5000 [22:55<01:04,  3.31it/s]

steps= 250


 96%|█████████▌| 4787/5000 [22:56<01:21,  2.61it/s]

steps= 430


 96%|█████████▌| 4788/5000 [22:57<01:46,  1.99it/s]

steps= 427


 96%|█████████▌| 4789/5000 [22:57<01:32,  2.28it/s]

steps= 166


 96%|█████████▌| 4790/5000 [22:58<01:35,  2.21it/s]

steps= 400


 96%|█████████▌| 4792/5000 [22:58<01:12,  2.86it/s]

steps= 277
steps= 75


 96%|█████████▌| 4794/5000 [22:58<00:56,  3.67it/s]

steps= 187
steps= 113


 96%|█████████▌| 4795/5000 [22:59<00:52,  3.90it/s]

steps= 182
steps= 64


 96%|█████████▌| 4798/5000 [23:00<00:55,  3.65it/s]

steps= 281
steps= 63


 96%|█████████▌| 4799/5000 [23:01<01:21,  2.48it/s]

steps= 267


 96%|█████████▌| 4801/5000 [23:02<01:23,  2.38it/s]

steps= 223
steps= 53
episode: 4800 episode reward: -1061 eps: 0.6125195967054651 avg reward (last 100): -1112.2079207920792 episode loss:  23429.734
avg reward for last 100 episodes: -1112.2079207920792


 96%|█████████▌| 4802/5000 [23:03<02:13,  1.48it/s]

steps= 414


 96%|█████████▌| 4803/5000 [23:03<01:46,  1.85it/s]

steps= 82


 96%|█████████▌| 4804/5000 [23:04<01:55,  1.69it/s]

steps= 285


 96%|█████████▌| 4805/5000 [23:04<01:35,  2.04it/s]

steps= 131


 96%|█████████▌| 4806/5000 [23:05<01:58,  1.63it/s]

steps= 560


 96%|█████████▌| 4807/5000 [23:05<01:51,  1.74it/s]

steps= 249


 96%|█████████▌| 4809/5000 [23:06<01:14,  2.56it/s]

steps= 128
steps= 23


 96%|█████████▌| 4810/5000 [23:07<01:30,  2.09it/s]

steps= 204


 96%|█████████▌| 4811/5000 [23:08<02:03,  1.54it/s]

steps= 499


 96%|█████████▌| 4812/5000 [23:08<01:38,  1.91it/s]

steps= 76


 96%|█████████▋| 4814/5000 [23:09<01:24,  2.20it/s]

steps= 321
steps= 100


 96%|█████████▋| 4816/5000 [23:09<01:08,  2.70it/s]

steps= 286
steps= 54


 96%|█████████▋| 4818/5000 [23:10<00:50,  3.62it/s]

steps= 90
steps= 69


 96%|█████████▋| 4819/5000 [23:10<01:10,  2.56it/s]

steps= 231


 96%|█████████▋| 4820/5000 [23:11<01:12,  2.47it/s]

steps= 116


 96%|█████████▋| 4821/5000 [23:11<01:02,  2.85it/s]

steps= 61


 96%|█████████▋| 4823/5000 [23:12<00:53,  3.29it/s]

steps= 220
steps= 41


 96%|█████████▋| 4824/5000 [23:12<01:18,  2.24it/s]

steps= 339


 96%|█████████▋| 4825/5000 [23:13<01:16,  2.28it/s]

steps= 84


 97%|█████████▋| 4826/5000 [23:13<01:08,  2.55it/s]

steps= 65


 97%|█████████▋| 4827/5000 [23:13<01:03,  2.72it/s]

steps= 62


 97%|█████████▋| 4828/5000 [23:14<01:18,  2.18it/s]

steps= 155


 97%|█████████▋| 4829/5000 [23:15<01:21,  2.09it/s]

steps= 159


 97%|█████████▋| 4831/5000 [23:15<01:08,  2.47it/s]

steps= 201
steps= 27


 97%|█████████▋| 4832/5000 [23:16<01:41,  1.65it/s]

steps= 509


 97%|█████████▋| 4833/5000 [23:17<01:21,  2.06it/s]

steps= 57


 97%|█████████▋| 4834/5000 [23:17<01:33,  1.77it/s]

steps= 186


 97%|█████████▋| 4835/5000 [23:18<01:25,  1.93it/s]

steps= 69


 97%|█████████▋| 4836/5000 [23:19<01:44,  1.57it/s]

steps= 290


 97%|█████████▋| 4837/5000 [23:19<01:25,  1.90it/s]

steps= 100


 97%|█████████▋| 4838/5000 [23:19<01:15,  2.14it/s]

steps= 102


 97%|█████████▋| 4839/5000 [23:20<01:19,  2.03it/s]

steps= 167


 97%|█████████▋| 4840/5000 [23:21<01:27,  1.83it/s]

steps= 186


 97%|█████████▋| 4841/5000 [23:21<01:27,  1.82it/s]

steps= 150


 97%|█████████▋| 4842/5000 [23:21<01:17,  2.04it/s]

steps= 241


 97%|█████████▋| 4843/5000 [23:22<01:11,  2.19it/s]

steps= 180


 97%|█████████▋| 4844/5000 [23:22<01:04,  2.41it/s]

steps= 95


 97%|█████████▋| 4845/5000 [23:23<01:32,  1.68it/s]

steps= 219


 97%|█████████▋| 4846/5000 [23:24<01:55,  1.33it/s]

steps= 352
steps= 31


 97%|█████████▋| 4848/5000 [23:25<01:31,  1.66it/s]

steps= 201


 97%|█████████▋| 4849/5000 [23:26<01:37,  1.55it/s]

steps= 168


 97%|█████████▋| 4850/5000 [23:26<01:20,  1.87it/s]

steps= 34


 97%|█████████▋| 4851/5000 [23:26<01:09,  2.14it/s]

steps= 80


 97%|█████████▋| 4852/5000 [23:27<01:06,  2.21it/s]

steps= 98


 97%|█████████▋| 4853/5000 [23:27<01:08,  2.14it/s]

steps= 201


 97%|█████████▋| 4854/5000 [23:28<01:08,  2.14it/s]

steps= 217


 97%|█████████▋| 4855/5000 [23:28<01:05,  2.23it/s]

steps= 280


 97%|█████████▋| 4856/5000 [23:28<01:06,  2.16it/s]

steps= 253


 97%|█████████▋| 4857/5000 [23:30<01:43,  1.38it/s]

steps= 519


 97%|█████████▋| 4858/5000 [23:31<01:48,  1.31it/s]

steps= 298


 97%|█████████▋| 4859/5000 [23:31<01:39,  1.42it/s]

steps= 163


 97%|█████████▋| 4860/5000 [23:32<02:01,  1.15it/s]

steps= 529
steps= 66


 97%|█████████▋| 4862/5000 [23:33<01:28,  1.55it/s]

steps= 132


 97%|█████████▋| 4863/5000 [23:33<01:17,  1.76it/s]

steps= 195


 97%|█████████▋| 4865/5000 [23:33<00:49,  2.70it/s]

steps= 181
steps= 103


 97%|█████████▋| 4866/5000 [23:34<00:39,  3.41it/s]

steps= 57
steps= 16


 97%|█████████▋| 4868/5000 [23:34<00:32,  4.10it/s]

steps= 156
steps= 20


 97%|█████████▋| 4871/5000 [23:34<00:28,  4.53it/s]

steps= 178
steps= 25


 97%|█████████▋| 4872/5000 [23:35<00:41,  3.06it/s]

steps= 123


 97%|█████████▋| 4874/5000 [23:36<00:38,  3.25it/s]

steps= 176
steps= 41
steps= 46


 98%|█████████▊| 4876/5000 [23:37<00:48,  2.54it/s]

steps= 481


 98%|█████████▊| 4877/5000 [23:37<00:44,  2.74it/s]

steps= 89


 98%|█████████▊| 4878/5000 [23:38<00:50,  2.41it/s]

steps= 143


 98%|█████████▊| 4879/5000 [23:38<00:48,  2.50it/s]

steps= 172
steps= 23


 98%|█████████▊| 4881/5000 [23:38<00:38,  3.08it/s]

steps= 112


 98%|█████████▊| 4882/5000 [23:39<00:37,  3.11it/s]

steps= 78


 98%|█████████▊| 4883/5000 [23:39<00:49,  2.35it/s]

steps= 245


 98%|█████████▊| 4884/5000 [23:40<01:00,  1.92it/s]

steps= 254


 98%|█████████▊| 4885/5000 [23:41<01:00,  1.89it/s]

steps= 161


 98%|█████████▊| 4886/5000 [23:41<01:03,  1.79it/s]

steps= 218


 98%|█████████▊| 4887/5000 [23:42<00:54,  2.06it/s]

steps= 89


 98%|█████████▊| 4888/5000 [23:42<00:54,  2.06it/s]

steps= 161


 98%|█████████▊| 4889/5000 [23:42<00:51,  2.15it/s]

steps= 151


 98%|█████████▊| 4890/5000 [23:43<00:48,  2.26it/s]

steps= 126


 98%|█████████▊| 4891/5000 [23:43<00:43,  2.51it/s]

steps= 82


 98%|█████████▊| 4892/5000 [23:44<00:48,  2.21it/s]

steps= 199


 98%|█████████▊| 4894/5000 [23:44<00:40,  2.60it/s]

steps= 202
steps= 60


 98%|█████████▊| 4895/5000 [23:45<00:46,  2.25it/s]

steps= 147


 98%|█████████▊| 4896/5000 [23:45<00:47,  2.19it/s]

steps= 155


 98%|█████████▊| 4897/5000 [23:46<00:50,  2.03it/s]

steps= 181


 98%|█████████▊| 4899/5000 [23:47<00:38,  2.59it/s]

steps= 160
steps= 46


 98%|█████████▊| 4900/5000 [23:47<00:38,  2.59it/s]

steps= 98


 98%|█████████▊| 4901/5000 [23:48<00:39,  2.52it/s]

steps= 116
episode: 4900 episode reward: -1124 eps: 0.6064246216537515 avg reward (last 100): -976.3762376237623 episode loss:  26633.945
avg reward for last 100 episodes: -976.3762376237623
steps= 58


 98%|█████████▊| 4903/5000 [23:48<00:29,  3.34it/s]

steps= 94


 98%|█████████▊| 4904/5000 [23:48<00:29,  3.23it/s]

steps= 125
steps= 7


 98%|█████████▊| 4906/5000 [23:48<00:23,  4.01it/s]

steps= 119


 98%|█████████▊| 4908/5000 [23:49<00:21,  4.27it/s]

steps= 160
steps= 94


 98%|█████████▊| 4909/5000 [23:50<00:39,  2.28it/s]

steps= 422


 98%|█████████▊| 4910/5000 [23:50<00:39,  2.29it/s]

steps= 183


 98%|█████████▊| 4911/5000 [23:52<01:24,  1.06it/s]

steps= 905


 98%|█████████▊| 4912/5000 [23:53<01:13,  1.20it/s]

steps= 283


 98%|█████████▊| 4914/5000 [23:54<00:49,  1.72it/s]

steps= 253
steps= 59


 98%|█████████▊| 4915/5000 [23:54<00:49,  1.72it/s]

steps= 210


 98%|█████████▊| 4916/5000 [23:55<00:46,  1.82it/s]

steps= 174


 98%|█████████▊| 4917/5000 [23:55<00:47,  1.73it/s]

steps= 234


 98%|█████████▊| 4918/5000 [23:56<00:41,  1.99it/s]

steps= 169


 98%|█████████▊| 4919/5000 [23:56<00:34,  2.36it/s]

steps= 82


 98%|█████████▊| 4920/5000 [23:57<00:37,  2.11it/s]

steps= 236


 98%|█████████▊| 4921/5000 [23:57<00:31,  2.47it/s]

steps= 153
steps= 

 98%|█████████▊| 4922/5000 [23:57<00:26,  2.91it/s]

47


 98%|█████████▊| 4923/5000 [23:58<00:29,  2.64it/s]

steps= 258


 98%|█████████▊| 4924/5000 [23:58<00:25,  2.99it/s]

steps= 124


 98%|█████████▊| 4925/5000 [23:58<00:24,  3.08it/s]

steps= 189


 99%|█████████▊| 4926/5000 [23:58<00:24,  3.01it/s]

steps= 103


 99%|█████████▊| 4927/5000 [23:59<00:36,  2.03it/s]

steps= 248


 99%|█████████▊| 4928/5000 [24:00<00:33,  2.15it/s]

steps= 109


 99%|█████████▊| 4929/5000 [24:00<00:36,  1.96it/s]

steps= 290


 99%|█████████▊| 4930/5000 [24:02<00:52,  1.32it/s]

steps= 628


 99%|█████████▊| 4931/5000 [24:03<01:01,  1.12it/s]

steps= 529


 99%|█████████▊| 4932/5000 [24:04<01:03,  1.07it/s]

steps= 430


 99%|█████████▊| 4933/5000 [24:04<00:54,  1.23it/s]

steps= 180


 99%|█████████▊| 4934/5000 [24:05<00:52,  1.25it/s]

steps= 299


 99%|█████████▊| 4935/5000 [24:06<00:47,  1.37it/s]

steps= 191


 99%|█████████▊| 4936/5000 [24:07<00:49,  1.28it/s]

steps= 486


 99%|█████████▊| 4937/5000 [24:08<00:57,  1.09it/s]

steps= 583


 99%|█████████▉| 4938/5000 [24:09<01:05,  1.06s/it]

steps= 489


 99%|█████████▉| 4940/5000 [24:10<00:44,  1.35it/s]

steps= 356
steps= 48


 99%|█████████▉| 4941/5000 [24:11<00:48,  1.22it/s]

steps= 414


 99%|█████████▉| 4942/5000 [24:12<00:38,  1.52it/s]

steps= 83


 99%|█████████▉| 4943/5000 [24:12<00:31,  1.80it/s]

steps= 83


 99%|█████████▉| 4944/5000 [24:12<00:29,  1.88it/s]

steps= 197


 99%|█████████▉| 4945/5000 [24:13<00:24,  2.28it/s]

steps= 57


 99%|█████████▉| 4946/5000 [24:13<00:30,  1.78it/s]

steps= 267


 99%|█████████▉| 4947/5000 [24:14<00:31,  1.68it/s]

steps= 438


 99%|█████████▉| 4948/5000 [24:16<00:46,  1.11it/s]

steps= 741


 99%|█████████▉| 4949/5000 [24:17<00:54,  1.07s/it]

steps= 656


 99%|█████████▉| 4950/5000 [24:17<00:40,  1.22it/s]

steps= 82


 99%|█████████▉| 4951/5000 [24:18<00:31,  1.56it/s]

steps= 128


 99%|█████████▉| 4952/5000 [24:18<00:30,  1.55it/s]

steps= 258


 99%|█████████▉| 4953/5000 [24:19<00:29,  1.57it/s]

steps= 292


 99%|█████████▉| 4954/5000 [24:19<00:23,  1.93it/s]

steps= 122


 99%|█████████▉| 4956/5000 [24:20<00:17,  2.58it/s]

steps= 297
steps= 122
steps= 42


 99%|█████████▉| 4958/5000 [24:20<00:13,  3.02it/s]

steps= 186


 99%|█████████▉| 4959/5000 [24:21<00:16,  2.42it/s]

steps= 183


 99%|█████████▉| 4960/5000 [24:21<00:17,  2.27it/s]

steps= 321


 99%|█████████▉| 4961/5000 [24:22<00:21,  1.77it/s]

steps= 491


 99%|█████████▉| 4962/5000 [24:23<00:28,  1.32it/s]

steps= 716


 99%|█████████▉| 4963/5000 [24:24<00:22,  1.63it/s]

steps= 125


 99%|█████████▉| 4964/5000 [24:24<00:19,  1.81it/s]

steps= 111


 99%|█████████▉| 4965/5000 [24:25<00:21,  1.59it/s]

steps= 315


 99%|█████████▉| 4966/5000 [24:25<00:21,  1.58it/s]

steps= 232


 99%|█████████▉| 4967/5000 [24:26<00:18,  1.81it/s]

steps= 96


 99%|█████████▉| 4968/5000 [24:26<00:17,  1.79it/s]

steps= 230


 99%|█████████▉| 4969/5000 [24:27<00:15,  2.06it/s]

steps= 92


 99%|█████████▉| 4970/5000 [24:27<00:12,  2.32it/s]

steps= 80


 99%|█████████▉| 4971/5000 [24:27<00:11,  2.44it/s]

steps= 119


 99%|█████████▉| 4972/5000 [24:28<00:15,  1.80it/s]

steps= 349


 99%|█████████▉| 4973/5000 [24:29<00:15,  1.72it/s]

steps= 328


 99%|█████████▉| 4974/5000 [24:29<00:13,  1.87it/s]

steps= 158


100%|█████████▉| 4975/5000 [24:30<00:12,  2.06it/s]

steps= 182


100%|█████████▉| 4976/5000 [24:30<00:10,  2.21it/s]

steps= 198


100%|█████████▉| 4978/5000 [24:31<00:08,  2.58it/s]

steps= 340
steps= 26


100%|█████████▉| 4979/5000 [24:31<00:10,  2.09it/s]

steps= 400


100%|█████████▉| 4981/5000 [24:32<00:07,  2.67it/s]

steps= 151
steps= 58


100%|█████████▉| 4982/5000 [24:32<00:06,  2.82it/s]

steps= 76


100%|█████████▉| 4983/5000 [24:33<00:09,  1.79it/s]

steps= 589


100%|█████████▉| 4984/5000 [24:34<00:09,  1.73it/s]

steps= 185
steps= 9


100%|█████████▉| 4986/5000 [24:34<00:06,  2.28it/s]

steps= 46


100%|█████████▉| 4987/5000 [24:35<00:05,  2.38it/s]

steps= 191


100%|█████████▉| 4988/5000 [24:35<00:04,  2.46it/s]

steps= 127


100%|█████████▉| 4989/5000 [24:36<00:06,  1.65it/s]

steps= 450


100%|█████████▉| 4990/5000 [24:37<00:06,  1.48it/s]

steps= 302


100%|█████████▉| 4992/5000 [24:37<00:03,  2.24it/s]

steps= 172
steps= 35


100%|█████████▉| 4993/5000 [24:38<00:02,  2.77it/s]

steps= 99


100%|█████████▉| 4995/5000 [24:38<00:01,  2.80it/s]

steps= 278
steps= 52


100%|█████████▉| 4996/5000 [24:39<00:01,  2.91it/s]

steps= 279


100%|█████████▉| 4997/5000 [24:40<00:01,  1.62it/s]

steps= 515


100%|█████████▉| 4998/5000 [24:40<00:01,  1.77it/s]

steps= 97
steps= 

100%|█████████▉| 4999/5000 [24:41<00:00,  2.19it/s]

37


100%|██████████| 5000/5000 [24:41<00:00,  3.38it/s]

steps= 101
gameswon= 225



