In [1]:
import numpy as np
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import math
import random

MIN_LIST_LEN = 8
MAX_LIST_LEN = 8
MAX_STEPS = 210

SUCCESS_REWARD = 2.0
STEP_REWARD = -0.1
SWAP_REWARD = 1.0
INVALID_ACTION_REWARD = -10.0

EPS_START = 0.5
EPS_END = 0.05
EPS_DECAY = 1000
GAMMA = 0.8
NUM_EPISODES = 100000
EPISODES_SAVE = 1000
OUTPUT_DIR = 'datasets/rl_sort_transformer_easy/list6_transformer3_128_gamma08_step210_v1'

# Define the vocabulary
vocab = {
    'Comparison': 0,
    'Swap': 1,
    '0': 2,
    '1': 3,
    '2': 4,
    '3': 5,
    '4': 6,
    '5': 7,
    '6': 8,
    '7': 9,
    'less': 10,
    'equal': 11,
    'more': 12,
    'len1': 13,
    'len2': 14,
    'len3': 15,
    'len4': 16,
    'len5': 17,
    'len6': 18,
    'len7': 19,
    'len8': 20,
}
inv_vocab = {v: k for k, v in vocab.items()}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define the environment
class SortingEnv:
    def __init__(self):
        self.max_steps = MAX_STEPS

    def reset(self):
        self.length = random.randint(MIN_LIST_LEN, MAX_LIST_LEN)
        self.list = [random.randint(1, 100) for _ in range(self.length)]
        while self.list == sorted(self.list):
            self.list = [random.randint(1, 100) for _ in range(self.length)]
        self.indices = None
        self.current_step = 0
        self.done = False
        initial_token = 'len{}'.format(self.length)
        return vocab[initial_token], self.list.copy()
    
    def get_list(self):
        return self.list
    
    def get_list_len(self):
        return len(self.list)

    def step(self, action_tokens):
        action = action_tokens[0]
        reward = -0.01  # default penalty
        response_token = None

        if action == vocab['Comparison']:
            if len(action_tokens) != 3:
                reward = INVALID_ACTION_REWARD
                self.done = True
                return response_token, reward, self.done, self.list.copy()
            index1 = action_tokens[1] - vocab['0']
            index2 = action_tokens[2] - vocab['0']
            if index1 >= self.length or index2 >= self.length or index1 < 0 or index2 < 0:
                reward = INVALID_ACTION_REWARD
                self.done = True
                return response_token, reward, self.done, self.list.copy()
            self.indices = (index1, index2)
            if self.list[index1] < self.list[index2]:
                response_token = vocab['less']
                reward = STEP_REWARD
            elif self.list[index1] == self.list[index2]:
                response_token = vocab['equal']
                reward = STEP_REWARD * 2
            else:
                response_token = vocab['more']
                reward = STEP_REWARD
        elif action == vocab['Swap']:
            if self.indices is None:
                reward = INVALID_ACTION_REWARD
                self.done = True
                return response_token, reward, self.done, self.list.copy()
            index1, index2 = self.indices
            prev_list = self.list.copy()
            self.list[index1], self.list[index2] = self.list[index2], self.list[index1]
            if self.list == sorted(self.list):
                reward = SUCCESS_REWARD
                self.done = True
            #elif prev_list[index1] > prev_list[index2] and self.list[index1] <= self.list[index2]:
            #    reward = 0.1
            elif (index1 < index2 and prev_list[index1] > prev_list[index2] and self.list[index1] <= self.list[index2]) or \
                (index1 > index2 and prev_list[index1] < prev_list[index2] and self.list[index1] >= self.list[index2]):
                reward = SWAP_REWARD
            elif (index1 < index2 and prev_list[index1] < prev_list[index2] and self.list[index1] >= self.list[index2]) or \
                (index1 > index2 and prev_list[index1] > prev_list[index2] and self.list[index1] <= self.list[index2]):
                reward = -SWAP_REWARD
            else:
                reward = STEP_REWARD
            self.indices = None
        else:
            reward = INVALID_ACTION_REWARD
            self.done = True

        self.current_step += 1
        if self.current_step >= self.max_steps:
            self.done = True
        return response_token, reward, self.done, self.list.copy()


Using device: cuda


In [2]:
# Positional Encoding for Transformer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=256):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=0.1)

        pe = torch.zeros(max_len, d_model)  # (max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)  # (max_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                             (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)  # Even indices
        pe[:, 1::2] = torch.cos(position * div_term)  # Odd indices
        pe = pe.unsqueeze(1)  # (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

# Transformer Model
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, d_model=128, nhead=8, num_layers=3):
        super(TransformerModel, self).__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model

        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, vocab_size)

        self.init_weights()

    def init_weights(self):
        initrange = 0.1
        nn.init.uniform_(self.embedding.weight, -initrange, initrange)
        nn.init.zeros_(self.decoder.bias)
        nn.init.uniform_(self.decoder.weight, -initrange, initrange)

    def forward(self, src):
        src = self.embedding(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = self.decoder(output)
        return output

def decode(input_tokens, inv_vocab):
    return ' '.join([inv_vocab[x] for x in input_tokens])


def save_checkpoint(model, optimizer, episode, folder, filename):
    """
    Save the model and optimizer state to the designated filepath.

    Args:
        model (nn.Module): The model to save.
        optimizer (torch.optim.Optimizer): The optimizer whose state to save.
        episode (int): The current episode number.
        filepath (str): The path where to save the checkpoint.
    """
    filepath = os.path.join(folder, filename)
    # Ensure the directory exists
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    # Save the checkpoint
    torch.save({
        'episode': episode,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, filepath)
    print(f"Checkpoint saved at episode {episode} to {filepath}")

def load_checkpoint(filepath, model, optimizer):
    """
    Load the model and optimizer state from the designated filepath.

    Args:
        filepath (str): The path from where to load the checkpoint.
        model (nn.Module): The model into which to load the state_dict.
        optimizer (torch.optim.Optimizer): The optimizer into which to load the state.

    Returns:
        int: The episode number to resume from.
    """
    if os.path.isfile(filepath):
        checkpoint = torch.load(filepath, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        episode = checkpoint['episode']
        print(f"Checkpoint loaded from {filepath}, resuming from episode {episode}")
        return episode
    else:
        print(f"No checkpoint found at {filepath}, starting from scratch.")
        return 0

In [3]:
# Training Loop
def train(verbose=False):
    # Removed torch.autograd.set_detect_anomaly(True)
    vocab_size = len(vocab)
    model = TransformerModel(vocab_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)  # Reduced learning rate
    # Optionally, load a checkpoint
    # load_checkpoint("path_to_checkpoint", model, optimizer)

    episode_cnt = 0
    total_reward = 0.0
    num_successes = 0
    total_steps = 0
    
    for episode in range(NUM_EPISODES):
        t1 = time.time()
        model.train()  # Set model to training mode
        env = SortingEnv()
        initial_token_id, current_list = env.reset()
        input_tokens = [initial_token_id]
        log_probs = []
        rewards = []
        
        state = 'expect_action'
        done = False
        success = False

        while not done and len(input_tokens) < env.max_steps:
            if verbose:
                print(decode(input_tokens, inv_vocab))
                print(env.get_list())
            # Prepare input tensor
            input_seq = torch.tensor(input_tokens, dtype=torch.long, device=device).unsqueeze(1)  # (seq_len, batch_size)
            # Get model output
            with torch.autocast(device_type=device.type, dtype=torch.bfloat16):
                output = model(input_seq)  # (seq_len, batch_size, vocab_size)
                # Get logits for the last token
                logits = output[-1, 0, :]  # (vocab_size)

                # Check for NaNs in logits
                if torch.isnan(logits).any():
                    print(f"Episode {episode}, NaNs in logits before masking.")
                    break

                # Get valid tokens based on state
                def get_valid_tokens(state):
                    action_tokens = [vocab['Comparison'], vocab['Swap']]
                    index_tokens = [vocab[str(i)] for i in range(env.length)]
                    if state == 'expect_action':
                        return action_tokens
                    elif state == 'expect_index1':
                        return index_tokens[:-1]
                    elif state == 'expect_index2':
                        return [x for x in index_tokens if x > input_tokens[-1]]
                    else:
                        # Handle unexpected states by defaulting to expect_action
                        return action_tokens

                valid_token_ids = get_valid_tokens(state)

                # Ensure valid_token_ids are within the vocab range
                if any(idx >= vocab_size or idx < 0 for idx in valid_token_ids):
                    print(f"Episode {episode}, invalid indices in valid_token_ids: {valid_token_ids}")
                    break

                # Mask invalid tokens
                mask_value = -1e9  # Use a large negative value instead of -inf
                mask = torch.full_like(logits, mask_value).to(device)
                mask[valid_token_ids] = 0
                masked_logits = logits + mask

                # Sample action. Have some chance to randomly pick a valid action.
                eps_threshold = EPS_END + (EPS_START - EPS_END) * np.exp(-1.0 * episode / EPS_DECAY)
                if random.random() < eps_threshold:
                    masked_logits = masked_logits / 4

                # Check for NaNs in masked_logits
                if torch.isnan(masked_logits).any():
                    print(f"Episode {episode}, NaNs in masked_logits after masking.")
                    break

                # Compute probabilities
                probs = F.softmax(masked_logits, dim=0)

                # Check for NaNs in probs
                if torch.isnan(probs).any():
                    print(f"Episode {episode}, NaNs in probs after softmax.")
                    break

                try:
                    m = torch.distributions.Categorical(probs)
                    action_token = m.sample()
                    log_prob = m.log_prob(action_token)
                except ValueError as e:
                    print(f"Episode {episode}, error in sampling action: {e}")
                    break

            log_probs.append(log_prob)
            input_tokens.append(action_token.item())

            action = action_token.item()
            reward = 0.0
            if state == 'expect_action':
                if action == vocab['Comparison']:
                    state = 'expect_index1'
                elif action == vocab['Swap']:
                    if env.indices is None:
                        reward = INVALID_ACTION_REWARD
                        rewards.append(reward)
                        done = True
                        continue
                    action_tokens = [vocab['Swap']]
                    response_token, reward, done, current_list = env.step(action_tokens)
                    if done and reward == SUCCESS_REWARD:
                        success = True
                    if verbose:
                        print("Reward:", reward)
                    state = 'expect_action'
                else:
                    reward = INVALID_ACTION_REWARD
                    done = True
            elif state == 'expect_index1':
                index1_token = action_token
                state = 'expect_index2'
            elif state == 'expect_index2':
                index2_token = action_token
                action_tokens = [vocab['Comparison'], index1_token.item(), index2_token.item()]
                response_token, reward, done, current_list = env.step(action_tokens)
                if done and reward == SUCCESS_REWARD:
                    success = True
                if verbose:
                    print("Reward:", reward)
                if response_token is not None:
                    input_tokens.append(response_token)
                state = 'expect_action'
            else:
                reward = INVALID_ACTION_REWARD
                done = True

            rewards.append(reward)
        #
        if success: 
            num_successes += 1
        # Save checkpoint
        if episode > 0 and episode % EPISODES_SAVE == 0:
            avg_reward = total_reward / episode_cnt
            success_rate = num_successes / episode_cnt
            avg_steps = total_steps / episode_cnt
            episode_cnt = 0
            total_reward = 0.0
            num_successes = 0
            total_steps = 0
            save_checkpoint(model, optimizer, episode, OUTPUT_DIR, f"ckpt_{episode}_{success_rate:.4f}_{avg_steps:.2f}.pth")
        #
        assert len(log_probs) == len(rewards), "log_probs and returns have different sizes!"

        if len(log_probs) == 0:
            continue  # Skip if no actions were taken

        # Compute returns and loss within autocast
        with torch.autocast(device_type=device.type, dtype=torch.bfloat16):
            # Compute returns
            returns = []
            R = 0
            gamma = GAMMA
            for r in rewards[::-1]:
                R = r + gamma * R
                returns.insert(0, R)
            returns = torch.tensor(returns).to(device)

            # Check for NaNs in returns
            if torch.isnan(returns).any():
                print(f"Episode {episode}, NaNs in returns.")
                continue

            # Compute loss
            loss = 0
            for log_prob, R in zip(log_probs, returns):
                loss -= log_prob * R

            # Check for NaNs in loss
            if torch.isnan(loss):
                print(f"Episode {episode}, NaN in loss.")
                continue

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()

        episode_cnt += 1
        total_reward += sum(rewards)
        total_steps += len(rewards)
        t2 = time.time()
        if episode % 1 == 0:
            print(f"Episode {episode}, loss:{loss.item():.4f}, {'succeed' if success else 'fail'}, steps:{len(rewards)}, total reward:{sum(rewards):.4f}, {t2-t1} sec")

if __name__ == "__main__":
    train(verbose=False)




Episode 0, loss:-47.1221, fail, steps:8, total reward:-11.2000, 0.3784205913543701 sec
Episode 1, loss:-43.4430, fail, steps:11, total reward:-9.3000, 0.026682376861572266 sec
Episode 2, loss:-5.9498, fail, steps:1, total reward:-10.0000, 0.0031452178955078125 sec
Episode 3, loss:-3.2463, fail, steps:1, total reward:-10.0000, 0.0028829574584960938 sec
Episode 4, loss:-58.3752, fail, steps:46, total reward:-12.4000, 0.10956645011901855 sec
Episode 5, loss:-48.6245, fail, steps:14, total reward:-9.4000, 0.030432939529418945 sec
Episode 6, loss:-7.0790, fail, steps:1, total reward:-10.0000, 0.003426074981689453 sec
Episode 7, loss:-9.6680, fail, steps:1, total reward:-10.0000, 0.002936124801635742 sec
Episode 8, loss:-59.8424, fail, steps:108, total reward:-11.3000, 0.32430577278137207 sec
Episode 9, loss:-91.3233, fail, steps:18, total reward:-10.5000, 0.04279661178588867 sec
Episode 10, loss:-75.9857, fail, steps:137, total reward:-14.4000, 0.300731897354126 sec
Episode 11, loss:-13.047

Episode 92, loss:-1.4131, fail, steps:160, total reward:3.1000, 0.3614506721496582 sec
Episode 93, loss:-32.2701, fail, steps:68, total reward:-5.0000, 0.16035079956054688 sec
Episode 94, loss:3.8853, fail, steps:158, total reward:-0.1000, 0.3439908027648926 sec
Episode 95, loss:1.9031, fail, steps:159, total reward:1.0000, 0.35780858993530273 sec
Episode 96, loss:14.5926, fail, steps:160, total reward:5.1000, 0.3662145137786865 sec
Episode 97, loss:-9.1198, fail, steps:159, total reward:-0.4000, 0.36095690727233887 sec
Episode 98, loss:4.9960, fail, steps:160, total reward:3.1000, 0.3644695281982422 sec
Episode 99, loss:14.1326, fail, steps:160, total reward:5.1000, 0.3646979331970215 sec
Episode 100, loss:-42.0095, fail, steps:12, total reward:-8.3000, 0.039658308029174805 sec
Episode 101, loss:-6.5579, fail, steps:159, total reward:1.0000, 0.33654212951660156 sec
Episode 102, loss:3.0482, fail, steps:159, total reward:3.0000, 0.35297703742980957 sec
Episode 103, loss:28.0112, fail, 

Episode 185, loss:6.1520, fail, steps:161, total reward:5.1000, 0.3470301628112793 sec
Episode 186, loss:21.1964, fail, steps:160, total reward:8.1000, 0.3567042350769043 sec
Episode 187, loss:28.2703, fail, steps:161, total reward:8.2000, 0.3638033866882324 sec
Episode 188, loss:25.4871, succeed, steps:98, total reward:9.0000, 0.22624754905700684 sec
Episode 189, loss:-1.6484, fail, steps:159, total reward:1.8000, 0.3533635139465332 sec
Episode 190, loss:4.7634, fail, steps:160, total reward:3.3000, 0.3625655174255371 sec
Episode 191, loss:11.6588, fail, steps:159, total reward:4.0000, 0.36226654052734375 sec
Episode 192, loss:34.4433, succeed, steps:89, total reward:9.4000, 0.20630335807800293 sec
Episode 193, loss:3.6548, succeed, steps:123, total reward:2.7000, 0.27242517471313477 sec
Episode 194, loss:-35.7212, fail, steps:53, total reward:-2.6000, 0.1222066879272461 sec
Episode 195, loss:-91.0948, fail, steps:37, total reward:-9.0000, 0.08188509941101074 sec
Episode 196, loss:31.

Episode 277, loss:-52.6767, fail, steps:152, total reward:-7.8000, 0.34328532218933105 sec
Episode 278, loss:18.8166, fail, steps:161, total reward:8.2000, 0.3677396774291992 sec
Episode 279, loss:19.0662, succeed, steps:86, total reward:6.4000, 0.1991269588470459 sec
Episode 280, loss:-6.9821, fail, steps:160, total reward:1.0000, 0.35135746002197266 sec
Episode 281, loss:15.6804, fail, steps:160, total reward:6.1000, 0.3603394031524658 sec
Episode 282, loss:18.0852, fail, steps:160, total reward:6.1000, 0.3626575469970703 sec
Episode 283, loss:17.3528, succeed, steps:78, total reward:4.6000, 0.18213653564453125 sec
Episode 284, loss:-44.6498, fail, steps:46, total reward:-5.3000, 0.10414361953735352 sec
Episode 285, loss:16.7439, succeed, steps:101, total reward:5.7000, 0.21490168571472168 sec
Episode 286, loss:8.1719, succeed, steps:136, total reward:3.7000, 0.30690455436706543 sec
Episode 287, loss:2.1538, fail, steps:160, total reward:4.1000, 0.3542780876159668 sec
Episode 288, lo

Episode 369, loss:32.2590, fail, steps:161, total reward:11.2000, 0.3565375804901123 sec
Episode 370, loss:6.5928, succeed, steps:151, total reward:3.2000, 0.3475058078765869 sec
Episode 371, loss:13.8859, fail, steps:161, total reward:7.5000, 0.36415982246398926 sec
Episode 372, loss:28.0582, fail, steps:161, total reward:11.2000, 0.36769556999206543 sec
Episode 373, loss:27.7265, succeed, steps:137, total reward:10.9000, 0.3146188259124756 sec
Episode 374, loss:10.6717, succeed, steps:56, total reward:4.2000, 0.13204097747802734 sec
Episode 375, loss:4.6930, fail, steps:159, total reward:2.0000, 0.3445546627044678 sec
Episode 376, loss:28.8073, fail, steps:161, total reward:12.2000, 0.36161017417907715 sec
Episode 377, loss:10.1685, fail, steps:160, total reward:4.2000, 0.36710453033447266 sec
Episode 378, loss:26.0534, fail, steps:161, total reward:9.1000, 0.36586642265319824 sec
Episode 379, loss:-1.3785, fail, steps:159, total reward:2.0000, 0.3620188236236572 sec
Episode 380, los

Episode 461, loss:21.7363, fail, steps:161, total reward:9.1000, 0.3675568103790283 sec
Episode 462, loss:14.9144, fail, steps:161, total reward:6.2000, 0.36817455291748047 sec
Episode 463, loss:9.1429, fail, steps:160, total reward:4.4000, 0.3656649589538574 sec
Episode 464, loss:10.7552, fail, steps:160, total reward:5.9000, 0.3653066158294678 sec
Episode 465, loss:22.3099, succeed, steps:159, total reward:8.1000, 0.3678879737854004 sec
Episode 466, loss:14.9040, succeed, steps:92, total reward:6.1000, 0.21511220932006836 sec
Episode 467, loss:14.5270, succeed, steps:138, total reward:5.7000, 0.35044384002685547 sec
Episode 468, loss:12.2457, succeed, steps:98, total reward:6.0000, 0.22340774536132812 sec
Episode 469, loss:18.9113, fail, steps:161, total reward:9.1000, 0.35526418685913086 sec
Episode 470, loss:22.0128, succeed, steps:122, total reward:8.3000, 0.28003358840942383 sec
Episode 471, loss:14.1866, fail, steps:160, total reward:5.3000, 0.35936474800109863 sec
Episode 472, 

Episode 553, loss:8.0648, fail, steps:159, total reward:4.0000, 0.33330869674682617 sec
Episode 554, loss:25.8879, succeed, steps:133, total reward:10.0000, 0.2993905544281006 sec
Episode 555, loss:-4.2376, fail, steps:158, total reward:-0.1000, 0.3519115447998047 sec
Episode 556, loss:0.8909, fail, steps:159, total reward:2.0000, 0.35846972465515137 sec
Episode 557, loss:19.1943, succeed, steps:108, total reward:6.7000, 0.24741005897521973 sec
Episode 558, loss:36.2043, fail, steps:161, total reward:11.2000, 0.3584556579589844 sec
Episode 559, loss:7.0604, succeed, steps:116, total reward:4.8000, 0.2655367851257324 sec
Episode 560, loss:27.8051, fail, steps:161, total reward:10.2000, 0.36435651779174805 sec
Episode 561, loss:17.8118, succeed, steps:70, total reward:5.9000, 0.1638939380645752 sec
Episode 562, loss:23.4351, succeed, steps:116, total reward:8.5000, 0.2548365592956543 sec
Episode 563, loss:4.5945, fail, steps:160, total reward:4.1000, 0.35453319549560547 sec
Episode 564, 

Episode 644, loss:20.9854, fail, steps:161, total reward:10.2000, 0.3634064197540283 sec
Episode 645, loss:17.8122, succeed, steps:141, total reward:9.8000, 0.32183051109313965 sec
Episode 646, loss:-55.0454, fail, steps:52, total reward:-5.5000, 0.12294435501098633 sec
Episode 647, loss:4.0867, fail, steps:159, total reward:4.0000, 0.3441340923309326 sec
Episode 648, loss:22.1549, succeed, steps:127, total reward:10.2000, 0.2915630340576172 sec
Episode 649, loss:18.1597, succeed, steps:159, total reward:8.1000, 0.3562660217285156 sec
Episode 650, loss:27.5423, succeed, steps:148, total reward:9.5000, 0.33654260635375977 sec
Episode 651, loss:22.5019, fail, steps:161, total reward:8.7000, 0.3645820617675781 sec
Episode 652, loss:24.7007, succeed, steps:152, total reward:10.4000, 0.34815263748168945 sec
Episode 653, loss:14.2533, succeed, steps:107, total reward:5.7000, 0.24642014503479004 sec
Episode 654, loss:37.9680, succeed, steps:123, total reward:15.5000, 0.27491068840026855 sec
E

Episode 735, loss:8.8879, succeed, steps:92, total reward:7.3000, 0.21184325218200684 sec
Episode 736, loss:16.9030, fail, steps:160, total reward:7.7000, 0.3526298999786377 sec
Episode 737, loss:33.7031, fail, steps:161, total reward:11.2000, 0.36161351203918457 sec
Episode 738, loss:12.4450, succeed, steps:138, total reward:5.7000, 0.32094454765319824 sec
Episode 739, loss:13.1257, succeed, steps:60, total reward:5.2000, 0.14266729354858398 sec
Episode 740, loss:26.4669, fail, steps:161, total reward:10.2000, 0.3489062786102295 sec
Episode 741, loss:33.4476, succeed, steps:157, total reward:12.3000, 0.3520927429199219 sec
Episode 742, loss:30.5121, fail, steps:161, total reward:11.1000, 0.44426965713500977 sec
Episode 743, loss:16.9716, fail, steps:160, total reward:8.1000, 0.3937654495239258 sec
Episode 744, loss:20.4636, fail, steps:160, total reward:8.1000, 0.4191858768463135 sec
Episode 745, loss:26.7950, fail, steps:161, total reward:10.2000, 0.3996551036834717 sec
Episode 746, 

Episode 827, loss:12.1688, succeed, steps:154, total reward:6.2000, 0.3572394847869873 sec
Episode 828, loss:13.6986, succeed, steps:51, total reward:5.5000, 0.12496352195739746 sec
Episode 829, loss:26.3913, succeed, steps:128, total reward:8.1000, 0.28232741355895996 sec
Episode 830, loss:-8.8565, fail, steps:160, total reward:3.1000, 0.3562958240509033 sec
Episode 831, loss:9.7548, fail, steps:159, total reward:4.0000, 0.35915422439575195 sec
Episode 832, loss:15.2978, fail, steps:160, total reward:8.1000, 0.3628668785095215 sec
Episode 833, loss:18.1958, succeed, steps:150, total reward:8.4000, 0.34180212020874023 sec
Episode 834, loss:23.7690, succeed, steps:113, total reward:8.6000, 0.2636606693267822 sec
Episode 835, loss:-44.6201, fail, steps:42, total reward:-3.1000, 0.09984970092773438 sec
Episode 836, loss:-60.9603, fail, steps:151, total reward:-2.6000, 0.32506728172302246 sec
Episode 837, loss:14.1963, fail, steps:160, total reward:6.1000, 0.35585832595825195 sec
Episode 8

Episode 919, loss:8.1169, fail, steps:159, total reward:4.0000, 0.7760791778564453 sec
Episode 920, loss:-5.6440, fail, steps:160, total reward:3.1000, 0.7433011531829834 sec
Episode 921, loss:12.5408, succeed, steps:89, total reward:6.3000, 0.36060404777526855 sec
Episode 922, loss:17.4988, succeed, steps:136, total reward:6.8000, 0.4662911891937256 sec
Episode 923, loss:-42.8015, fail, steps:147, total reward:-0.4000, 0.6297512054443359 sec
Episode 924, loss:31.1026, fail, steps:161, total reward:12.2000, 0.7401647567749023 sec
Episode 925, loss:16.6563, succeed, steps:65, total reward:7.1000, 0.2659492492675781 sec
Episode 926, loss:24.9767, succeed, steps:109, total reward:10.6000, 0.47057676315307617 sec
Episode 927, loss:5.1705, succeed, steps:127, total reward:4.0000, 0.5489439964294434 sec
Episode 928, loss:-60.4615, fail, steps:8, total reward:-9.2000, 0.04053139686584473 sec
Episode 929, loss:18.0612, fail, steps:161, total reward:8.3000, 0.7105545997619629 sec
Episode 930, l

Episode 1009, loss:17.1723, fail, steps:160, total reward:8.1000, 0.7092633247375488 sec
Episode 1010, loss:19.7840, succeed, steps:139, total reward:8.0000, 0.5789668560028076 sec
Episode 1011, loss:17.5893, fail, steps:160, total reward:6.1000, 0.7602453231811523 sec
Episode 1012, loss:21.8845, succeed, steps:120, total reward:9.1000, 0.5064640045166016 sec
Episode 1013, loss:26.0802, fail, steps:161, total reward:12.2000, 0.691321611404419 sec
Episode 1014, loss:27.2863, fail, steps:161, total reward:11.2000, 0.7731165885925293 sec
Episode 1015, loss:22.0600, succeed, steps:108, total reward:6.7000, 0.44708681106567383 sec
Episode 1016, loss:22.9303, succeed, steps:130, total reward:7.0000, 0.5387547016143799 sec
Episode 1017, loss:30.7310, succeed, steps:95, total reward:12.3000, 0.4655320644378662 sec
Episode 1018, loss:23.1611, succeed, steps:152, total reward:10.2000, 0.6628196239471436 sec
Episode 1019, loss:7.1356, fail, steps:160, total reward:4.9000, 0.6838853359222412 sec
E

Episode 1100, loss:15.1430, succeed, steps:109, total reward:8.8000, 0.5251927375793457 sec
Episode 1101, loss:18.4621, succeed, steps:75, total reward:7.8000, 0.2991471290588379 sec
Episode 1102, loss:21.5537, succeed, steps:79, total reward:8.7000, 0.3189520835876465 sec
Episode 1103, loss:16.0917, fail, steps:160, total reward:7.1000, 0.7201845645904541 sec
Episode 1104, loss:27.4639, fail, steps:161, total reward:10.1000, 0.6537179946899414 sec
Episode 1105, loss:42.4151, succeed, steps:150, total reward:17.3000, 0.6351277828216553 sec
Episode 1106, loss:23.8446, fail, steps:161, total reward:10.2000, 0.69158935546875 sec
Episode 1107, loss:18.2271, fail, steps:160, total reward:8.1000, 0.6412107944488525 sec
Episode 1108, loss:4.7413, succeed, steps:83, total reward:3.4000, 0.4525778293609619 sec
Episode 1109, loss:0.9330, fail, steps:159, total reward:2.0000, 0.6341347694396973 sec
Episode 1110, loss:20.3764, succeed, steps:64, total reward:9.1000, 0.26676154136657715 sec
Episode

Episode 1191, loss:20.0531, succeed, steps:85, total reward:8.5000, 0.3458998203277588 sec
Episode 1192, loss:6.7930, succeed, steps:118, total reward:4.3000, 0.5795269012451172 sec
Episode 1193, loss:30.7402, fail, steps:162, total reward:13.2000, 0.6368794441223145 sec
Episode 1194, loss:34.3768, fail, steps:162, total reward:16.3000, 0.8193151950836182 sec
Episode 1195, loss:2.3581, succeed, steps:148, total reward:3.3000, 0.6865019798278809 sec
Episode 1196, loss:10.1697, succeed, steps:133, total reward:6.9000, 0.5324158668518066 sec
Episode 1197, loss:34.4501, fail, steps:162, total reward:16.3000, 0.7404944896697998 sec
Episode 1198, loss:22.0115, fail, steps:161, total reward:9.7000, 0.6847097873687744 sec
Episode 1199, loss:11.4527, succeed, steps:116, total reward:5.4000, 0.521005392074585 sec
Episode 1200, loss:12.7110, fail, steps:160, total reward:7.1000, 0.6323623657226562 sec
Episode 1201, loss:20.9334, succeed, steps:129, total reward:9.1000, 0.5262973308563232 sec
Epis

Episode 1282, loss:21.7914, fail, steps:161, total reward:10.2000, 0.6733949184417725 sec
Episode 1283, loss:25.6119, succeed, steps:131, total reward:11.1000, 0.5949535369873047 sec
Episode 1284, loss:32.2593, fail, steps:162, total reward:14.3000, 0.7184059619903564 sec
Episode 1285, loss:15.2925, fail, steps:160, total reward:8.1000, 0.6420547962188721 sec
Episode 1286, loss:7.7942, fail, steps:160, total reward:6.1000, 0.7283885478973389 sec
Episode 1287, loss:24.0653, fail, steps:161, total reward:12.2000, 0.6684112548828125 sec
Episode 1288, loss:16.5700, succeed, steps:156, total reward:8.2000, 0.6718628406524658 sec
Episode 1289, loss:11.8421, succeed, steps:98, total reward:6.0000, 0.3832731246948242 sec
Episode 1290, loss:17.7212, succeed, steps:85, total reward:8.5000, 0.4221055507659912 sec
Episode 1291, loss:4.6935, fail, steps:159, total reward:4.0000, 0.6727123260498047 sec
Episode 1292, loss:15.1305, fail, steps:160, total reward:8.1000, 0.6785216331481934 sec
Episode 1

Episode 1373, loss:13.0165, fail, steps:160, total reward:7.1000, 0.7797422409057617 sec
Episode 1374, loss:16.0204, succeed, steps:51, total reward:8.6000, 0.2165849208831787 sec
Episode 1375, loss:21.6290, succeed, steps:113, total reward:11.7000, 0.5232408046722412 sec
Episode 1376, loss:26.8299, succeed, steps:80, total reward:12.8000, 0.3254814147949219 sec
Episode 1377, loss:4.5718, fail, steps:160, total reward:5.0000, 0.7339317798614502 sec
Episode 1378, loss:14.3338, succeed, steps:104, total reward:8.6000, 0.49139904975891113 sec
Episode 1379, loss:15.7794, succeed, steps:75, total reward:7.8000, 0.29748106002807617 sec
Episode 1380, loss:13.9310, succeed, steps:39, total reward:9.0000, 0.15540480613708496 sec
Episode 1381, loss:17.9763, succeed, steps:156, total reward:8.0000, 0.6968405246734619 sec
Episode 1382, loss:10.3641, succeed, steps:88, total reward:5.1000, 0.3521130084991455 sec
Episode 1383, loss:27.0364, fail, steps:161, total reward:12.2000, 0.7705507278442383 s

Episode 1464, loss:16.0521, succeed, steps:115, total reward:10.6000, 0.531785249710083 sec
Episode 1465, loss:8.9256, fail, steps:160, total reward:5.0000, 0.633887767791748 sec
Episode 1466, loss:15.7336, succeed, steps:141, total reward:8.7000, 0.6749279499053955 sec
Episode 1467, loss:18.6229, fail, steps:162, total reward:11.2000, 0.6490025520324707 sec
Episode 1468, loss:11.4052, fail, steps:160, total reward:6.1000, 0.6782190799713135 sec
Episode 1469, loss:28.4989, succeed, steps:145, total reward:12.7000, 0.6619336605072021 sec
Episode 1470, loss:9.6811, fail, steps:160, total reward:8.1000, 0.6603739261627197 sec
Episode 1471, loss:15.9501, succeed, steps:135, total reward:8.9000, 0.6379945278167725 sec
Episode 1472, loss:10.3109, succeed, steps:64, total reward:6.1000, 0.2621605396270752 sec
Episode 1473, loss:10.8810, succeed, steps:94, total reward:5.0000, 0.45426464080810547 sec
Episode 1474, loss:4.5928, fail, steps:160, total reward:5.0000, 0.6811048984527588 sec
Episod

Episode 1555, loss:12.7095, succeed, steps:152, total reward:10.4000, 0.7174112796783447 sec
Episode 1556, loss:17.3286, succeed, steps:102, total reward:6.9000, 0.41445302963256836 sec
Episode 1557, loss:12.5055, fail, steps:160, total reward:7.6000, 0.7521862983703613 sec
Episode 1558, loss:8.8289, fail, steps:160, total reward:8.1000, 0.7160568237304688 sec
Episode 1559, loss:9.1929, fail, steps:160, total reward:6.1000, 0.6388754844665527 sec
Episode 1560, loss:0.6821, fail, steps:159, total reward:3.0000, 0.74422287940979 sec
Episode 1561, loss:14.9786, fail, steps:160, total reward:8.1000, 0.6513032913208008 sec
Episode 1562, loss:5.5954, succeed, steps:86, total reward:6.3000, 0.54506516456604 sec
Episode 1563, loss:18.2428, succeed, steps:134, total reward:11.0000, 0.528691291809082 sec
Episode 1564, loss:8.0858, succeed, steps:44, total reward:4.7000, 0.20078659057617188 sec
Episode 1565, loss:17.1270, succeed, steps:159, total reward:10.6000, 0.6635799407958984 sec
Episode 15

Episode 1646, loss:16.9244, succeed, steps:128, total reward:8.1000, 0.6146280765533447 sec
Episode 1647, loss:2.6808, fail, steps:159, total reward:2.0000, 0.6519505977630615 sec
Episode 1648, loss:16.1432, fail, steps:161, total reward:9.1000, 0.680767297744751 sec
Episode 1649, loss:4.2616, succeed, steps:95, total reward:3.0000, 0.4689309597015381 sec
Episode 1650, loss:8.7961, fail, steps:160, total reward:8.1000, 0.6710846424102783 sec
Episode 1651, loss:9.4349, succeed, steps:108, total reward:6.7000, 0.4763963222503662 sec
Episode 1652, loss:20.9577, succeed, steps:141, total reward:11.8000, 0.6012651920318604 sec
Episode 1653, loss:8.5007, succeed, steps:124, total reward:7.2000, 0.5635874271392822 sec
Episode 1654, loss:9.7746, fail, steps:160, total reward:7.1000, 0.6361331939697266 sec
Episode 1655, loss:18.1319, succeed, steps:105, total reward:9.9000, 0.5277218818664551 sec
Episode 1656, loss:31.9220, succeed, steps:133, total reward:16.2000, 0.5263826847076416 sec
Episod

Episode 1737, loss:8.2966, fail, steps:160, total reward:4.5000, 0.6363422870635986 sec
Episode 1738, loss:7.7136, succeed, steps:133, total reward:6.8000, 0.5382781028747559 sec
Episode 1739, loss:-78.2732, fail, steps:59, total reward:-4.7000, 0.24198031425476074 sec
Episode 1740, loss:7.3069, succeed, steps:73, total reward:5.8000, 0.39674901962280273 sec
Episode 1741, loss:17.7908, fail, steps:160, total reward:8.1000, 0.6660680770874023 sec
Episode 1742, loss:21.3645, succeed, steps:107, total reward:11.9000, 0.4420483112335205 sec
Episode 1743, loss:6.8274, fail, steps:159, total reward:3.0000, 0.7174520492553711 sec
Episode 1744, loss:16.5777, fail, steps:161, total reward:10.2000, 0.6763570308685303 sec
Episode 1745, loss:8.6444, fail, steps:160, total reward:8.1000, 0.6878278255462646 sec
Episode 1746, loss:12.7716, fail, steps:160, total reward:7.1000, 0.7406201362609863 sec
Episode 1747, loss:-1.4534, fail, steps:159, total reward:2.0000, 0.6545836925506592 sec
Episode 1748,

Episode 1828, loss:6.6310, fail, steps:160, total reward:8.1000, 0.7439861297607422 sec
Episode 1829, loss:6.5440, succeed, steps:70, total reward:9.0000, 0.29265713691711426 sec
Episode 1830, loss:20.3855, succeed, steps:113, total reward:8.6000, 0.46491336822509766 sec
Episode 1831, loss:5.1451, succeed, steps:55, total reward:6.4000, 0.3146789073944092 sec
Episode 1832, loss:16.7257, succeed, steps:112, total reward:10.7000, 0.4418497085571289 sec
Episode 1833, loss:12.2622, fail, steps:161, total reward:11.2000, 0.6338887214660645 sec
Episode 1834, loss:20.9626, succeed, steps:156, total reward:17.5000, 0.7772519588470459 sec
Episode 1835, loss:14.9582, succeed, steps:123, total reward:12.4000, 0.5686647891998291 sec
Episode 1836, loss:14.2080, fail, steps:160, total reward:8.1000, 0.6360499858856201 sec
Episode 1837, loss:15.9672, succeed, steps:137, total reward:10.9000, 0.6406221389770508 sec
Episode 1838, loss:13.7590, succeed, steps:160, total reward:13.3000, 0.670255422592163

Episode 1919, loss:13.1951, fail, steps:161, total reward:12.2000, 0.7510333061218262 sec
Episode 1920, loss:13.2154, succeed, steps:90, total reward:10.4000, 0.37533998489379883 sec
Episode 1921, loss:-38.6665, fail, steps:9, total reward:-8.2000, 0.09447026252746582 sec
Episode 1922, loss:15.1563, succeed, steps:114, total reward:12.7000, 0.48100805282592773 sec
Episode 1923, loss:16.1313, succeed, steps:120, total reward:9.4000, 0.4777517318725586 sec
Episode 1924, loss:14.3623, fail, steps:160, total reward:7.1000, 0.7571732997894287 sec
Episode 1925, loss:6.7679, succeed, steps:40, total reward:3.8000, 0.17040681838989258 sec
Episode 1926, loss:21.9572, fail, steps:161, total reward:12.2000, 0.7174065113067627 sec
Episode 1927, loss:-47.5339, fail, steps:55, total reward:-2.5000, 0.2237236499786377 sec
Episode 1928, loss:14.2042, fail, steps:161, total reward:12.2000, 0.7214152812957764 sec
Episode 1929, loss:18.6956, succeed, steps:149, total reward:13.6000, 0.6052131652832031 se

Episode 2008, loss:19.1903, succeed, steps:104, total reward:12.0000, 0.4544827938079834 sec
Episode 2009, loss:16.5711, succeed, steps:124, total reward:10.3000, 0.5256617069244385 sec
Episode 2010, loss:4.6875, fail, steps:159, total reward:3.9000, 0.6480803489685059 sec
Episode 2011, loss:10.6018, fail, steps:161, total reward:9.1000, 0.6928708553314209 sec
Episode 2012, loss:26.4963, succeed, steps:112, total reward:16.9000, 0.45090317726135254 sec
Episode 2013, loss:21.9479, succeed, steps:143, total reward:16.9000, 0.6774969100952148 sec
Episode 2014, loss:23.2330, succeed, steps:149, total reward:13.6000, 0.6319363117218018 sec
Episode 2015, loss:-1.5625, fail, steps:159, total reward:2.0000, 0.6843857765197754 sec
Episode 2016, loss:10.8007, fail, steps:161, total reward:9.8000, 0.7344403266906738 sec
Episode 2017, loss:11.3508, succeed, steps:130, total reward:7.0000, 0.5291824340820312 sec
Episode 2018, loss:12.3928, fail, steps:160, total reward:8.1000, 0.7310512065887451 se

Episode 2099, loss:10.7174, succeed, steps:101, total reward:8.8000, 0.5045502185821533 sec
Episode 2100, loss:11.9182, fail, steps:160, total reward:8.1000, 0.6367583274841309 sec
Episode 2101, loss:8.1357, succeed, steps:101, total reward:9.0000, 0.44218015670776367 sec
Episode 2102, loss:9.9103, fail, steps:160, total reward:6.1000, 0.6806912422180176 sec
Episode 2103, loss:20.2841, fail, steps:162, total reward:14.3000, 0.7032911777496338 sec
Episode 2104, loss:15.6646, fail, steps:161, total reward:11.2000, 0.6429953575134277 sec
Episode 2105, loss:13.7353, fail, steps:160, total reward:6.1000, 0.7407326698303223 sec
Episode 2106, loss:17.0108, succeed, steps:90, total reward:6.9000, 0.36422300338745117 sec
Episode 2107, loss:16.6768, succeed, steps:153, total reward:11.4000, 0.6937389373779297 sec
Episode 2108, loss:7.0994, succeed, steps:128, total reward:7.8000, 0.5319454669952393 sec
Episode 2109, loss:15.8511, fail, steps:161, total reward:11.1000, 0.7303051948547363 sec
Epis

Episode 2190, loss:4.0481, succeed, steps:98, total reward:6.0000, 0.5086910724639893 sec
Episode 2191, loss:8.3375, fail, steps:160, total reward:6.7000, 0.6704635620117188 sec
Episode 2192, loss:11.9213, succeed, steps:121, total reward:10.4000, 0.4879791736602783 sec
Episode 2193, loss:8.0445, succeed, steps:121, total reward:7.3000, 0.5381989479064941 sec
Episode 2194, loss:11.3257, succeed, steps:158, total reward:10.2000, 0.6140451431274414 sec
Episode 2195, loss:5.1246, fail, steps:160, total reward:5.0000, 0.7314989566802979 sec
Episode 2196, loss:5.0417, fail, steps:160, total reward:5.0000, 0.6788415908813477 sec
Episode 2197, loss:11.6800, fail, steps:160, total reward:7.1000, 0.6545755863189697 sec
Episode 2198, loss:6.5528, succeed, steps:125, total reward:7.8000, 0.4975602626800537 sec
Episode 2199, loss:11.1537, fail, steps:161, total reward:11.2000, 0.7327241897583008 sec
Episode 2200, loss:20.6433, fail, steps:162, total reward:13.2000, 0.6476774215698242 sec
Episode 2

Episode 2281, loss:5.0476, succeed, steps:147, total reward:7.8000, 0.700620174407959 sec
Episode 2282, loss:8.9478, succeed, steps:134, total reward:11.0000, 0.5225317478179932 sec
Episode 2283, loss:23.9118, succeed, steps:144, total reward:14.7000, 0.606041669845581 sec
Episode 2284, loss:18.0246, succeed, steps:65, total reward:13.3000, 0.2704348564147949 sec
Episode 2285, loss:7.2981, succeed, steps:145, total reward:6.5000, 0.6687426567077637 sec
Episode 2286, loss:9.0785, succeed, steps:112, total reward:10.7000, 0.46771788597106934 sec
Episode 2287, loss:21.1757, succeed, steps:120, total reward:15.4000, 0.5421297550201416 sec
Episode 2288, loss:2.5925, fail, steps:159, total reward:4.0000, 0.6743881702423096 sec
Episode 2289, loss:12.5857, succeed, steps:138, total reward:11.9000, 0.5499069690704346 sec
Episode 2290, loss:15.0307, succeed, steps:128, total reward:11.2000, 0.591015100479126 sec
Episode 2291, loss:4.8286, succeed, steps:43, total reward:3.7000, 0.152390718460083

Episode 2371, loss:18.5365, succeed, steps:138, total reward:11.9000, 0.6292300224304199 sec
Episode 2372, loss:19.9208, fail, steps:161, total reward:11.2000, 0.6549413204193115 sec
Episode 2373, loss:12.1625, succeed, steps:117, total reward:12.4000, 0.5756800174713135 sec
Episode 2374, loss:6.2072, fail, steps:160, total reward:7.7000, 0.6449368000030518 sec
Episode 2375, loss:13.2619, succeed, steps:139, total reward:12.9000, 0.5973083972930908 sec
Episode 2376, loss:9.0034, succeed, steps:149, total reward:10.5000, 0.7806131839752197 sec
Episode 2377, loss:16.1863, fail, steps:161, total reward:11.7000, 0.6446676254272461 sec
Episode 2378, loss:26.3454, succeed, steps:135, total reward:15.1000, 0.6330161094665527 sec
Episode 2379, loss:17.4032, succeed, steps:124, total reward:10.3000, 0.5045082569122314 sec
Episode 2380, loss:12.0932, succeed, steps:102, total reward:13.1000, 0.511699914932251 sec
Episode 2381, loss:19.1899, succeed, steps:113, total reward:17.9000, 0.46999740600

Episode 2462, loss:10.1367, succeed, steps:155, total reward:10.3000, 0.7170119285583496 sec
Episode 2463, loss:10.0099, succeed, steps:136, total reward:9.9000, 0.5584321022033691 sec
Episode 2464, loss:8.5551, succeed, steps:104, total reward:8.6000, 0.48807382583618164 sec
Episode 2465, loss:7.0524, succeed, steps:109, total reward:7.6000, 0.431485652923584 sec
Episode 2466, loss:8.7383, fail, steps:161, total reward:11.2000, 0.6824436187744141 sec
Episode 2467, loss:16.5447, succeed, steps:115, total reward:10.6000, 0.5223047733306885 sec
Episode 2468, loss:7.9076, fail, steps:160, total reward:8.1000, 0.6721386909484863 sec
Episode 2469, loss:4.5003, succeed, steps:92, total reward:9.3000, 0.43738555908203125 sec
Episode 2470, loss:6.7488, succeed, steps:82, total reward:5.5000, 0.32618093490600586 sec
Episode 2471, loss:16.0226, succeed, steps:143, total reward:10.7000, 0.6521205902099609 sec
Episode 2472, loss:5.5107, fail, steps:161, total reward:9.1000, 0.6439399719238281 sec


Episode 2553, loss:10.0795, succeed, steps:86, total reward:12.6000, 0.3351314067840576 sec
Episode 2554, loss:13.0084, succeed, steps:80, total reward:9.7000, 0.3891603946685791 sec
Episode 2555, loss:6.0107, succeed, steps:57, total reward:8.4000, 0.23109650611877441 sec
Episode 2556, loss:12.5666, fail, steps:160, total reward:7.8000, 0.7051122188568115 sec
Episode 2557, loss:5.2107, fail, steps:160, total reward:7.9000, 0.6390683650970459 sec
Episode 2558, loss:17.2986, succeed, steps:118, total reward:13.6000, 0.5652916431427002 sec
Episode 2559, loss:9.4384, succeed, steps:72, total reward:7.9000, 0.2891690731048584 sec
Episode 2560, loss:11.5076, succeed, steps:96, total reward:10.2000, 0.40849804878234863 sec
Episode 2561, loss:24.4260, succeed, steps:149, total reward:13.6000, 0.5953245162963867 sec
Episode 2562, loss:9.3106, succeed, steps:128, total reward:11.2000, 0.6103610992431641 sec
Episode 2563, loss:2.4448, fail, steps:160, total reward:7.1000, 0.6472055912017822 sec


Episode 2644, loss:9.4636, succeed, steps:108, total reward:6.7000, 0.5426042079925537 sec
Episode 2645, loss:17.5983, succeed, steps:86, total reward:12.6000, 0.34021711349487305 sec
Episode 2646, loss:9.0306, fail, steps:161, total reward:11.2000, 0.6941661834716797 sec
Episode 2647, loss:4.9595, succeed, steps:137, total reward:4.7000, 0.5756301879882812 sec
Episode 2648, loss:10.3332, succeed, steps:98, total reward:9.1000, 0.4688832759857178 sec
Episode 2649, loss:9.9832, succeed, steps:124, total reward:10.3000, 0.4935331344604492 sec
Episode 2650, loss:12.3346, succeed, steps:121, total reward:10.4000, 0.5926847457885742 sec
Episode 2651, loss:12.4622, fail, steps:162, total reward:13.2000, 0.6858081817626953 sec
Episode 2652, loss:10.9681, fail, steps:161, total reward:11.2000, 0.7107138633728027 sec
Episode 2653, loss:8.7923, fail, steps:161, total reward:10.9000, 0.7228732109069824 sec
Episode 2654, loss:10.4776, fail, steps:161, total reward:9.1000, 0.6545295715332031 sec
Ep

Episode 2735, loss:9.4527, succeed, steps:135, total reward:8.9000, 0.6293642520904541 sec
Episode 2736, loss:11.2334, fail, steps:161, total reward:10.2000, 0.6455678939819336 sec
Episode 2737, loss:16.5820, succeed, steps:131, total reward:14.2000, 0.6550416946411133 sec
Episode 2738, loss:13.7370, succeed, steps:68, total reward:6.9000, 0.2821221351623535 sec
Episode 2739, loss:5.7287, succeed, steps:52, total reward:6.5000, 0.17495059967041016 sec
Episode 2740, loss:8.7823, succeed, steps:62, total reward:7.2000, 0.3267652988433838 sec
Episode 2741, loss:7.5436, succeed, steps:71, total reward:6.9000, 0.28278470039367676 sec
Episode 2742, loss:13.6346, succeed, steps:122, total reward:14.5000, 0.49651432037353516 sec
Episode 2743, loss:4.4038, fail, steps:160, total reward:6.1000, 0.7096514701843262 sec
Episode 2744, loss:11.4171, succeed, steps:114, total reward:9.3000, 0.5300610065460205 sec
Episode 2745, loss:6.8646, succeed, steps:112, total reward:7.6000, 0.4537196159362793 se

Episode 2825, loss:9.7870, fail, steps:161, total reward:10.2000, 0.6906657218933105 sec
Episode 2826, loss:6.8083, succeed, steps:76, total reward:8.5000, 0.3171713352203369 sec
Episode 2827, loss:21.1546, fail, steps:162, total reward:14.3000, 0.7521069049835205 sec
Episode 2828, loss:3.1472, succeed, steps:107, total reward:5.7000, 0.42969632148742676 sec
Episode 2829, loss:11.8078, succeed, steps:80, total reward:9.7000, 0.4058561325073242 sec
Episode 2830, loss:18.0337, fail, steps:163, total reward:17.3000, 0.6157617568969727 sec
Episode 2831, loss:18.5019, succeed, steps:133, total reward:13.1000, 0.5654792785644531 sec
Episode 2832, loss:17.9055, succeed, steps:108, total reward:15.8000, 0.4883277416229248 sec
Episode 2833, loss:3.7744, fail, steps:159, total reward:3.0000, 0.6298892498016357 sec
Episode 2834, loss:7.3118, fail, steps:161, total reward:10.2000, 0.7246980667114258 sec
Episode 2835, loss:18.0019, succeed, steps:73, total reward:8.9000, 0.29695582389831543 sec
Epi

Episode 2916, loss:2.3123, succeed, steps:112, total reward:8.4000, 0.4954555034637451 sec
Episode 2917, loss:11.4339, succeed, steps:150, total reward:11.5000, 0.6443119049072266 sec
Episode 2918, loss:14.3502, succeed, steps:85, total reward:11.6000, 0.35695409774780273 sec
Episode 2919, loss:7.4352, succeed, steps:109, total reward:10.8000, 0.48610687255859375 sec
Episode 2920, loss:12.3662, succeed, steps:150, total reward:14.4000, 0.6710190773010254 sec
Episode 2921, loss:4.5539, succeed, steps:148, total reward:6.4000, 0.5916931629180908 sec
Episode 2922, loss:17.3298, succeed, steps:72, total reward:10.9000, 0.30866003036499023 sec
Episode 2923, loss:4.7939, fail, steps:160, total reward:6.5000, 0.671440839767456 sec
Episode 2924, loss:8.2322, succeed, steps:117, total reward:12.6000, 0.483076810836792 sec
Episode 2925, loss:14.6620, succeed, steps:91, total reward:14.5000, 0.39162516593933105 sec
Episode 2926, loss:2.7342, succeed, steps:52, total reward:3.4000, 0.2069325447082

Episode 3005, loss:6.7352, succeed, steps:61, total reward:6.2000, 0.31436729431152344 sec
Episode 3006, loss:10.5671, succeed, steps:107, total reward:15.0000, 0.4179084300994873 sec
Episode 3007, loss:3.5553, succeed, steps:137, total reward:7.2000, 0.6463773250579834 sec
Episode 3008, loss:13.2380, fail, steps:161, total reward:11.8000, 0.6811478137969971 sec
Episode 3009, loss:6.8217, fail, steps:161, total reward:10.2000, 0.6836802959442139 sec
Episode 3010, loss:2.8756, succeed, steps:55, total reward:6.4000, 0.2628300189971924 sec
Episode 3011, loss:8.3025, fail, steps:160, total reward:8.1000, 0.628542423248291 sec
Episode 3012, loss:6.5838, fail, steps:161, total reward:10.2000, 0.7759201526641846 sec
Episode 3013, loss:8.4302, succeed, steps:100, total reward:11.1000, 0.46011805534362793 sec
Episode 3014, loss:6.0026, fail, steps:160, total reward:8.1000, 0.6327793598175049 sec
Episode 3015, loss:8.8889, succeed, steps:110, total reward:8.7000, 0.5438096523284912 sec
Episode 

Episode 3096, loss:5.2461, succeed, steps:61, total reward:9.3000, 0.2504761219024658 sec
Episode 3097, loss:10.8456, succeed, steps:159, total reward:11.2000, 0.6974084377288818 sec
Episode 3098, loss:6.6922, succeed, steps:139, total reward:9.2000, 0.5619876384735107 sec
Episode 3099, loss:11.2968, fail, steps:162, total reward:13.3000, 0.7401025295257568 sec
Episode 3100, loss:11.8974, succeed, steps:134, total reward:17.1000, 0.48537230491638184 sec
Episode 3101, loss:16.4036, fail, steps:163, total reward:18.4000, 0.7172305583953857 sec
Episode 3102, loss:13.1916, fail, steps:162, total reward:14.3000, 0.7811098098754883 sec
Episode 3103, loss:7.4873, succeed, steps:116, total reward:11.6000, 0.5219993591308594 sec
Episode 3104, loss:4.0612, succeed, steps:60, total reward:8.3000, 0.24480247497558594 sec
Episode 3105, loss:8.4222, succeed, steps:72, total reward:10.9000, 0.2912757396697998 sec
Episode 3106, loss:14.8549, succeed, steps:150, total reward:17.7000, 0.6640563011169434

Episode 3186, loss:4.0853, fail, steps:161, total reward:9.1000, 0.7634561061859131 sec
Episode 3187, loss:6.1685, succeed, steps:110, total reward:11.8000, 0.44692468643188477 sec
Episode 3188, loss:16.1343, succeed, steps:99, total reward:13.2000, 0.4728422164916992 sec
Episode 3189, loss:8.1465, succeed, steps:150, total reward:11.4000, 0.6250250339508057 sec
Episode 3190, loss:3.1027, fail, steps:160, total reward:7.6000, 0.6795563697814941 sec
Episode 3191, loss:7.8118, succeed, steps:80, total reward:9.7000, 0.3667130470275879 sec
Episode 3192, loss:8.7884, succeed, steps:117, total reward:9.5000, 0.45981454849243164 sec
Episode 3193, loss:13.5761, fail, steps:161, total reward:11.2000, 0.7314002513885498 sec
Episode 3194, loss:14.9301, succeed, steps:111, total reward:12.8000, 0.4685478210449219 sec
Episode 3195, loss:7.1028, fail, steps:160, total reward:6.1000, 0.6995186805725098 sec
Episode 3196, loss:6.8120, succeed, steps:79, total reward:8.7000, 0.3247857093811035 sec
Epis

Episode 3277, loss:5.3011, succeed, steps:61, total reward:6.2000, 0.23887181282043457 sec
Episode 3278, loss:15.6670, succeed, steps:138, total reward:11.9000, 0.5307941436767578 sec
Episode 3279, loss:4.6328, succeed, steps:122, total reward:8.3000, 0.5381817817687988 sec
Episode 3280, loss:11.6102, succeed, steps:133, total reward:13.1000, 0.5404980182647705 sec
Episode 3281, loss:11.6294, succeed, steps:61, total reward:9.3000, 0.3166332244873047 sec
Episode 3282, loss:4.9919, fail, steps:161, total reward:8.6000, 0.6317260265350342 sec
Episode 3283, loss:3.7208, fail, steps:161, total reward:10.2000, 0.7303595542907715 sec
Episode 3284, loss:13.9662, succeed, steps:156, total reward:14.4000, 0.7141361236572266 sec
Episode 3285, loss:3.9273, succeed, steps:65, total reward:7.1000, 0.26929163932800293 sec
Episode 3286, loss:8.1937, succeed, steps:148, total reward:12.6000, 0.6654489040374756 sec
Episode 3287, loss:7.4774, succeed, steps:158, total reward:10.2000, 0.632326602935791 s

Episode 3368, loss:2.4845, succeed, steps:154, total reward:5.5000, 0.6982975006103516 sec
Episode 3369, loss:12.1438, fail, steps:162, total reward:14.3000, 0.678321123123169 sec
Episode 3370, loss:10.1754, succeed, steps:132, total reward:12.1000, 0.6093127727508545 sec
Episode 3371, loss:0.3655, succeed, steps:117, total reward:10.6000, 0.512812614440918 sec
Episode 3372, loss:4.3789, succeed, steps:145, total reward:3.4000, 0.6397769451141357 sec
Episode 3373, loss:6.1491, succeed, steps:77, total reward:9.8000, 0.2917623519897461 sec
Episode 3374, loss:8.6918, succeed, steps:143, total reward:13.6000, 0.6256728172302246 sec
Episode 3375, loss:0.7068, fail, steps:159, total reward:1.1000, 0.7212707996368408 sec
Episode 3376, loss:6.6710, succeed, steps:124, total reward:6.8000, 0.49680662155151367 sec
Episode 3377, loss:4.5434, fail, steps:160, total reward:6.1000, 0.7225382328033447 sec
Episode 3378, loss:15.8449, succeed, steps:93, total reward:13.4000, 0.3775618076324463 sec
Epi

Episode 3459, loss:5.7134, fail, steps:161, total reward:11.2000, 0.7431480884552002 sec
Episode 3460, loss:8.0836, fail, steps:161, total reward:11.2000, 0.6805629730224609 sec
Episode 3461, loss:11.3109, succeed, steps:95, total reward:9.0000, 0.3775475025177002 sec
Episode 3462, loss:7.2921, succeed, steps:47, total reward:7.7000, 0.1877448558807373 sec
Episode 3463, loss:16.4424, succeed, steps:121, total reward:16.6000, 0.5190432071685791 sec
Episode 3464, loss:15.2481, succeed, steps:109, total reward:10.8000, 0.4281578063964844 sec
Episode 3465, loss:6.8381, succeed, steps:145, total reward:9.6000, 0.5840179920196533 sec
Episode 3466, loss:2.8562, succeed, steps:62, total reward:7.1000, 0.3054018020629883 sec
Episode 3467, loss:10.3831, succeed, steps:156, total reward:11.1000, 0.612316370010376 sec
Episode 3468, loss:10.9964, succeed, steps:108, total reward:12.9000, 0.5484235286712646 sec
Episode 3469, loss:9.8504, succeed, steps:105, total reward:9.9000, 0.41954565048217773 s

Episode 3550, loss:7.6932, succeed, steps:84, total reward:10.6000, 0.3199796676635742 sec
Episode 3551, loss:9.4592, succeed, steps:48, total reward:8.7000, 0.2582371234893799 sec
Episode 3552, loss:5.3240, succeed, steps:88, total reward:8.4000, 0.3420886993408203 sec
Episode 3553, loss:13.6289, succeed, steps:138, total reward:15.0000, 0.5540177822113037 sec
Episode 3554, loss:3.6413, succeed, steps:54, total reward:5.4000, 0.2855191230773926 sec
Episode 3555, loss:5.0823, succeed, steps:108, total reward:12.9000, 0.42162609100341797 sec
Episode 3556, loss:8.5307, succeed, steps:139, total reward:12.9000, 0.6313352584838867 sec
Episode 3557, loss:5.7459, succeed, steps:88, total reward:11.5000, 0.3524909019470215 sec
Episode 3558, loss:9.2662, succeed, steps:62, total reward:7.2000, 0.24358606338500977 sec
Episode 3559, loss:13.4437, succeed, steps:159, total reward:14.3000, 0.6449058055877686 sec
Episode 3560, loss:14.3249, succeed, steps:102, total reward:10.0000, 0.40705847740173

Episode 3641, loss:14.0316, succeed, steps:126, total reward:9.2000, 0.5176727771759033 sec
Episode 3642, loss:12.5833, succeed, steps:125, total reward:14.4000, 0.49604058265686035 sec
Episode 3643, loss:15.5682, succeed, steps:118, total reward:13.6000, 0.5892574787139893 sec
Episode 3644, loss:7.0970, fail, steps:160, total reward:6.1000, 0.6356310844421387 sec
Episode 3645, loss:11.5017, succeed, steps:135, total reward:12.0000, 0.6334936618804932 sec
Episode 3646, loss:7.2127, succeed, steps:72, total reward:11.0000, 0.29016661643981934 sec
Episode 3647, loss:7.4190, succeed, steps:139, total reward:9.3000, 0.6175906658172607 sec
Episode 3648, loss:3.6415, succeed, steps:107, total reward:8.8000, 0.4411933422088623 sec
Episode 3649, loss:9.3622, succeed, steps:120, total reward:9.4000, 0.5216085910797119 sec
Episode 3650, loss:7.5109, succeed, steps:116, total reward:11.6000, 0.5065274238586426 sec
Episode 3651, loss:12.1811, succeed, steps:113, total reward:11.7000, 0.42358016967

Episode 3731, loss:14.6108, fail, steps:161, total reward:12.2000, 0.7527024745941162 sec
Episode 3732, loss:12.3080, succeed, steps:54, total reward:8.5000, 0.24873733520507812 sec
Episode 3733, loss:10.1897, succeed, steps:137, total reward:10.9000, 0.5348355770111084 sec
Episode 3734, loss:18.3317, fail, steps:161, total reward:12.0000, 0.7419602870941162 sec
Episode 3735, loss:9.4340, succeed, steps:100, total reward:8.0000, 0.40440797805786133 sec
Episode 3736, loss:6.5233, succeed, steps:132, total reward:12.1000, 0.6305623054504395 sec
Episode 3737, loss:10.5814, succeed, steps:102, total reward:13.1000, 0.41182470321655273 sec
Episode 3738, loss:15.6634, succeed, steps:144, total reward:15.9000, 0.6606018543243408 sec
Episode 3739, loss:11.0219, succeed, steps:149, total reward:13.6000, 0.6076879501342773 sec
Episode 3740, loss:8.4844, succeed, steps:88, total reward:8.4000, 0.3624451160430908 sec
Episode 3741, loss:17.0295, succeed, steps:155, total reward:13.4000, 0.615447282

Episode 3821, loss:-58.5507, fail, steps:99, total reward:1.2000, 0.39344310760498047 sec
Episode 3822, loss:8.0438, succeed, steps:102, total reward:10.0000, 0.49564290046691895 sec
Episode 3823, loss:5.2286, succeed, steps:52, total reward:6.5000, 0.2094264030456543 sec
Episode 3824, loss:11.8214, succeed, steps:86, total reward:9.5000, 0.3342735767364502 sec
Episode 3825, loss:6.1526, succeed, steps:106, total reward:10.7000, 0.4680347442626953 sec
Episode 3826, loss:7.1464, succeed, steps:150, total reward:8.4000, 0.618518590927124 sec
Episode 3827, loss:13.7524, fail, steps:162, total reward:13.2000, 0.7100379467010498 sec
Episode 3828, loss:9.9786, succeed, steps:148, total reward:9.5000, 0.6935069561004639 sec
Episode 3829, loss:6.4457, succeed, steps:97, total reward:8.1000, 0.390378475189209 sec
Episode 3830, loss:6.9514, succeed, steps:65, total reward:7.1000, 0.2620546817779541 sec
Episode 3831, loss:13.1821, succeed, steps:153, total reward:14.5000, 0.7042372226715088 sec
E

Episode 3912, loss:2.4783, succeed, steps:108, total reward:6.7000, 0.42516303062438965 sec
Episode 3913, loss:3.0822, succeed, steps:65, total reward:7.1000, 0.33365321159362793 sec
Episode 3914, loss:5.6769, succeed, steps:80, total reward:9.7000, 0.3136613368988037 sec
Episode 3915, loss:8.0437, succeed, steps:140, total reward:10.8000, 0.6183874607086182 sec
Episode 3916, loss:3.4614, fail, steps:160, total reward:6.1000, 0.6249270439147949 sec
Episode 3917, loss:4.6007, succeed, steps:143, total reward:7.6000, 0.7007350921630859 sec
Episode 3918, loss:7.6387, succeed, steps:134, total reward:11.0000, 0.5885951519012451 sec
Episode 3919, loss:1.9357, succeed, steps:121, total reward:3.6000, 0.5887665748596191 sec
Episode 3920, loss:8.9095, succeed, steps:104, total reward:11.8000, 0.39787817001342773 sec
Episode 3921, loss:3.4375, succeed, steps:108, total reward:6.7000, 0.5067634582519531 sec
Episode 3922, loss:8.4826, succeed, steps:82, total reward:11.7000, 0.335735559463501 sec

Episode 4001, loss:8.3025, succeed, steps:102, total reward:10.0000, 0.4791750907897949 sec
Episode 4002, loss:2.9037, succeed, steps:118, total reward:7.4000, 0.4872305393218994 sec
Episode 4003, loss:5.1821, succeed, steps:85, total reward:11.4000, 0.426055908203125 sec
Episode 4004, loss:10.1846, succeed, steps:116, total reward:11.3000, 0.45951223373413086 sec
Episode 4005, loss:8.3303, succeed, steps:155, total reward:10.3000, 0.7012221813201904 sec
Episode 4006, loss:10.5477, succeed, steps:80, total reward:9.7000, 0.32766151428222656 sec
Episode 4007, loss:12.9622, succeed, steps:119, total reward:14.4000, 0.5587856769561768 sec
Episode 4008, loss:6.6775, succeed, steps:152, total reward:10.1000, 0.63578200340271 sec
Episode 4009, loss:4.8154, succeed, steps:123, total reward:9.3000, 0.5598046779632568 sec
Episode 4010, loss:9.4342, succeed, steps:125, total reward:11.3000, 0.6073164939880371 sec
Episode 4011, loss:3.6237, succeed, steps:64, total reward:6.1000, 0.25898933410644

Episode 4091, loss:16.8462, succeed, steps:90, total reward:16.6000, 0.4581301212310791 sec
Episode 4092, loss:16.2826, succeed, steps:154, total reward:15.5000, 0.6080293655395508 sec
Episode 4093, loss:3.6340, succeed, steps:100, total reward:4.9000, 0.4054088592529297 sec
Episode 4094, loss:14.6632, succeed, steps:125, total reward:17.5000, 0.5234065055847168 sec
Episode 4095, loss:7.8624, fail, steps:161, total reward:9.8000, 0.7209343910217285 sec
Episode 4096, loss:6.9353, succeed, steps:129, total reward:12.2000, 0.5657787322998047 sec
Episode 4097, loss:3.7924, succeed, steps:34, total reward:7.1000, 0.14177155494689941 sec
Episode 4098, loss:10.1905, fail, steps:162, total reward:15.3000, 0.7354345321655273 sec
Episode 4099, loss:5.9390, succeed, steps:95, total reward:6.1000, 0.38303136825561523 sec
Episode 4100, loss:5.7583, succeed, steps:123, total reward:12.4000, 0.5560617446899414 sec
Episode 4101, loss:13.5944, succeed, steps:114, total reward:12.6000, 0.455327272415161

Episode 4182, loss:1.3807, succeed, steps:152, total reward:4.2000, 0.6991217136383057 sec
Episode 4183, loss:11.1443, succeed, steps:110, total reward:14.9000, 0.4464261531829834 sec
Episode 4184, loss:18.1749, fail, steps:162, total reward:15.3000, 0.7734687328338623 sec
Episode 4185, loss:11.1330, succeed, steps:138, total reward:14.9000, 0.60652756690979 sec
Episode 4186, loss:10.4477, succeed, steps:150, total reward:8.4000, 0.6485676765441895 sec
Episode 4187, loss:12.1976, fail, steps:162, total reward:14.3000, 0.7437732219696045 sec
Episode 4188, loss:8.0691, fail, steps:160, total reward:7.1000, 0.6826343536376953 sec
Episode 4189, loss:12.2403, fail, steps:161, total reward:9.1000, 0.6611051559448242 sec
Episode 4190, loss:15.3596, fail, steps:162, total reward:16.3000, 0.7500967979431152 sec
Episode 4191, loss:5.6488, succeed, steps:110, total reward:8.7000, 0.4816102981567383 sec
Episode 4192, loss:8.6069, succeed, steps:133, total reward:13.1000, 0.5377781391143799 sec
Epi

Episode 4273, loss:8.7270, succeed, steps:113, total reward:11.5000, 0.43907713890075684 sec
Episode 4274, loss:6.0512, succeed, steps:143, total reward:10.7000, 0.5842161178588867 sec
Episode 4275, loss:10.1522, succeed, steps:101, total reward:15.2000, 0.39986276626586914 sec
Episode 4276, loss:6.0706, succeed, steps:133, total reward:12.7000, 0.6131057739257812 sec
Episode 4277, loss:10.1095, succeed, steps:101, total reward:12.1000, 0.4101850986480713 sec
Episode 4278, loss:5.8784, succeed, steps:77, total reward:9.8000, 0.42322731018066406 sec
Episode 4279, loss:11.4289, succeed, steps:159, total reward:14.3000, 0.6299052238464355 sec
Episode 4280, loss:8.8447, fail, steps:161, total reward:10.2000, 0.7788891792297363 sec
Episode 4281, loss:6.5710, succeed, steps:95, total reward:9.2000, 0.3955700397491455 sec
Episode 4282, loss:13.6752, succeed, steps:144, total reward:17.9000, 0.6648304462432861 sec
Episode 4283, loss:4.0429, succeed, steps:72, total reward:7.9000, 0.30016064643

Episode 4363, loss:9.7727, succeed, steps:93, total reward:10.3000, 0.4550294876098633 sec
Episode 4364, loss:4.8344, succeed, steps:66, total reward:7.9000, 0.26303696632385254 sec
Episode 4365, loss:5.1259, fail, steps:161, total reward:10.2000, 0.7364888191223145 sec
Episode 4366, loss:7.3141, succeed, steps:135, total reward:12.0000, 0.5386888980865479 sec
Episode 4367, loss:6.4640, succeed, steps:68, total reward:10.1000, 0.32775449752807617 sec
Episode 4368, loss:14.9038, succeed, steps:124, total reward:16.5000, 0.4924652576446533 sec
Episode 4369, loss:5.7085, succeed, steps:81, total reward:10.5000, 0.32360339164733887 sec
Episode 4370, loss:10.4549, succeed, steps:139, total reward:16.0000, 0.6361596584320068 sec
Episode 4371, loss:9.4446, succeed, steps:117, total reward:6.4000, 0.4684407711029053 sec
Episode 4372, loss:7.8213, fail, steps:161, total reward:10.2000, 0.730891227722168 sec
Episode 4373, loss:6.0912, succeed, steps:133, total reward:10.0000, 0.5270349979400635 

Episode 4453, loss:8.0098, succeed, steps:102, total reward:16.2000, 0.42429494857788086 sec
Episode 4454, loss:3.9687, succeed, steps:142, total reward:9.7000, 0.6626613140106201 sec
Episode 4455, loss:6.7795, succeed, steps:114, total reward:12.7000, 0.44028449058532715 sec
Episode 4456, loss:5.6300, succeed, steps:140, total reward:13.9000, 0.6165626049041748 sec
Episode 4457, loss:3.7609, succeed, steps:116, total reward:8.5000, 0.5072002410888672 sec
Episode 4458, loss:2.4918, succeed, steps:116, total reward:5.4000, 0.4612388610839844 sec
Episode 4459, loss:5.6527, fail, steps:161, total reward:11.2000, 0.721482515335083 sec
Episode 4460, loss:14.1169, succeed, steps:122, total reward:17.6000, 0.4889349937438965 sec
Episode 4461, loss:4.8361, succeed, steps:85, total reward:5.4000, 0.42334413528442383 sec
Episode 4462, loss:7.5052, succeed, steps:86, total reward:6.1000, 0.3476450443267822 sec
Episode 4463, loss:6.9633, succeed, steps:76, total reward:11.9000, 0.3247950077056885 

Episode 4543, loss:7.6244, succeed, steps:88, total reward:8.4000, 0.34059596061706543 sec
Episode 4544, loss:6.9979, fail, steps:161, total reward:9.1000, 0.7290408611297607 sec
Episode 4545, loss:5.4713, succeed, steps:124, total reward:10.3000, 0.47831106185913086 sec
Episode 4546, loss:4.3493, succeed, steps:135, total reward:8.9000, 0.6021959781646729 sec
Episode 4547, loss:13.5827, succeed, steps:108, total reward:12.9000, 0.47255921363830566 sec
Episode 4548, loss:8.7632, succeed, steps:88, total reward:11.5000, 0.34806060791015625 sec
Episode 4549, loss:3.3939, fail, steps:161, total reward:9.1000, 0.7323298454284668 sec
Episode 4550, loss:7.2442, succeed, steps:91, total reward:8.0000, 0.36652040481567383 sec
Episode 4551, loss:10.8479, succeed, steps:140, total reward:13.9000, 0.6568002700805664 sec
Episode 4552, loss:13.4512, fail, steps:161, total reward:12.2000, 0.6389093399047852 sec
Episode 4553, loss:8.2167, fail, steps:160, total reward:7.1000, 0.7309823036193848 sec
E

Episode 4633, loss:1.9896, fail, steps:160, total reward:5.0000, 0.6866803169250488 sec
Episode 4634, loss:6.8240, succeed, steps:127, total reward:9.9000, 0.6224203109741211 sec
Episode 4635, loss:3.4217, succeed, steps:79, total reward:8.7000, 0.32498884201049805 sec
Episode 4636, loss:6.4755, succeed, steps:81, total reward:7.6000, 0.31948375701904297 sec
Episode 4637, loss:12.6615, succeed, steps:103, total reward:14.1000, 0.4932839870452881 sec
Episode 4638, loss:4.5717, succeed, steps:78, total reward:6.3000, 0.3089730739593506 sec
Episode 4639, loss:16.2942, succeed, steps:160, total reward:15.3000, 0.6905977725982666 sec
Episode 4640, loss:6.0255, succeed, steps:83, total reward:15.8000, 0.3419513702392578 sec
Episode 4641, loss:3.4092, succeed, steps:39, total reward:5.9000, 0.15593814849853516 sec
Episode 4642, loss:6.8959, succeed, steps:87, total reward:10.4000, 0.329636812210083 sec
Episode 4643, loss:5.4494, succeed, steps:79, total reward:14.9000, 0.40253520011901855 sec

Episode 4724, loss:10.1296, succeed, steps:123, total reward:15.5000, 0.604271650314331 sec
Episode 4725, loss:3.7905, succeed, steps:47, total reward:7.7000, 0.18557024002075195 sec
Episode 4726, loss:11.7312, succeed, steps:106, total reward:14.0000, 0.4726855754852295 sec
Episode 4727, loss:1.9360, succeed, steps:53, total reward:7.5000, 0.21213245391845703 sec
Episode 4728, loss:6.8980, succeed, steps:100, total reward:14.2000, 0.4610319137573242 sec
Episode 4729, loss:7.3647, succeed, steps:106, total reward:7.8000, 0.4262824058532715 sec
Episode 4730, loss:9.6443, succeed, steps:74, total reward:9.9000, 0.29288411140441895 sec
Episode 4731, loss:12.5761, succeed, steps:118, total reward:16.7000, 0.5555894374847412 sec
Episode 4732, loss:16.1111, succeed, steps:139, total reward:16.0000, 0.5500805377960205 sec
Episode 4733, loss:7.8941, fail, steps:161, total reward:9.1000, 0.7305421829223633 sec
Episode 4734, loss:5.5610, succeed, steps:38, total reward:8.0000, 0.1589231491088867

Episode 4814, loss:8.2225, succeed, steps:88, total reward:8.4000, 0.4129960536956787 sec
Episode 4815, loss:7.5353, succeed, steps:123, total reward:9.3000, 0.5026006698608398 sec
Episode 4816, loss:10.2381, succeed, steps:109, total reward:13.9000, 0.5152254104614258 sec
Episode 4817, loss:-0.5267, succeed, steps:128, total reward:12.2000, 0.5398991107940674 sec
Episode 4818, loss:-1.2360, fail, steps:159, total reward:2.6000, 0.7129173278808594 sec
Episode 4819, loss:7.4935, succeed, steps:103, total reward:14.1000, 0.4078793525695801 sec
Episode 4820, loss:6.4575, succeed, steps:79, total reward:11.8000, 0.3997461795806885 sec
Episode 4821, loss:-71.9654, fail, steps:15, total reward:-8.4000, 0.06403422355651855 sec
Episode 4822, loss:7.0384, succeed, steps:143, total reward:13.8000, 0.5604791641235352 sec
Episode 4823, loss:8.8415, succeed, steps:81, total reward:10.6000, 0.45632362365722656 sec
Episode 4824, loss:5.8883, succeed, steps:95, total reward:12.3000, 0.3728106021881103

Episode 4905, loss:5.3273, fail, steps:161, total reward:12.2000, 0.6765401363372803 sec
Episode 4906, loss:11.1536, fail, steps:162, total reward:15.3000, 0.7524693012237549 sec
Episode 4907, loss:7.3933, succeed, steps:112, total reward:10.7000, 0.4520432949066162 sec
Episode 4908, loss:3.9144, succeed, steps:33, total reward:6.1000, 0.1243896484375 sec
Episode 4909, loss:1.3802, succeed, steps:29, total reward:5.2000, 0.22450494766235352 sec
Episode 4910, loss:5.4191, succeed, steps:89, total reward:12.5000, 0.3465449810028076 sec
Episode 4911, loss:13.4798, succeed, steps:105, total reward:16.1000, 0.423839807510376 sec
Episode 4912, loss:6.4263, succeed, steps:103, total reward:14.1000, 0.48751235008239746 sec
Episode 4913, loss:10.5910, succeed, steps:79, total reward:11.8000, 0.31181883811950684 sec
Episode 4914, loss:9.4028, succeed, steps:129, total reward:12.2000, 0.5027711391448975 sec
Episode 4915, loss:11.1956, succeed, steps:116, total reward:14.6000, 0.4622328281402588 s

Episode 4995, loss:3.4707, succeed, steps:114, total reward:9.6000, 0.4223599433898926 sec
Episode 4996, loss:5.8150, succeed, steps:79, total reward:11.8000, 0.31823134422302246 sec
Episode 4997, loss:3.6925, fail, steps:161, total reward:12.2000, 0.716874361038208 sec
Episode 4998, loss:12.0145, succeed, steps:126, total reward:12.3000, 0.5338690280914307 sec
Episode 4999, loss:6.4247, succeed, steps:160, total reward:9.1000, 0.6351470947265625 sec
Checkpoint saved at episode 5000 to datasets/rl_sort_transformer_easy/list6_transformer3_128_gamma08_step210_v1/ckpt_5000_0.8130_117.42.pth
Episode 5000, loss:11.5678, succeed, steps:117, total reward:12.6000, 0.5858635902404785 sec
Episode 5001, loss:4.6508, succeed, steps:78, total reward:7.7000, 0.31148576736450195 sec
Episode 5002, loss:9.5483, succeed, steps:151, total reward:15.6000, 0.6608846187591553 sec
Episode 5003, loss:2.9809, succeed, steps:56, total reward:4.3000, 0.2328333854675293 sec
Episode 5004, loss:8.3504, succeed, ste

Episode 5084, loss:1.1722, succeed, steps:64, total reward:6.0000, 0.2592895030975342 sec
Episode 5085, loss:1.1832, succeed, steps:104, total reward:8.9000, 0.4793088436126709 sec
Episode 5086, loss:7.8677, succeed, steps:100, total reward:11.1000, 0.3963284492492676 sec
Episode 5087, loss:3.3822, succeed, steps:120, total reward:12.2000, 0.47484517097473145 sec
Episode 5088, loss:3.5379, succeed, steps:103, total reward:7.9000, 0.4897143840789795 sec
Episode 5089, loss:3.0110, succeed, steps:88, total reward:8.4000, 0.34882187843322754 sec
Episode 5090, loss:9.4482, succeed, steps:97, total reward:14.3000, 0.44720029830932617 sec
Episode 5091, loss:8.9906, succeed, steps:112, total reward:13.8000, 0.4315974712371826 sec
Episode 5092, loss:2.9124, succeed, steps:158, total reward:10.2000, 0.7008552551269531 sec
Episode 5093, loss:10.4744, succeed, steps:96, total reward:10.2000, 0.3856315612792969 sec
Episode 5094, loss:9.3365, succeed, steps:100, total reward:14.2000, 0.3898561000823

Episode 5174, loss:9.5583, succeed, steps:96, total reward:16.4000, 0.4334259033203125 sec
Episode 5175, loss:11.8537, succeed, steps:100, total reward:11.1000, 0.39467859268188477 sec
Episode 5176, loss:3.8777, succeed, steps:117, total reward:12.6000, 0.5555078983306885 sec
Episode 5177, loss:7.6094, succeed, steps:114, total reward:12.7000, 0.4520087242126465 sec
Episode 5178, loss:6.7147, succeed, steps:69, total reward:4.9000, 0.2588925361633301 sec
Episode 5179, loss:3.3816, succeed, steps:114, total reward:9.6000, 0.5598196983337402 sec
Episode 5180, loss:2.5981, succeed, steps:74, total reward:6.8000, 0.30129408836364746 sec
Episode 5181, loss:10.7123, succeed, steps:103, total reward:17.2000, 0.4429507255554199 sec
Episode 5182, loss:13.4603, succeed, steps:97, total reward:11.2000, 0.38790225982666016 sec
Episode 5183, loss:9.6812, succeed, steps:116, total reward:11.6000, 0.45858049392700195 sec
Episode 5184, loss:5.6825, succeed, steps:152, total reward:13.5000, 0.695780992

Episode 5264, loss:8.1930, fail, steps:161, total reward:11.0000, 0.6495471000671387 sec
Episode 5265, loss:6.7233, succeed, steps:126, total reward:15.4000, 0.6175286769866943 sec
Episode 5266, loss:6.3306, succeed, steps:117, total reward:12.6000, 0.49760961532592773 sec
Episode 5267, loss:3.5794, succeed, steps:90, total reward:7.3000, 0.38260841369628906 sec
Episode 5268, loss:7.8071, succeed, steps:106, total reward:14.0000, 0.4250502586364746 sec
Episode 5269, loss:5.2693, succeed, steps:112, total reward:7.6000, 0.5672156810760498 sec
Episode 5270, loss:6.4359, succeed, steps:146, total reward:13.4000, 0.6268408298492432 sec
Episode 5271, loss:10.5099, succeed, steps:78, total reward:10.8000, 0.3165912628173828 sec
Episode 5272, loss:5.3298, succeed, steps:77, total reward:9.8000, 0.3089182376861572 sec
Episode 5273, loss:4.9125, fail, steps:161, total reward:12.2000, 0.6515305042266846 sec
Episode 5274, loss:4.9262, fail, steps:161, total reward:10.2000, 0.6975252628326416 sec


Episode 5354, loss:8.1893, succeed, steps:90, total reward:13.5000, 0.36605215072631836 sec
Episode 5355, loss:1.7421, succeed, steps:82, total reward:8.6000, 0.4244959354400635 sec
Episode 5356, loss:2.9660, succeed, steps:69, total reward:4.9000, 0.2772841453552246 sec
Episode 5357, loss:7.3042, fail, steps:160, total reward:5.0000, 0.7181403636932373 sec
Episode 5358, loss:2.9304, succeed, steps:125, total reward:11.3000, 0.5200562477111816 sec
Episode 5359, loss:1.7233, succeed, steps:120, total reward:9.2000, 0.4684293270111084 sec
Episode 5360, loss:7.9165, succeed, steps:134, total reward:11.0000, 0.5488896369934082 sec
Episode 5361, loss:-0.0037, succeed, steps:140, total reward:7.7000, 0.6574440002441406 sec
Episode 5362, loss:9.5392, succeed, steps:76, total reward:11.9000, 0.358112096786499 sec
Episode 5363, loss:9.7913, succeed, steps:130, total reward:13.2000, 0.6192824840545654 sec
Episode 5364, loss:5.6832, succeed, steps:154, total reward:9.3000, 0.6192560195922852 sec


Episode 5444, loss:3.3715, fail, steps:161, total reward:12.2000, 0.6598148345947266 sec
Episode 5445, loss:0.0094, fail, steps:160, total reward:7.8000, 0.6740052700042725 sec
Episode 5446, loss:0.8846, fail, steps:160, total reward:7.1000, 0.7119941711425781 sec
Episode 5447, loss:2.3331, succeed, steps:55, total reward:6.4000, 0.2277991771697998 sec
Episode 5448, loss:6.1967, succeed, steps:96, total reward:10.2000, 0.4549582004547119 sec
Episode 5449, loss:0.4345, succeed, steps:80, total reward:6.5000, 0.31665968894958496 sec
Episode 5450, loss:8.6650, succeed, steps:141, total reward:9.8000, 0.580888032913208 sec
Episode 5451, loss:8.0360, succeed, steps:105, total reward:9.9000, 0.4958367347717285 sec
Episode 5452, loss:5.4267, fail, steps:161, total reward:10.2000, 0.665313720703125 sec
Episode 5453, loss:7.2948, succeed, steps:63, total reward:8.2000, 0.3106510639190674 sec
Episode 5454, loss:6.0658, succeed, steps:78, total reward:7.7000, 0.30647897720336914 sec
Episode 5455,

Episode 5534, loss:8.7238, succeed, steps:117, total reward:15.7000, 0.457904577255249 sec
Episode 5535, loss:8.3522, fail, steps:161, total reward:12.0000, 0.736534833908081 sec
Episode 5536, loss:5.4047, succeed, steps:156, total reward:14.4000, 0.6230218410491943 sec
Episode 5537, loss:5.2682, succeed, steps:130, total reward:10.1000, 0.6096475124359131 sec
Episode 5538, loss:0.9872, succeed, steps:36, total reward:6.0000, 0.14976024627685547 sec
Episode 5539, loss:9.9007, succeed, steps:107, total reward:11.7000, 0.44964075088500977 sec
Episode 5540, loss:4.3576, succeed, steps:102, total reward:13.1000, 0.4020531177520752 sec
Episode 5541, loss:3.8543, succeed, steps:94, total reward:11.3000, 0.3759009838104248 sec
Episode 5542, loss:2.2004, succeed, steps:94, total reward:8.2000, 0.48703861236572266 sec
Episode 5543, loss:3.0411, succeed, steps:66, total reward:8.1000, 0.2656066417694092 sec
Episode 5544, loss:1.0311, succeed, steps:148, total reward:6.4000, 0.6605038642883301 se

Episode 5624, loss:5.4606, succeed, steps:153, total reward:8.1000, 0.6003248691558838 sec
Episode 5625, loss:4.8105, succeed, steps:99, total reward:13.2000, 0.47226405143737793 sec
Episode 5626, loss:6.7513, succeed, steps:94, total reward:11.3000, 0.37245678901672363 sec
Episode 5627, loss:2.0963, succeed, steps:42, total reward:2.7000, 0.1684410572052002 sec
Episode 5628, loss:3.6011, succeed, steps:59, total reward:10.4000, 0.2763824462890625 sec
Episode 5629, loss:14.2246, succeed, steps:120, total reward:15.6000, 0.4709045886993408 sec
Episode 5630, loss:4.4254, succeed, steps:119, total reward:11.5000, 0.4668300151824951 sec
Episode 5631, loss:9.2800, succeed, steps:117, total reward:15.7000, 0.5114338397979736 sec
Episode 5632, loss:13.5703, succeed, steps:148, total reward:15.7000, 0.6799492835998535 sec
Episode 5633, loss:0.4729, succeed, steps:36, total reward:2.9000, 0.1533212661743164 sec
Episode 5634, loss:5.3147, succeed, steps:132, total reward:15.2000, 0.5737864971160

Episode 5714, loss:4.6900, succeed, steps:118, total reward:13.6000, 0.46946120262145996 sec
Episode 5715, loss:7.6650, fail, steps:162, total reward:15.3000, 0.7724297046661377 sec
Episode 5716, loss:3.8545, succeed, steps:93, total reward:10.2000, 0.3772273063659668 sec
Episode 5717, loss:0.7375, fail, steps:160, total reward:6.1000, 0.7186934947967529 sec
Episode 5718, loss:5.7397, succeed, steps:46, total reward:6.7000, 0.19098472595214844 sec
Episode 5719, loss:2.1458, succeed, steps:134, total reward:10.8000, 0.5987339019775391 sec
Episode 5720, loss:8.5685, succeed, steps:110, total reward:8.7000, 0.44045019149780273 sec
Episode 5721, loss:1.9102, succeed, steps:50, total reward:4.5000, 0.2429506778717041 sec
Episode 5722, loss:4.9225, succeed, steps:81, total reward:7.6000, 0.3157079219818115 sec
Episode 5723, loss:-1.1337, succeed, steps:91, total reward:5.2000, 0.37142467498779297 sec
Episode 5724, loss:4.3571, succeed, steps:89, total reward:9.2000, 0.47033214569091797 sec
E

Episode 5804, loss:5.0941, succeed, steps:126, total reward:15.4000, 0.49807238578796387 sec
Episode 5805, loss:14.2947, succeed, steps:141, total reward:14.9000, 0.6921370029449463 sec
Episode 5806, loss:7.6840, succeed, steps:89, total reward:18.7000, 0.4370119571685791 sec
Episode 5807, loss:8.7855, fail, steps:161, total reward:12.2000, 0.6528580188751221 sec
Episode 5808, loss:10.6881, succeed, steps:141, total reward:18.0000, 0.5706675052642822 sec
Episode 5809, loss:5.2155, succeed, steps:121, total reward:13.2000, 0.5963795185089111 sec
Episode 5810, loss:4.0660, succeed, steps:64, total reward:9.2000, 0.2584669589996338 sec
Episode 5811, loss:3.7451, succeed, steps:154, total reward:12.4000, 0.6960752010345459 sec
Episode 5812, loss:6.9659, succeed, steps:132, total reward:15.2000, 0.527475118637085 sec
Episode 5813, loss:11.2261, succeed, steps:103, total reward:14.1000, 0.502269983291626 sec
Episode 5814, loss:5.1072, succeed, steps:77, total reward:9.8000, 0.305282831192016

Episode 5894, loss:3.7865, succeed, steps:99, total reward:9.8000, 0.42885899543762207 sec
Episode 5895, loss:4.9544, succeed, steps:63, total reward:11.3000, 0.2503325939178467 sec
Episode 5896, loss:7.0165, succeed, steps:127, total reward:13.3000, 0.556225061416626 sec
Episode 5897, loss:2.8807, succeed, steps:50, total reward:7.6000, 0.2024548053741455 sec
Episode 5898, loss:8.8405, succeed, steps:160, total reward:15.3000, 0.6546065807342529 sec
Episode 5899, loss:11.4038, succeed, steps:143, total reward:16.9000, 0.5934803485870361 sec
Episode 5900, loss:8.3097, fail, steps:163, total reward:17.3000, 0.7278702259063721 sec
Episode 5901, loss:4.9047, succeed, steps:50, total reward:7.6000, 0.1909472942352295 sec
Episode 5902, loss:3.5776, succeed, steps:108, total reward:12.9000, 0.4426422119140625 sec
Episode 5903, loss:2.0601, fail, steps:161, total reward:11.2000, 0.7072782516479492 sec
Episode 5904, loss:4.7766, fail, steps:162, total reward:15.3000, 0.6899216175079346 sec
Epi

Episode 5985, loss:3.9656, succeed, steps:111, total reward:9.7000, 0.44090747833251953 sec
Episode 5986, loss:9.2101, succeed, steps:87, total reward:7.3000, 0.3571431636810303 sec
Episode 5987, loss:5.4526, succeed, steps:75, total reward:10.9000, 0.4069809913635254 sec
Episode 5988, loss:1.7971, succeed, steps:64, total reward:9.2000, 0.271956205368042 sec
Episode 5989, loss:7.2999, succeed, steps:58, total reward:6.2000, 0.22600293159484863 sec
Episode 5990, loss:7.7109, succeed, steps:65, total reward:10.2000, 0.2817692756652832 sec
Episode 5991, loss:2.2297, succeed, steps:60, total reward:11.4000, 0.2888948917388916 sec
Episode 5992, loss:4.5848, succeed, steps:80, total reward:9.7000, 0.3117973804473877 sec
Episode 5993, loss:5.7247, succeed, steps:138, total reward:8.8000, 0.6093714237213135 sec
Episode 5994, loss:5.9455, succeed, steps:69, total reward:11.1000, 0.27626633644104004 sec
Episode 5995, loss:5.5562, succeed, steps:50, total reward:10.7000, 0.1975712776184082 sec
E

Episode 6074, loss:2.6357, succeed, steps:53, total reward:7.5000, 0.2086174488067627 sec
Episode 6075, loss:7.9568, succeed, steps:154, total reward:9.3000, 0.6965453624725342 sec
Episode 6076, loss:4.3933, succeed, steps:51, total reward:8.6000, 0.14996576309204102 sec
Episode 6077, loss:5.6640, succeed, steps:95, total reward:9.2000, 0.3795900344848633 sec
Episode 6078, loss:5.7785, succeed, steps:136, total reward:16.1000, 0.623035192489624 sec
Episode 6079, loss:2.7155, succeed, steps:44, total reward:7.8000, 0.18311858177185059 sec
Episode 6080, loss:4.6231, succeed, steps:72, total reward:7.9000, 0.3156752586364746 sec
Episode 6081, loss:4.4821, succeed, steps:105, total reward:13.0000, 0.417938232421875 sec
Episode 6082, loss:8.6511, succeed, steps:91, total reward:14.5000, 0.3665904998779297 sec
Episode 6083, loss:6.0867, succeed, steps:138, total reward:11.9000, 0.6411488056182861 sec
Episode 6084, loss:5.0455, succeed, steps:105, total reward:13.0000, 0.425595760345459 sec
E

Episode 6165, loss:3.2438, succeed, steps:75, total reward:10.9000, 0.4070265293121338 sec
Episode 6166, loss:8.5477, succeed, steps:126, total reward:9.2000, 0.4948279857635498 sec
Episode 6167, loss:4.7180, succeed, steps:72, total reward:11.0000, 0.34206318855285645 sec
Episode 6168, loss:6.7898, succeed, steps:87, total reward:13.6000, 0.34099817276000977 sec
Episode 6169, loss:5.5746, succeed, steps:150, total reward:11.5000, 0.6420414447784424 sec
Episode 6170, loss:2.2484, succeed, steps:109, total reward:7.7000, 0.4382772445678711 sec
Episode 6171, loss:1.6048, fail, steps:162, total reward:14.3000, 0.7080345153808594 sec
Episode 6172, loss:-1.4614, fail, steps:159, total reward:2.0000, 0.6614646911621094 sec
Episode 6173, loss:9.7823, fail, steps:160, total reward:7.1000, 0.6755006313323975 sec
Episode 6174, loss:11.3379, fail, steps:162, total reward:14.3000, 0.65401291847229 sec
Episode 6175, loss:5.8496, succeed, steps:133, total reward:13.0000, 0.6444997787475586 sec
Episo

Episode 6255, loss:7.8710, succeed, steps:135, total reward:11.3000, 0.5417208671569824 sec
Episode 6256, loss:4.9923, succeed, steps:153, total reward:11.4000, 0.7434906959533691 sec
Episode 6257, loss:3.3942, succeed, steps:114, total reward:9.5000, 0.46582627296447754 sec
Episode 6258, loss:7.8030, succeed, steps:68, total reward:10.1000, 0.3193824291229248 sec
Episode 6259, loss:12.3899, succeed, steps:111, total reward:19.0000, 0.4339783191680908 sec
Episode 6260, loss:10.3232, fail, steps:162, total reward:13.2000, 0.7383716106414795 sec
Episode 6261, loss:4.8548, succeed, steps:108, total reward:9.8000, 0.44774651527404785 sec
Episode 6262, loss:5.4336, succeed, steps:98, total reward:15.3000, 0.4806382656097412 sec
Episode 6263, loss:6.4316, succeed, steps:119, total reward:11.5000, 0.47491908073425293 sec
Episode 6264, loss:5.1670, succeed, steps:49, total reward:6.6000, 0.2887849807739258 sec
Episode 6265, loss:9.7166, succeed, steps:107, total reward:11.9000, 0.4034307003021

Episode 6345, loss:2.1728, succeed, steps:71, total reward:6.9000, 0.3006620407104492 sec
Episode 6346, loss:2.1361, fail, steps:161, total reward:10.9000, 0.6894345283508301 sec
Episode 6347, loss:4.7477, succeed, steps:99, total reward:10.1000, 0.3998682498931885 sec
Episode 6348, loss:11.2858, succeed, steps:91, total reward:11.3000, 0.43114185333251953 sec
Episode 6349, loss:5.3910, succeed, steps:83, total reward:12.7000, 0.32879090309143066 sec
Episode 6350, loss:8.1486, fail, steps:161, total reward:10.2000, 0.6810815334320068 sec
Episode 6351, loss:7.6730, succeed, steps:121, total reward:13.5000, 0.52724289894104 sec
Episode 6352, loss:2.8131, succeed, steps:97, total reward:10.7000, 0.3867151737213135 sec
Episode 6353, loss:12.8488, succeed, steps:122, total reward:14.5000, 0.49830079078674316 sec
Episode 6354, loss:7.3553, succeed, steps:105, total reward:16.1000, 0.4948616027832031 sec
Episode 6355, loss:4.1500, succeed, steps:124, total reward:7.2000, 0.5365877151489258 se

Episode 6435, loss:2.5231, succeed, steps:126, total reward:12.3000, 0.549079418182373 sec
Episode 6436, loss:2.6706, succeed, steps:66, total reward:8.1000, 0.2602987289428711 sec
Episode 6437, loss:3.1654, succeed, steps:102, total reward:13.1000, 0.407717227935791 sec
Episode 6438, loss:7.1949, succeed, steps:139, total reward:12.9000, 0.632174015045166 sec
Episode 6439, loss:4.3835, succeed, steps:73, total reward:12.0000, 0.2950296401977539 sec
Episode 6440, loss:2.5894, succeed, steps:71, total reward:6.9000, 0.3949906826019287 sec
Episode 6441, loss:3.5013, succeed, steps:94, total reward:8.2000, 0.37047624588012695 sec
Episode 6442, loss:2.9695, succeed, steps:68, total reward:10.1000, 0.2779057025909424 sec
Episode 6443, loss:4.8825, succeed, steps:119, total reward:14.6000, 0.5901875495910645 sec
Episode 6444, loss:6.0374, fail, steps:161, total reward:10.2000, 0.6595745086669922 sec
Episode 6445, loss:5.1884, succeed, steps:88, total reward:11.3000, 0.41308116912841797 sec
E

Episode 6525, loss:4.8642, succeed, steps:142, total reward:12.1000, 0.5948944091796875 sec
Episode 6526, loss:3.6139, succeed, steps:78, total reward:10.7000, 0.31950950622558594 sec
Episode 6527, loss:6.4833, succeed, steps:125, total reward:11.3000, 0.6162765026092529 sec
Episode 6528, loss:6.0949, fail, steps:162, total reward:12.6000, 0.6660685539245605 sec
Episode 6529, loss:7.5035, succeed, steps:82, total reward:11.7000, 0.3349149227142334 sec
Episode 6530, loss:7.6309, succeed, steps:161, total reward:16.3000, 0.7609167098999023 sec
Episode 6531, loss:3.7431, succeed, steps:69, total reward:8.0000, 0.2792649269104004 sec
Episode 6532, loss:8.9520, succeed, steps:118, total reward:10.5000, 0.55393385887146 sec
Episode 6533, loss:14.4616, succeed, steps:93, total reward:13.3000, 0.421245813369751 sec
Episode 6534, loss:1.7878, fail, steps:161, total reward:12.2000, 0.7089095115661621 sec
Episode 6535, loss:7.5388, succeed, steps:148, total reward:12.6000, 0.6520323753356934 sec


Episode 6615, loss:5.7303, succeed, steps:115, total reward:7.5000, 0.5563147068023682 sec
Episode 6616, loss:5.1943, succeed, steps:94, total reward:11.3000, 0.37450623512268066 sec
Episode 6617, loss:5.5184, succeed, steps:93, total reward:16.5000, 0.4501945972442627 sec
Episode 6618, loss:2.4448, succeed, steps:70, total reward:9.0000, 0.2850203514099121 sec
Episode 6619, loss:8.4314, succeed, steps:127, total reward:9.8000, 0.5288946628570557 sec
Episode 6620, loss:2.8168, succeed, steps:62, total reward:7.2000, 0.2672586441040039 sec
Episode 6621, loss:5.5292, succeed, steps:87, total reward:13.6000, 0.3747138977050781 sec
Episode 6622, loss:6.6743, succeed, steps:67, total reward:12.2000, 0.3001112937927246 sec
Episode 6623, loss:4.2149, succeed, steps:83, total reward:12.7000, 0.32924580574035645 sec
Episode 6624, loss:6.9001, succeed, steps:74, total reward:9.9000, 0.3003373146057129 sec
Episode 6625, loss:5.0950, succeed, steps:62, total reward:10.3000, 0.35608935356140137 sec

Episode 6705, loss:3.7712, succeed, steps:99, total reward:13.2000, 0.3888230323791504 sec
Episode 6706, loss:7.7165, succeed, steps:126, total reward:15.4000, 0.632591724395752 sec
Episode 6707, loss:5.4659, succeed, steps:124, total reward:13.4000, 0.5088636875152588 sec
Episode 6708, loss:3.8056, succeed, steps:137, total reward:14.0000, 0.6457870006561279 sec
Episode 6709, loss:3.4935, succeed, steps:44, total reward:4.7000, 0.17616558074951172 sec
Episode 6710, loss:3.7658, succeed, steps:70, total reward:9.0000, 0.29215312004089355 sec
Episode 6711, loss:1.0741, succeed, steps:47, total reward:7.7000, 0.1840822696685791 sec
Episode 6712, loss:6.2396, succeed, steps:107, total reward:18.0000, 0.46883511543273926 sec
Episode 6713, loss:2.3596, fail, steps:160, total reward:7.1000, 0.6723437309265137 sec
Episode 6714, loss:6.7411, succeed, steps:103, total reward:17.2000, 0.43248867988586426 sec
Episode 6715, loss:8.5505, succeed, steps:126, total reward:12.3000, 0.4587881565093994 

Episode 6795, loss:3.0865, succeed, steps:111, total reward:9.7000, 0.46598076820373535 sec
Episode 6796, loss:2.4331, succeed, steps:119, total reward:14.6000, 0.4712791442871094 sec
Episode 6797, loss:6.9632, succeed, steps:107, total reward:11.9000, 0.4687466621398926 sec
Episode 6798, loss:2.4199, succeed, steps:86, total reward:6.4000, 0.33632349967956543 sec
Episode 6799, loss:9.0617, succeed, steps:138, total reward:11.9000, 0.6382272243499756 sec
Episode 6800, loss:3.3756, succeed, steps:14, total reward:2.6000, 0.06448483467102051 sec
Episode 6801, loss:2.6780, succeed, steps:106, total reward:10.5000, 0.4927680492401123 sec
Episode 6802, loss:2.1534, succeed, steps:118, total reward:10.5000, 0.4660937786102295 sec
Episode 6803, loss:3.0194, succeed, steps:145, total reward:12.7000, 0.5944259166717529 sec
Episode 6804, loss:6.4962, succeed, steps:79, total reward:8.4000, 0.32376790046691895 sec
Episode 6805, loss:3.6622, succeed, steps:71, total reward:10.0000, 0.2858676910400

Episode 6885, loss:3.5782, succeed, steps:64, total reward:9.0000, 0.2576558589935303 sec
Episode 6886, loss:3.7069, succeed, steps:70, total reward:9.0000, 0.2781822681427002 sec
Episode 6887, loss:5.8200, succeed, steps:96, total reward:13.3000, 0.4466710090637207 sec
Episode 6888, loss:1.3299, fail, steps:161, total reward:9.1000, 0.6629528999328613 sec
Episode 6889, loss:3.5300, succeed, steps:90, total reward:6.8000, 0.36521339416503906 sec
Episode 6890, loss:2.7846, succeed, steps:88, total reward:11.5000, 0.3449208736419678 sec
Episode 6891, loss:2.9683, succeed, steps:66, total reward:11.2000, 0.31508398056030273 sec
Episode 6892, loss:7.4613, succeed, steps:129, total reward:9.1000, 0.5033986568450928 sec
Episode 6893, loss:2.2422, succeed, steps:110, total reward:8.7000, 0.5181245803833008 sec
Episode 6894, loss:2.2938, succeed, steps:87, total reward:10.4000, 0.343808650970459 sec
Episode 6895, loss:2.2949, succeed, steps:83, total reward:9.6000, 0.3824794292449951 sec
Episo

Episode 6976, loss:1.0275, fail, steps:161, total reward:9.1000, 0.6663944721221924 sec
Episode 6977, loss:6.7914, succeed, steps:113, total reward:7.9000, 0.5085532665252686 sec
Episode 6978, loss:8.6599, fail, steps:161, total reward:10.2000, 0.6458079814910889 sec
Episode 6979, loss:1.4371, succeed, steps:54, total reward:5.4000, 0.2599318027496338 sec
Episode 6980, loss:5.5958, succeed, steps:93, total reward:10.3000, 0.36409711837768555 sec
Episode 6981, loss:6.0036, succeed, steps:110, total reward:11.8000, 0.4217255115509033 sec
Episode 6982, loss:3.0303, succeed, steps:95, total reward:5.9000, 0.4563751220703125 sec
Episode 6983, loss:8.3529, succeed, steps:120, total reward:12.5000, 0.5037240982055664 sec
Episode 6984, loss:3.7512, succeed, steps:91, total reward:8.3000, 0.3632926940917969 sec
Episode 6985, loss:2.1243, succeed, steps:127, total reward:10.2000, 0.5475752353668213 sec
Episode 6986, loss:2.8947, succeed, steps:62, total reward:7.1000, 0.2523820400238037 sec
Epis

Episode 7065, loss:2.7617, succeed, steps:146, total reward:13.7000, 0.6448543071746826 sec
Episode 7066, loss:3.2837, succeed, steps:86, total reward:12.6000, 0.3446228504180908 sec
Episode 7067, loss:6.7492, succeed, steps:109, total reward:13.6000, 0.4459257125854492 sec
Episode 7068, loss:5.1639, succeed, steps:118, total reward:16.7000, 0.555323600769043 sec
Episode 7069, loss:1.5936, succeed, steps:107, total reward:8.8000, 0.45718884468078613 sec
Episode 7070, loss:6.8315, fail, steps:163, total reward:19.4000, 0.6161987781524658 sec
Episode 7071, loss:4.4032, succeed, steps:134, total reward:14.1000, 0.5369069576263428 sec
Episode 7072, loss:3.0070, succeed, steps:102, total reward:13.1000, 0.5708613395690918 sec
Episode 7073, loss:5.2113, succeed, steps:86, total reward:9.5000, 0.34223031997680664 sec
Episode 7074, loss:2.5537, succeed, steps:131, total reward:8.0000, 0.5638556480407715 sec
Episode 7075, loss:3.0740, succeed, steps:107, total reward:11.9000, 0.4249258041381836

Episode 7155, loss:1.5385, succeed, steps:75, total reward:7.8000, 0.4326333999633789 sec
Episode 7156, loss:2.6123, succeed, steps:137, total reward:10.9000, 0.5369219779968262 sec
Episode 7157, loss:6.3076, succeed, steps:107, total reward:11.9000, 0.4249727725982666 sec
Episode 7158, loss:5.9882, succeed, steps:97, total reward:17.4000, 0.43218517303466797 sec
Episode 7159, loss:4.9522, succeed, steps:99, total reward:13.2000, 0.38968563079833984 sec
Episode 7160, loss:1.6489, succeed, steps:61, total reward:9.3000, 0.18935632705688477 sec
Episode 7161, loss:1.3690, succeed, steps:98, total reward:8.7000, 0.4792609214782715 sec
Episode 7162, loss:2.9509, fail, steps:161, total reward:9.4000, 0.6691362857818604 sec
Episode 7163, loss:2.6690, succeed, steps:112, total reward:13.8000, 0.5068602561950684 sec
Episode 7164, loss:5.2171, succeed, steps:68, total reward:10.1000, 0.27831602096557617 sec
Episode 7165, loss:4.1613, succeed, steps:82, total reward:14.8000, 0.3247864246368408 se

Episode 7245, loss:2.8524, succeed, steps:116, total reward:8.5000, 0.4700193405151367 sec
Episode 7246, loss:2.2547, succeed, steps:91, total reward:8.3000, 0.37697315216064453 sec
Episode 7247, loss:5.9172, succeed, steps:97, total reward:17.4000, 0.48632001876831055 sec
Episode 7248, loss:6.5127, fail, steps:161, total reward:12.2000, 0.6087844371795654 sec
Episode 7249, loss:1.9245, succeed, steps:101, total reward:9.0000, 0.43550920486450195 sec
Episode 7250, loss:6.8406, succeed, steps:103, total reward:14.1000, 0.41033339500427246 sec
Episode 7251, loss:8.6700, succeed, steps:81, total reward:10.5000, 0.4295051097869873 sec
Episode 7252, loss:10.2294, succeed, steps:139, total reward:16.0000, 0.5459170341491699 sec
Episode 7253, loss:9.2878, succeed, steps:122, total reward:14.5000, 0.549649715423584 sec
Episode 7254, loss:0.2204, succeed, steps:99, total reward:10.1000, 0.4433579444885254 sec
Episode 7255, loss:6.5192, succeed, steps:141, total reward:18.0000, 0.651702880859375

Episode 7336, loss:5.9389, succeed, steps:133, total reward:16.2000, 0.538339376449585 sec
Episode 7337, loss:3.9783, succeed, steps:115, total reward:13.7000, 0.49718189239501953 sec
Episode 7338, loss:5.2201, succeed, steps:114, total reward:18.9000, 0.518547773361206 sec
Episode 7339, loss:6.8064, succeed, steps:126, total reward:15.4000, 0.49944162368774414 sec
Episode 7340, loss:0.7354, succeed, steps:63, total reward:8.2000, 0.2302563190460205 sec
Episode 7341, loss:4.5297, succeed, steps:105, total reward:16.1000, 0.5061228275299072 sec
Episode 7342, loss:5.8195, succeed, steps:134, total reward:11.0000, 0.5769262313842773 sec
Episode 7343, loss:1.6245, succeed, steps:73, total reward:8.8000, 0.29306578636169434 sec
Episode 7344, loss:2.5728, succeed, steps:143, total reward:13.8000, 0.5718696117401123 sec
Episode 7345, loss:1.9601, succeed, steps:109, total reward:7.7000, 0.49254536628723145 sec
Episode 7346, loss:1.5594, succeed, steps:122, total reward:8.3000, 0.5236494541168

Episode 7426, loss:8.6695, succeed, steps:125, total reward:17.5000, 0.5469129085540771 sec
Episode 7427, loss:2.8187, succeed, steps:119, total reward:14.5000, 0.48229169845581055 sec
Episode 7428, loss:-45.7959, fail, steps:19, total reward:-7.5000, 0.13868188858032227 sec
Episode 7429, loss:10.9486, succeed, steps:110, total reward:18.0000, 0.4197559356689453 sec
Episode 7430, loss:8.4254, succeed, steps:99, total reward:9.8000, 0.43616199493408203 sec
Episode 7431, loss:5.2859, succeed, steps:44, total reward:4.7000, 0.2676975727081299 sec
Episode 7432, loss:6.2541, succeed, steps:70, total reward:8.7000, 0.2722766399383545 sec
Episode 7433, loss:4.7625, succeed, steps:71, total reward:9.8000, 0.2787971496582031 sec
Episode 7434, loss:3.6064, succeed, steps:97, total reward:11.0000, 0.38082432746887207 sec
Episode 7435, loss:1.9008, succeed, steps:84, total reward:7.5000, 0.3293445110321045 sec
Episode 7436, loss:5.2553, succeed, steps:69, total reward:8.0000, 0.3288912773132324 se

Episode 7516, loss:3.0170, succeed, steps:92, total reward:15.5000, 0.3596949577331543 sec
Episode 7517, loss:3.7210, succeed, steps:134, total reward:14.1000, 0.5482339859008789 sec
Episode 7518, loss:3.9550, succeed, steps:114, total reward:12.5000, 0.5365157127380371 sec
Episode 7519, loss:7.5742, succeed, steps:94, total reward:8.2000, 0.37940144538879395 sec
Episode 7520, loss:6.3369, succeed, steps:102, total reward:16.1000, 0.5157673358917236 sec
Episode 7521, loss:7.0785, succeed, steps:113, total reward:14.8000, 0.4502573013305664 sec
Episode 7522, loss:6.4747, succeed, steps:128, total reward:11.2000, 0.6200649738311768 sec
Episode 7523, loss:7.6517, succeed, steps:133, total reward:13.1000, 0.5455300807952881 sec
Episode 7524, loss:8.9439, succeed, steps:121, total reward:19.7000, 0.4980778694152832 sec
Episode 7525, loss:4.1977, succeed, steps:109, total reward:13.9000, 0.4886167049407959 sec
Episode 7526, loss:9.6973, succeed, steps:131, total reward:17.3000, 0.54079270362

Episode 7606, loss:1.7004, succeed, steps:75, total reward:7.8000, 0.3131587505340576 sec
Episode 7607, loss:4.7024, succeed, steps:87, total reward:16.7000, 0.3902163505554199 sec
Episode 7608, loss:6.1471, succeed, steps:128, total reward:14.1000, 0.5343263149261475 sec
Episode 7609, loss:1.3712, succeed, steps:42, total reward:5.8000, 0.24475336074829102 sec
Episode 7610, loss:-0.4648, fail, steps:160, total reward:8.1000, 0.6365907192230225 sec
Episode 7611, loss:4.4202, succeed, steps:67, total reward:9.1000, 0.29260706901550293 sec
Episode 7612, loss:4.8321, succeed, steps:129, total reward:12.1000, 0.5191559791564941 sec
Episode 7613, loss:6.8702, succeed, steps:117, total reward:15.7000, 0.45075345039367676 sec
Episode 7614, loss:4.5651, succeed, steps:83, total reward:9.6000, 0.410477876663208 sec
Episode 7615, loss:4.4022, succeed, steps:124, total reward:19.6000, 0.4993114471435547 sec
Episode 7616, loss:3.7642, succeed, steps:88, total reward:8.4000, 0.453716516494751 sec
E

Episode 7696, loss:5.4304, succeed, steps:136, total reward:9.9000, 0.6636271476745605 sec
Episode 7697, loss:6.3941, succeed, steps:95, total reward:15.2000, 0.38458967208862305 sec
Episode 7698, loss:1.2137, succeed, steps:83, total reward:6.3000, 0.3317677974700928 sec
Episode 7699, loss:1.0982, succeed, steps:121, total reward:13.5000, 0.5344269275665283 sec
Episode 7700, loss:7.7753, succeed, steps:101, total reward:12.1000, 0.403735876083374 sec
Episode 7701, loss:13.7475, succeed, steps:95, total reward:15.4000, 0.4966905117034912 sec
Episode 7702, loss:4.1989, succeed, steps:123, total reward:6.2000, 0.5010151863098145 sec
Episode 7703, loss:2.3468, succeed, steps:88, total reward:11.5000, 0.35367846488952637 sec
Episode 7704, loss:1.7430, succeed, steps:86, total reward:6.4000, 0.3485391139984131 sec
Episode 7705, loss:0.6409, succeed, steps:49, total reward:6.5000, 0.29485034942626953 sec
Episode 7706, loss:-1.7269, fail, steps:159, total reward:4.0000, 0.6473450660705566 sec

Episode 7786, loss:1.2061, succeed, steps:123, total reward:8.9000, 0.4962153434753418 sec
Episode 7787, loss:7.1770, fail, steps:162, total reward:14.3000, 0.7536351680755615 sec
Episode 7788, loss:5.1592, succeed, steps:74, total reward:13.0000, 0.29964232444763184 sec
Episode 7789, loss:1.3520, fail, steps:161, total reward:10.2000, 0.7241291999816895 sec
Episode 7790, loss:2.8950, succeed, steps:43, total reward:6.8000, 0.1830158233642578 sec
Episode 7791, loss:3.7221, succeed, steps:109, total reward:13.9000, 0.4345831871032715 sec
Episode 7792, loss:6.5606, succeed, steps:78, total reward:13.9000, 0.3434174060821533 sec
Episode 7793, loss:0.7215, succeed, steps:79, total reward:5.6000, 0.3103158473968506 sec
Episode 7794, loss:2.5591, succeed, steps:76, total reward:8.6000, 0.308765172958374 sec
Episode 7795, loss:3.0659, fail, steps:160, total reward:8.1000, 0.78926682472229 sec
Episode 7796, loss:0.5953, succeed, steps:52, total reward:6.4000, 0.21867680549621582 sec
Episode 77

Episode 7877, loss:4.0342, fail, steps:162, total reward:15.3000, 0.6396484375 sec
Episode 7878, loss:3.4674, succeed, steps:83, total reward:9.6000, 0.46856164932250977 sec
Episode 7879, loss:1.1098, succeed, steps:80, total reward:9.7000, 0.31531596183776855 sec
Episode 7880, loss:-49.9120, fail, steps:1, total reward:-10.0000, 0.009929180145263672 sec
Episode 7881, loss:4.1119, succeed, steps:114, total reward:9.4000, 0.5108094215393066 sec
Episode 7882, loss:8.3522, succeed, steps:108, total reward:16.0000, 0.42676830291748047 sec
Episode 7883, loss:2.8632, fail, steps:162, total reward:13.2000, 0.7270240783691406 sec
Episode 7884, loss:1.6322, fail, steps:160, total reward:7.1000, 0.6322944164276123 sec
Episode 7885, loss:6.9654, succeed, steps:100, total reward:8.0000, 0.5291950702667236 sec
Episode 7886, loss:1.8179, succeed, steps:63, total reward:8.2000, 0.2512795925140381 sec
Episode 7887, loss:5.0115, succeed, steps:70, total reward:5.9000, 0.27467870712280273 sec
Episode 78

Episode 7967, loss:3.5860, succeed, steps:106, total reward:20.2000, 0.4151184558868408 sec
Episode 7968, loss:2.9402, succeed, steps:103, total reward:17.2000, 0.40274691581726074 sec
Episode 7969, loss:2.3521, succeed, steps:88, total reward:11.5000, 0.3502800464630127 sec
Episode 7970, loss:3.4395, succeed, steps:85, total reward:11.6000, 0.44550251960754395 sec
Episode 7971, loss:5.6882, succeed, steps:96, total reward:13.3000, 0.378187894821167 sec
Episode 7972, loss:5.5203, succeed, steps:120, total reward:12.5000, 0.4527895450592041 sec
Episode 7973, loss:2.6795, succeed, steps:66, total reward:8.1000, 0.33060145378112793 sec
Episode 7974, loss:3.2192, succeed, steps:113, total reward:11.7000, 0.4474012851715088 sec
Episode 7975, loss:6.6518, fail, steps:161, total reward:12.2000, 0.6663715839385986 sec
Episode 7976, loss:7.4129, succeed, steps:109, total reward:13.6000, 0.43959808349609375 sec
Episode 7977, loss:2.4786, succeed, steps:98, total reward:12.2000, 0.376664161682128

Episode 8056, loss:3.9722, succeed, steps:73, total reward:5.8000, 0.2862570285797119 sec
Episode 8057, loss:2.9703, succeed, steps:100, total reward:4.3000, 0.39133143424987793 sec
Episode 8058, loss:3.4101, succeed, steps:128, total reward:17.4000, 0.5977320671081543 sec
Episode 8059, loss:3.3639, succeed, steps:116, total reward:11.6000, 0.4661595821380615 sec
Episode 8060, loss:2.5870, succeed, steps:78, total reward:10.8000, 0.3980741500854492 sec
Episode 8061, loss:5.9835, succeed, steps:122, total reward:17.6000, 0.4703805446624756 sec
Episode 8062, loss:6.4566, succeed, steps:110, total reward:14.9000, 0.4399552345275879 sec
Episode 8063, loss:1.1821, succeed, steps:74, total reward:9.9000, 0.3845791816711426 sec
Episode 8064, loss:4.6306, succeed, steps:151, total reward:18.3000, 0.6228840351104736 sec
Episode 8065, loss:3.9157, succeed, steps:78, total reward:13.9000, 0.3183622360229492 sec
Episode 8066, loss:3.1769, fail, steps:161, total reward:10.2000, 0.7247865200042725 s

Episode 8146, loss:5.0041, succeed, steps:98, total reward:8.8000, 0.3959159851074219 sec
Episode 8147, loss:1.7206, fail, steps:160, total reward:8.1000, 0.7340798377990723 sec
Episode 8148, loss:3.4319, succeed, steps:98, total reward:9.1000, 0.45253896713256836 sec
Episode 8149, loss:2.2592, succeed, steps:141, total reward:11.8000, 0.5652050971984863 sec
Episode 8150, loss:6.8196, succeed, steps:142, total reward:19.0000, 0.6837852001190186 sec
Episode 8151, loss:5.8663, succeed, steps:135, total reward:12.0000, 0.5919194221496582 sec
Episode 8152, loss:6.8231, succeed, steps:80, total reward:9.7000, 0.3264286518096924 sec
Episode 8153, loss:8.2137, succeed, steps:125, total reward:14.0000, 0.5741095542907715 sec
Episode 8154, loss:4.2142, fail, steps:161, total reward:10.8000, 0.6833782196044922 sec
Episode 8155, loss:1.8608, succeed, steps:50, total reward:7.6000, 0.15326428413391113 sec
Episode 8156, loss:5.0746, succeed, steps:123, total reward:9.3000, 0.48154735565185547 sec
E

Episode 8236, loss:3.4290, succeed, steps:124, total reward:13.4000, 0.5009849071502686 sec
Episode 8237, loss:1.6337, succeed, steps:119, total reward:8.4000, 0.5335259437561035 sec
Episode 8238, loss:9.1873, succeed, steps:99, total reward:13.2000, 0.40270543098449707 sec
Episode 8239, loss:1.1228, succeed, steps:58, total reward:6.3000, 0.37306809425354004 sec
Episode 8240, loss:1.3889, succeed, steps:58, total reward:3.2000, 0.2289896011352539 sec
Episode 8241, loss:2.3300, succeed, steps:98, total reward:12.2000, 0.38280224800109863 sec
Episode 8242, loss:6.9184, succeed, steps:86, total reward:12.6000, 0.43532228469848633 sec
Episode 8243, loss:4.1508, succeed, steps:129, total reward:9.1000, 0.5076932907104492 sec
Episode 8244, loss:9.1592, succeed, steps:85, total reward:11.6000, 0.4848194122314453 sec
Episode 8245, loss:3.6099, fail, steps:161, total reward:10.2000, 0.666654109954834 sec
Episode 8246, loss:3.5527, succeed, steps:82, total reward:8.6000, 0.3756992816925049 sec


Episode 8327, loss:-0.4014, fail, steps:160, total reward:5.0000, 0.6692318916320801 sec
Episode 8328, loss:3.7979, fail, steps:161, total reward:9.1000, 0.7110142707824707 sec
Episode 8329, loss:4.0349, succeed, steps:137, total reward:10.9000, 0.6284136772155762 sec
Episode 8330, loss:7.8122, succeed, steps:140, total reward:13.9000, 0.5599703788757324 sec
Episode 8331, loss:1.2141, succeed, steps:51, total reward:5.2000, 0.21297240257263184 sec
Episode 8332, loss:0.3611, fail, steps:161, total reward:9.1000, 0.7564656734466553 sec
Episode 8333, loss:6.9525, succeed, steps:125, total reward:11.3000, 0.5444178581237793 sec
Episode 8334, loss:1.0500, fail, steps:160, total reward:5.0000, 0.5803890228271484 sec
Episode 8335, loss:-3.2524, fail, steps:158, total reward:-0.1000, 0.6814794540405273 sec
Episode 8336, loss:5.6311, succeed, steps:148, total reward:6.4000, 0.6769313812255859 sec
Episode 8337, loss:-1.0160, fail, steps:160, total reward:8.1000, 0.6927971839904785 sec
Episode 83

Episode 8418, loss:5.7064, succeed, steps:77, total reward:12.9000, 0.3556830883026123 sec
Episode 8419, loss:5.4908, succeed, steps:95, total reward:9.2000, 0.38199949264526367 sec
Episode 8420, loss:4.4451, succeed, steps:136, total reward:19.2000, 0.6257448196411133 sec
Episode 8421, loss:6.4025, succeed, steps:102, total reward:10.0000, 0.4208519458770752 sec
Episode 8422, loss:1.6357, succeed, steps:73, total reward:5.8000, 0.3365030288696289 sec
Episode 8423, loss:2.0956, succeed, steps:50, total reward:7.6000, 0.19765496253967285 sec
Episode 8424, loss:15.8919, succeed, steps:118, total reward:13.6000, 0.46672701835632324 sec
Episode 8425, loss:9.9653, succeed, steps:118, total reward:16.7000, 0.5445904731750488 sec
Episode 8426, loss:2.4505, succeed, steps:111, total reward:15.9000, 0.433239221572876 sec
Episode 8427, loss:5.9438, succeed, steps:97, total reward:14.1000, 0.4945857524871826 sec
Episode 8428, loss:7.7158, succeed, steps:113, total reward:8.6000, 0.446246385574340

Episode 8508, loss:2.3748, succeed, steps:52, total reward:6.5000, 0.34282684326171875 sec
Episode 8509, loss:0.4442, fail, steps:161, total reward:12.2000, 0.6302180290222168 sec
Episode 8510, loss:-0.1390, fail, steps:160, total reward:8.1000, 0.7317991256713867 sec
Episode 8511, loss:4.8111, succeed, steps:81, total reward:10.7000, 0.32948875427246094 sec
Episode 8512, loss:2.2810, fail, steps:161, total reward:12.2000, 0.7252035140991211 sec
Episode 8513, loss:3.0211, succeed, steps:81, total reward:7.4000, 0.33220458030700684 sec
Episode 8514, loss:10.0219, succeed, steps:70, total reward:8.7000, 0.2666025161743164 sec
Episode 8515, loss:5.4542, succeed, steps:107, total reward:11.9000, 0.5289113521575928 sec
Episode 8516, loss:2.9419, succeed, steps:105, total reward:13.0000, 0.36205077171325684 sec
Episode 8517, loss:3.8575, succeed, steps:136, total reward:12.8000, 0.6414511203765869 sec
Episode 8518, loss:5.3059, succeed, steps:73, total reward:8.9000, 0.2949378490447998 sec
E

Episode 8598, loss:4.9351, succeed, steps:106, total reward:17.1000, 0.5456860065460205 sec
Episode 8599, loss:1.5430, fail, steps:161, total reward:11.2000, 0.6795220375061035 sec
Episode 8600, loss:0.6727, succeed, steps:69, total reward:8.0000, 0.2844710350036621 sec
Episode 8601, loss:10.4681, succeed, steps:108, total reward:16.0000, 0.42868781089782715 sec
Episode 8602, loss:0.6332, succeed, steps:100, total reward:11.1000, 0.4035036563873291 sec
Episode 8603, loss:6.2644, succeed, steps:122, total reward:10.9000, 0.5948188304901123 sec
Episode 8604, loss:0.6910, succeed, steps:79, total reward:11.8000, 0.31923866271972656 sec
Episode 8605, loss:1.7501, succeed, steps:56, total reward:4.3000, 0.34166383743286133 sec
Episode 8606, loss:1.9789, succeed, steps:92, total reward:12.4000, 0.3585987091064453 sec
Episode 8607, loss:4.8872, succeed, steps:124, total reward:16.5000, 0.5269951820373535 sec
Episode 8608, loss:8.0102, succeed, steps:96, total reward:16.4000, 0.421134948730468

Episode 8688, loss:5.7397, succeed, steps:122, total reward:14.5000, 0.4932560920715332 sec
Episode 8689, loss:4.6921, fail, steps:162, total reward:13.2000, 0.6958098411560059 sec
Episode 8690, loss:2.9953, fail, steps:160, total reward:7.1000, 0.6657228469848633 sec
Episode 8691, loss:-1.2778, fail, steps:160, total reward:6.1000, 0.6438846588134766 sec
Episode 8692, loss:4.3060, succeed, steps:128, total reward:8.1000, 0.6787688732147217 sec
Episode 8693, loss:1.8741, fail, steps:161, total reward:10.2000, 0.6076245307922363 sec
Episode 8694, loss:2.0341, succeed, steps:156, total reward:11.3000, 0.6686279773712158 sec
Episode 8695, loss:5.5033, succeed, steps:106, total reward:10.9000, 0.44600367546081543 sec
Episode 8696, loss:0.3677, succeed, steps:134, total reward:11.0000, 0.5510046482086182 sec
Episode 8697, loss:3.3827, succeed, steps:86, total reward:6.4000, 0.39698100090026855 sec
Episode 8698, loss:0.4869, succeed, steps:113, total reward:8.6000, 0.45073914527893066 sec
Ep

Episode 8779, loss:3.7620, succeed, steps:119, total reward:11.3000, 0.5391368865966797 sec
Episode 8780, loss:8.5518, succeed, steps:132, total reward:15.2000, 0.5216882228851318 sec
Episode 8781, loss:9.4527, succeed, steps:136, total reward:16.1000, 0.6233532428741455 sec
Episode 8782, loss:5.2945, succeed, steps:93, total reward:10.3000, 0.37723541259765625 sec
Episode 8783, loss:4.8963, succeed, steps:75, total reward:10.9000, 0.3006157875061035 sec
Episode 8784, loss:6.4718, succeed, steps:129, total reward:15.3000, 0.5326282978057861 sec
Episode 8785, loss:13.3679, succeed, steps:132, total reward:18.1000, 0.47149157524108887 sec
Episode 8786, loss:4.5332, succeed, steps:72, total reward:7.9000, 0.3918788433074951 sec
Episode 8787, loss:2.2689, succeed, steps:153, total reward:11.2000, 0.6037752628326416 sec
Episode 8788, loss:4.4415, succeed, steps:94, total reward:11.3000, 0.4944155216217041 sec
Episode 8789, loss:3.4329, fail, steps:161, total reward:10.2000, 0.68997240066528

Episode 8869, loss:7.3381, succeed, steps:75, total reward:10.9000, 0.2943437099456787 sec
Episode 8870, loss:2.2870, succeed, steps:98, total reward:12.2000, 0.47776079177856445 sec
Episode 8871, loss:3.8401, succeed, steps:89, total reward:12.5000, 0.3517918586730957 sec
Episode 8872, loss:3.7888, succeed, steps:117, total reward:12.6000, 0.5315930843353271 sec
Episode 8873, loss:2.3590, succeed, steps:65, total reward:10.1000, 0.27702760696411133 sec
Episode 8874, loss:4.1151, succeed, steps:115, total reward:10.6000, 0.45543432235717773 sec
Episode 8875, loss:1.8317, succeed, steps:105, total reward:13.0000, 0.4394402503967285 sec
Episode 8876, loss:2.8825, succeed, steps:98, total reward:15.3000, 0.4850587844848633 sec
Episode 8877, loss:1.1988, succeed, steps:140, total reward:13.9000, 0.5656614303588867 sec
Episode 8878, loss:2.5937, succeed, steps:81, total reward:10.7000, 0.4283115863800049 sec
Episode 8879, loss:2.6070, succeed, steps:116, total reward:8.0000, 0.4565479755401

Episode 8959, loss:1.8004, succeed, steps:110, total reward:11.8000, 0.5102205276489258 sec
Episode 8960, loss:1.0070, succeed, steps:49, total reward:6.5000, 0.19546151161193848 sec
Episode 8961, loss:5.3605, succeed, steps:125, total reward:14.4000, 0.5406696796417236 sec
Episode 8962, loss:9.8096, succeed, steps:141, total reward:14.7000, 0.5594680309295654 sec
Episode 8963, loss:7.1910, succeed, steps:150, total reward:14.6000, 0.7180368900299072 sec
Episode 8964, loss:4.7470, succeed, steps:83, total reward:9.6000, 0.3386201858520508 sec
Episode 8965, loss:1.4924, succeed, steps:155, total reward:12.8000, 0.6988861560821533 sec
Episode 8966, loss:1.5207, succeed, steps:155, total reward:10.3000, 0.6422655582427979 sec
Episode 8967, loss:5.4024, succeed, steps:91, total reward:11.4000, 0.4228529930114746 sec
Episode 8968, loss:0.1325, succeed, steps:48, total reward:5.6000, 0.19413065910339355 sec
Episode 8969, loss:5.0578, succeed, steps:111, total reward:9.7000, 0.445452928543090

Episode 9048, loss:1.1405, succeed, steps:81, total reward:10.7000, 0.32639479637145996 sec
Episode 9049, loss:2.5401, succeed, steps:120, total reward:18.7000, 0.5989134311676025 sec
Episode 9050, loss:4.0091, succeed, steps:111, total reward:15.9000, 0.4658942222595215 sec
Episode 9051, loss:1.6093, succeed, steps:94, total reward:8.1000, 0.3724558353424072 sec
Episode 9052, loss:2.5117, succeed, steps:144, total reward:11.7000, 0.6462235450744629 sec
Episode 9053, loss:3.4200, succeed, steps:147, total reward:14.7000, 0.6019906997680664 sec
Episode 9054, loss:3.5061, succeed, steps:105, total reward:12.8000, 0.4734375476837158 sec
Episode 9055, loss:4.0033, succeed, steps:102, total reward:16.2000, 0.40591955184936523 sec
Episode 9056, loss:1.3788, succeed, steps:78, total reward:7.7000, 0.3302304744720459 sec
Episode 9057, loss:4.8474, succeed, steps:90, total reward:13.5000, 0.36269640922546387 sec
Episode 9058, loss:3.7280, succeed, steps:90, total reward:10.4000, 0.4281151294708

Episode 9138, loss:5.8729, succeed, steps:157, total reward:15.4000, 0.6885175704956055 sec
Episode 9139, loss:5.9677, succeed, steps:147, total reward:14.7000, 0.6362197399139404 sec
Episode 9140, loss:3.3522, succeed, steps:53, total reward:7.5000, 0.22028779983520508 sec
Episode 9141, loss:5.8549, succeed, steps:125, total reward:14.4000, 0.5805914402008057 sec
Episode 9142, loss:1.9992, succeed, steps:136, total reward:9.9000, 0.5815105438232422 sec
Episode 9143, loss:0.7851, succeed, steps:104, total reward:15.1000, 0.4927999973297119 sec
Episode 9144, loss:6.0717, succeed, steps:120, total reward:18.7000, 0.5221841335296631 sec
Episode 9145, loss:2.1393, succeed, steps:160, total reward:12.2000, 0.700040340423584 sec
Episode 9146, loss:5.9346, succeed, steps:115, total reward:13.7000, 0.46078944206237793 sec
Episode 9147, loss:0.6660, succeed, steps:82, total reward:8.6000, 0.37110185623168945 sec
Episode 9148, loss:2.2554, succeed, steps:34, total reward:7.1000, 0.13899135589599

Episode 9228, loss:4.6527, succeed, steps:114, total reward:15.8000, 0.47743964195251465 sec
Episode 9229, loss:2.9388, succeed, steps:67, total reward:9.1000, 0.2957773208618164 sec
Episode 9230, loss:4.4963, succeed, steps:122, total reward:11.4000, 0.4769454002380371 sec
Episode 9231, loss:2.0126, succeed, steps:137, total reward:10.9000, 0.6561241149902344 sec
Episode 9232, loss:12.1256, succeed, steps:124, total reward:16.5000, 0.5035490989685059 sec
Episode 9233, loss:0.2591, succeed, steps:37, total reward:3.9000, 0.26813387870788574 sec
Episode 9234, loss:2.0333, fail, steps:161, total reward:11.2000, 0.6430704593658447 sec
Episode 9235, loss:1.8409, succeed, steps:58, total reward:6.3000, 0.2380218505859375 sec
Episode 9236, loss:2.1867, succeed, steps:146, total reward:10.6000, 0.5991039276123047 sec
Episode 9237, loss:2.7353, succeed, steps:88, total reward:11.5000, 0.4195876121520996 sec
Episode 9238, loss:2.3933, succeed, steps:50, total reward:7.6000, 0.19905710220336914 

Episode 9318, loss:2.1595, succeed, steps:61, total reward:6.2000, 0.2510986328125 sec
Episode 9319, loss:0.9263, succeed, steps:97, total reward:4.4000, 0.5288205146789551 sec
Episode 9320, loss:0.4534, succeed, steps:75, total reward:10.9000, 0.29657816886901855 sec
Episode 9321, loss:2.4495, succeed, steps:89, total reward:12.5000, 0.3837120532989502 sec
Episode 9322, loss:2.1340, succeed, steps:125, total reward:14.4000, 0.5012214183807373 sec
Episode 9323, loss:3.9676, succeed, steps:106, total reward:14.0000, 0.4688849449157715 sec
Episode 9324, loss:4.9070, succeed, steps:106, total reward:14.0000, 0.42247676849365234 sec
Episode 9325, loss:6.1923, succeed, steps:83, total reward:9.6000, 0.40710020065307617 sec
Episode 9326, loss:2.2742, succeed, steps:103, total reward:11.0000, 0.40413546562194824 sec
Episode 9327, loss:2.4866, succeed, steps:112, total reward:13.8000, 0.449800968170166 sec
Episode 9328, loss:1.5896, succeed, steps:81, total reward:10.7000, 0.369473934173584 se

Episode 9408, loss:1.8009, succeed, steps:103, total reward:11.0000, 0.5050923824310303 sec
Episode 9409, loss:5.3627, succeed, steps:90, total reward:13.5000, 0.38602542877197266 sec
Episode 9410, loss:3.8220, succeed, steps:105, total reward:16.1000, 0.4132974147796631 sec
Episode 9411, loss:4.9736, succeed, steps:119, total reward:17.7000, 0.4757564067840576 sec
Episode 9412, loss:0.4385, succeed, steps:95, total reward:12.1000, 0.449387788772583 sec
Episode 9413, loss:5.4538, succeed, steps:84, total reward:10.5000, 0.33211660385131836 sec
Episode 9414, loss:1.7396, succeed, steps:61, total reward:6.2000, 0.23984861373901367 sec
Episode 9415, loss:5.8252, fail, steps:162, total reward:14.3000, 0.7491762638092041 sec
Episode 9416, loss:1.8200, succeed, steps:69, total reward:11.1000, 0.2790944576263428 sec
Episode 9417, loss:2.4809, succeed, steps:112, total reward:16.9000, 0.5472710132598877 sec
Episode 9418, loss:2.3647, succeed, steps:109, total reward:17.0000, 0.4542183876037597

Episode 9498, loss:2.2627, succeed, steps:143, total reward:13.8000, 0.575040340423584 sec
Episode 9499, loss:5.3923, succeed, steps:93, total reward:10.0000, 0.39600110054016113 sec
Episode 9500, loss:1.7697, succeed, steps:97, total reward:11.2000, 0.39821529388427734 sec
Episode 9501, loss:5.0474, succeed, steps:115, total reward:16.8000, 0.5386340618133545 sec
Episode 9502, loss:1.9566, succeed, steps:144, total reward:14.6000, 0.6013760566711426 sec
Episode 9503, loss:1.1128, succeed, steps:90, total reward:7.3000, 0.42804479598999023 sec
Episode 9504, loss:0.7145, succeed, steps:91, total reward:8.3000, 0.3592250347137451 sec
Episode 9505, loss:0.6430, succeed, steps:94, total reward:8.0000, 0.4604330062866211 sec
Episode 9506, loss:3.3662, succeed, steps:128, total reward:17.4000, 0.5367856025695801 sec
Episode 9507, loss:3.9110, succeed, steps:118, total reward:13.6000, 0.5311062335968018 sec
Episode 9508, loss:4.2682, succeed, steps:117, total reward:15.6000, 0.476163387298584

Episode 9588, loss:3.1109, succeed, steps:80, total reward:2.9000, 0.31405019760131836 sec
Episode 9589, loss:2.6310, succeed, steps:109, total reward:13.9000, 0.5359752178192139 sec
Episode 9590, loss:6.7476, succeed, steps:155, total reward:16.5000, 0.611243724822998 sec
Episode 9591, loss:1.0875, succeed, steps:66, total reward:11.2000, 0.312694787979126 sec
Episode 9592, loss:2.6582, succeed, steps:48, total reward:5.5000, 0.2131364345550537 sec
Episode 9593, loss:6.1334, succeed, steps:81, total reward:10.7000, 0.3159301280975342 sec
Episode 9594, loss:-0.3002, succeed, steps:100, total reward:11.1000, 0.40778613090515137 sec
Episode 9595, loss:4.1777, succeed, steps:71, total reward:10.0000, 0.3868227005004883 sec
Episode 9596, loss:1.7683, succeed, steps:120, total reward:15.6000, 0.4715995788574219 sec
Episode 9597, loss:0.4071, succeed, steps:79, total reward:8.7000, 0.3862800598144531 sec
Episode 9598, loss:2.8318, succeed, steps:87, total reward:10.5000, 0.3427293300628662 s

Episode 9678, loss:0.8085, fail, steps:161, total reward:12.2000, 0.6085848808288574 sec
Episode 9679, loss:4.2997, succeed, steps:157, total reward:18.5000, 0.7223446369171143 sec
Episode 9680, loss:2.1132, succeed, steps:77, total reward:9.8000, 0.3176994323730469 sec
Episode 9681, loss:1.5542, succeed, steps:144, total reward:14.8000, 0.6519513130187988 sec
Episode 9682, loss:0.7483, succeed, steps:100, total reward:11.1000, 0.3992152214050293 sec
Episode 9683, loss:1.3617, succeed, steps:51, total reward:8.6000, 0.20517516136169434 sec
Episode 9684, loss:2.2352, succeed, steps:123, total reward:12.4000, 0.5724694728851318 sec
Episode 9685, loss:2.9457, succeed, steps:71, total reward:10.0000, 0.28293371200561523 sec
Episode 9686, loss:0.9562, succeed, steps:48, total reward:5.4000, 0.19022130966186523 sec
Episode 9687, loss:2.1153, succeed, steps:113, total reward:11.7000, 0.5145821571350098 sec
Episode 9688, loss:3.1846, succeed, steps:106, total reward:17.1000, 0.4189257621765136

Episode 9768, loss:2.7039, succeed, steps:97, total reward:14.3000, 0.4175539016723633 sec
Episode 9769, loss:-0.4313, succeed, steps:52, total reward:3.4000, 0.19895434379577637 sec
Episode 9770, loss:0.5079, succeed, steps:65, total reward:7.1000, 0.37351226806640625 sec
Episode 9771, loss:3.8141, succeed, steps:106, total reward:17.1000, 0.41327762603759766 sec
Episode 9772, loss:3.1239, succeed, steps:84, total reward:13.7000, 0.3098275661468506 sec
Episode 9773, loss:2.4544, succeed, steps:116, total reward:8.5000, 0.5202348232269287 sec
Episode 9774, loss:3.6166, succeed, steps:123, total reward:15.5000, 0.4984550476074219 sec
Episode 9775, loss:0.6444, succeed, steps:125, total reward:8.2000, 0.629554033279419 sec
Episode 9776, loss:-2.2178, fail, steps:160, total reward:8.1000, 0.6867868900299072 sec
Episode 9777, loss:1.4870, succeed, steps:122, total reward:14.5000, 0.498582124710083 sec
Episode 9778, loss:4.5742, fail, steps:162, total reward:14.9000, 0.6740090847015381 sec


Episode 9858, loss:0.2382, fail, steps:160, total reward:6.1000, 0.6796925067901611 sec
Episode 9859, loss:0.9614, succeed, steps:75, total reward:10.9000, 0.3115544319152832 sec
Episode 9860, loss:3.9883, succeed, steps:75, total reward:10.9000, 0.30532097816467285 sec
Episode 9861, loss:1.2526, succeed, steps:82, total reward:11.6000, 0.2770977020263672 sec
Episode 9862, loss:3.8423, succeed, steps:113, total reward:11.7000, 0.5237374305725098 sec
Episode 9863, loss:7.0311, succeed, steps:142, total reward:15.9000, 0.5932049751281738 sec
Episode 9864, loss:0.9022, succeed, steps:67, total reward:6.0000, 0.3460235595703125 sec
Episode 9865, loss:0.3895, succeed, steps:80, total reward:12.7000, 0.3134276866912842 sec
Episode 9866, loss:6.3087, succeed, steps:85, total reward:11.4000, 0.34314489364624023 sec
Episode 9867, loss:2.3132, succeed, steps:56, total reward:10.5000, 0.3316621780395508 sec
Episode 9868, loss:0.1290, succeed, steps:36, total reward:5.8000, 0.1427290439605713 sec


Episode 9948, loss:5.9472, succeed, steps:53, total reward:10.6000, 0.20981645584106445 sec
Episode 9949, loss:3.3397, succeed, steps:105, total reward:16.1000, 0.45885801315307617 sec
Episode 9950, loss:4.4197, succeed, steps:130, total reward:10.1000, 0.5286085605621338 sec
Episode 9951, loss:1.0027, succeed, steps:74, total reward:9.9000, 0.3045985698699951 sec
Episode 9952, loss:1.9812, succeed, steps:77, total reward:9.8000, 0.33222007751464844 sec
Episode 9953, loss:0.7013, succeed, steps:50, total reward:7.6000, 0.20596957206726074 sec
Episode 9954, loss:0.8396, succeed, steps:78, total reward:10.8000, 0.3406336307525635 sec
Episode 9955, loss:1.3768, succeed, steps:72, total reward:11.0000, 0.21986627578735352 sec
Episode 9956, loss:0.6703, fail, steps:162, total reward:13.6000, 0.6939282417297363 sec
Episode 9957, loss:2.4708, fail, steps:161, total reward:10.2000, 0.7308499813079834 sec
Episode 9958, loss:2.9335, fail, steps:162, total reward:13.2000, 0.6833827495574951 sec
E

Episode 10037, loss:2.6146, succeed, steps:113, total reward:17.9000, 0.5292973518371582 sec
Episode 10038, loss:2.0444, succeed, steps:140, total reward:17.0000, 0.5832879543304443 sec
Episode 10039, loss:6.0207, succeed, steps:148, total reward:21.9000, 0.5937530994415283 sec
Episode 10040, loss:2.3110, fail, steps:162, total reward:13.2000, 0.7274911403656006 sec
Episode 10041, loss:3.4106, succeed, steps:114, total reward:9.6000, 0.4639780521392822 sec
Episode 10042, loss:1.5590, succeed, steps:68, total reward:10.1000, 0.36461710929870605 sec
Episode 10043, loss:3.1925, succeed, steps:115, total reward:16.8000, 0.45238232612609863 sec
Episode 10044, loss:1.4237, succeed, steps:75, total reward:10.9000, 0.3619353771209717 sec
Episode 10045, loss:4.2010, succeed, steps:50, total reward:4.5000, 0.19765925407409668 sec
Episode 10046, loss:5.7248, succeed, steps:126, total reward:15.4000, 0.5015041828155518 sec
Episode 10047, loss:3.3888, succeed, steps:86, total reward:9.5000, 0.39240

Episode 10126, loss:1.2396, succeed, steps:58, total reward:9.4000, 0.2555413246154785 sec
Episode 10127, loss:1.1478, succeed, steps:83, total reward:12.7000, 0.37656283378601074 sec
Episode 10128, loss:2.0963, succeed, steps:99, total reward:13.2000, 0.41220951080322266 sec
Episode 10129, loss:6.8386, succeed, steps:123, total reward:15.4000, 0.4911637306213379 sec
Episode 10130, loss:8.7197, succeed, steps:124, total reward:19.6000, 0.5605137348175049 sec
Episode 10131, loss:2.2948, succeed, steps:103, total reward:14.1000, 0.4267134666442871 sec
Episode 10132, loss:2.5850, succeed, steps:139, total reward:12.9000, 0.6571981906890869 sec
Episode 10133, loss:2.1825, succeed, steps:84, total reward:13.7000, 0.33631110191345215 sec
Episode 10134, loss:1.8859, succeed, steps:96, total reward:13.3000, 0.4870162010192871 sec
Episode 10135, loss:0.3198, succeed, steps:67, total reward:12.2000, 0.26999473571777344 sec
Episode 10136, loss:3.6683, succeed, steps:82, total reward:11.7000, 0.37

Episode 10215, loss:2.6242, succeed, steps:84, total reward:13.7000, 0.33109211921691895 sec
Episode 10216, loss:4.7544, succeed, steps:86, total reward:15.7000, 0.3386232852935791 sec
Episode 10217, loss:1.5804, succeed, steps:69, total reward:4.9000, 0.36883997917175293 sec
Episode 10218, loss:0.8110, succeed, steps:93, total reward:10.3000, 0.37816572189331055 sec
Episode 10219, loss:3.5747, succeed, steps:129, total reward:18.4000, 0.5358858108520508 sec
Episode 10220, loss:2.0533, succeed, steps:76, total reward:11.9000, 0.3148341178894043 sec
Episode 10221, loss:0.4549, fail, steps:161, total reward:9.1000, 0.6397628784179688 sec
Episode 10222, loss:6.1259, succeed, steps:155, total reward:10.3000, 0.6686499118804932 sec
Episode 10223, loss:6.9493, fail, steps:161, total reward:12.2000, 0.6433358192443848 sec
Episode 10224, loss:2.8582, succeed, steps:99, total reward:16.3000, 0.4785909652709961 sec
Episode 10225, loss:3.3475, succeed, steps:62, total reward:7.2000, 0.25054121017

Episode 10304, loss:0.6753, succeed, steps:132, total reward:15.2000, 0.6115531921386719 sec
Episode 10305, loss:3.0479, succeed, steps:81, total reward:10.7000, 0.3235199451446533 sec
Episode 10306, loss:4.0509, fail, steps:162, total reward:13.2000, 0.7213475704193115 sec
Episode 10307, loss:7.0432, succeed, steps:102, total reward:16.1000, 0.411149263381958 sec
Episode 10308, loss:2.4385, succeed, steps:75, total reward:10.9000, 0.3809056282043457 sec
Episode 10309, loss:-1.0352, fail, steps:160, total reward:6.9000, 0.6297979354858398 sec
Episode 10310, loss:2.8872, succeed, steps:93, total reward:10.2000, 0.44553542137145996 sec
Episode 10311, loss:0.7492, succeed, steps:71, total reward:6.7000, 0.28299951553344727 sec
Episode 10312, loss:1.9426, succeed, steps:111, total reward:12.8000, 0.44936442375183105 sec
Episode 10313, loss:1.5578, fail, steps:161, total reward:11.2000, 0.7357845306396484 sec
Episode 10314, loss:7.4490, succeed, steps:117, total reward:15.7000, 0.5191266536

Episode 10393, loss:0.8775, succeed, steps:146, total reward:16.8000, 0.5756950378417969 sec
Episode 10394, loss:2.0070, succeed, steps:99, total reward:10.1000, 0.48575735092163086 sec
Episode 10395, loss:0.2323, succeed, steps:111, total reward:6.6000, 0.44837236404418945 sec
Episode 10396, loss:-0.1768, fail, steps:162, total reward:16.3000, 0.7155776023864746 sec
Episode 10397, loss:3.3225, succeed, steps:134, total reward:14.1000, 0.544447660446167 sec
Episode 10398, loss:1.0309, succeed, steps:100, total reward:7.7000, 0.4241335391998291 sec
Episode 10399, loss:7.7941, succeed, steps:71, total reward:10.0000, 0.2860696315765381 sec
Episode 10400, loss:4.3897, succeed, steps:75, total reward:14.0000, 0.3897988796234131 sec
Episode 10401, loss:10.0889, succeed, steps:157, total reward:15.4000, 0.6280274391174316 sec
Episode 10402, loss:4.3738, succeed, steps:82, total reward:14.8000, 0.3403658866882324 sec
Episode 10403, loss:2.6006, succeed, steps:72, total reward:7.9000, 0.284583

Episode 10482, loss:5.3593, succeed, steps:154, total reward:12.4000, 0.6637873649597168 sec
Episode 10483, loss:-3.9255, fail, steps:161, total reward:7.1000, 0.700636625289917 sec
Episode 10484, loss:1.4504, fail, steps:161, total reward:10.2000, 0.6485810279846191 sec
Episode 10485, loss:4.4272, fail, steps:160, total reward:8.1000, 0.7517940998077393 sec
Episode 10486, loss:3.7994, succeed, steps:94, total reward:11.3000, 0.38744473457336426 sec
Episode 10487, loss:2.9894, fail, steps:160, total reward:8.1000, 0.7469055652618408 sec
Episode 10488, loss:-0.3155, fail, steps:160, total reward:7.1000, 0.6391370296478271 sec
Episode 10489, loss:0.3647, fail, steps:161, total reward:10.2000, 0.706402063369751 sec
Episode 10490, loss:1.0856, fail, steps:160, total reward:8.1000, 0.671760082244873 sec
Episode 10491, loss:2.6901, succeed, steps:96, total reward:10.2000, 0.43753886222839355 sec
Episode 10492, loss:1.3130, succeed, steps:113, total reward:8.5000, 0.5203254222869873 sec
Episo

Episode 10572, loss:3.4655, succeed, steps:95, total reward:12.2000, 0.3858206272125244 sec
Episode 10573, loss:4.5348, succeed, steps:136, total reward:6.8000, 0.6219658851623535 sec
Episode 10574, loss:2.3049, succeed, steps:149, total reward:16.7000, 0.5923318862915039 sec
Episode 10575, loss:2.3937, succeed, steps:115, total reward:10.6000, 0.5627028942108154 sec
Episode 10576, loss:3.6888, succeed, steps:92, total reward:9.3000, 0.3684706687927246 sec
Episode 10577, loss:1.9340, fail, steps:161, total reward:10.2000, 0.6918036937713623 sec
Episode 10578, loss:4.1301, succeed, steps:92, total reward:9.3000, 0.3854362964630127 sec
Episode 10579, loss:2.8723, fail, steps:161, total reward:9.8000, 0.7377901077270508 sec
Episode 10580, loss:4.8165, succeed, steps:95, total reward:9.2000, 0.36071300506591797 sec
Episode 10581, loss:3.0296, succeed, steps:106, total reward:14.0000, 0.47953128814697266 sec
Episode 10582, loss:1.1401, succeed, steps:67, total reward:8.9000, 0.2671051025390

Episode 10662, loss:0.5069, succeed, steps:71, total reward:10.0000, 0.2809262275695801 sec
Episode 10663, loss:1.1171, succeed, steps:144, total reward:14.8000, 0.6451327800750732 sec
Episode 10664, loss:0.6132, succeed, steps:77, total reward:12.9000, 0.33852243423461914 sec
Episode 10665, loss:2.3987, succeed, steps:129, total reward:9.1000, 0.5542869567871094 sec
Episode 10666, loss:5.8558, succeed, steps:105, total reward:15.8000, 0.4399585723876953 sec
Episode 10667, loss:1.0219, succeed, steps:91, total reward:8.1000, 0.4524555206298828 sec
Episode 10668, loss:3.7827, succeed, steps:128, total reward:14.1000, 0.5322520732879639 sec
Episode 10669, loss:4.0206, succeed, steps:62, total reward:7.2000, 0.2547900676727295 sec
Episode 10670, loss:5.0173, succeed, steps:93, total reward:16.5000, 0.3676412105560303 sec
Episode 10671, loss:1.7345, succeed, steps:129, total reward:18.4000, 0.6084334850311279 sec
Episode 10672, loss:3.7556, succeed, steps:130, total reward:15.9000, 0.53131

Episode 10751, loss:3.1686, succeed, steps:68, total reward:10.1000, 0.3322722911834717 sec
Episode 10752, loss:1.6456, fail, steps:162, total reward:13.2000, 0.6472234725952148 sec
Episode 10753, loss:0.7193, succeed, steps:71, total reward:6.9000, 0.2890963554382324 sec
Episode 10754, loss:0.7016, succeed, steps:58, total reward:6.1000, 0.2937757968902588 sec
Episode 10755, loss:6.9611, succeed, steps:77, total reward:9.7000, 0.30124831199645996 sec
Episode 10756, loss:4.8713, fail, steps:162, total reward:16.3000, 0.7098755836486816 sec
Episode 10757, loss:0.5570, succeed, steps:92, total reward:9.3000, 0.368211030960083 sec
Episode 10758, loss:1.8327, succeed, steps:97, total reward:14.3000, 0.4462242126464844 sec
Episode 10759, loss:1.2824, succeed, steps:74, total reward:6.8000, 0.276641845703125 sec
Episode 10760, loss:1.4369, succeed, steps:119, total reward:11.3000, 0.4823338985443115 sec
Episode 10761, loss:8.5729, fail, steps:161, total reward:12.2000, 0.749352216720581 sec


Episode 10841, loss:1.0748, succeed, steps:146, total reward:10.6000, 0.6606249809265137 sec
Episode 10842, loss:0.0397, succeed, steps:156, total reward:8.2000, 0.6919941902160645 sec
Episode 10843, loss:0.4927, succeed, steps:136, total reward:9.9000, 0.594611406326294 sec
Episode 10844, loss:3.5156, fail, steps:161, total reward:12.2000, 0.7141604423522949 sec
Episode 10845, loss:1.3592, succeed, steps:122, total reward:8.3000, 0.49040699005126953 sec
Episode 10846, loss:0.1603, succeed, steps:65, total reward:10.2000, 0.26183032989501953 sec
Episode 10847, loss:0.8054, succeed, steps:159, total reward:14.3000, 0.7357368469238281 sec
Episode 10848, loss:4.1816, succeed, steps:113, total reward:11.7000, 0.4672667980194092 sec
Episode 10849, loss:-0.3542, fail, steps:161, total reward:9.1000, 0.7510473728179932 sec
Episode 10850, loss:2.0026, fail, steps:163, total reward:17.3000, 0.7670621871948242 sec
Episode 10851, loss:1.7401, fail, steps:159, total reward:0.8000, 0.66785168647766

Episode 10930, loss:1.3601, succeed, steps:101, total reward:15.2000, 0.44371604919433594 sec
Episode 10931, loss:3.3316, succeed, steps:93, total reward:10.3000, 0.37239861488342285 sec
Episode 10932, loss:1.1051, succeed, steps:102, total reward:10.0000, 0.40381908416748047 sec
Episode 10933, loss:2.8408, succeed, steps:92, total reward:12.4000, 0.44225406646728516 sec
Episode 10934, loss:1.4360, succeed, steps:83, total reward:9.6000, 0.32740163803100586 sec
Episode 10935, loss:3.8403, succeed, steps:71, total reward:10.0000, 0.29059791564941406 sec
Episode 10936, loss:0.9055, succeed, steps:99, total reward:3.9000, 0.49442553520202637 sec
Episode 10937, loss:0.3119, fail, steps:161, total reward:11.2000, 0.6883683204650879 sec
Episode 10938, loss:2.0018, succeed, steps:90, total reward:10.2000, 0.366131067276001 sec
Episode 10939, loss:8.6791, succeed, steps:114, total reward:12.7000, 0.48639798164367676 sec
Episode 10940, loss:1.0186, succeed, steps:74, total reward:6.8000, 0.3204

Episode 11018, loss:0.6646, succeed, steps:76, total reward:8.8000, 0.38397860527038574 sec
Episode 11019, loss:3.8526, succeed, steps:101, total reward:18.3000, 0.39691948890686035 sec
Episode 11020, loss:2.4555, succeed, steps:58, total reward:12.5000, 0.24550080299377441 sec
Episode 11021, loss:6.3996, succeed, steps:76, total reward:11.8000, 0.3440523147583008 sec
Episode 11022, loss:1.9988, succeed, steps:150, total reward:17.7000, 0.6099507808685303 sec
Episode 11023, loss:5.9716, succeed, steps:137, total reward:17.1000, 0.5656952857971191 sec
Episode 11024, loss:6.5456, succeed, steps:108, total reward:12.9000, 0.4347667694091797 sec
Episode 11025, loss:2.7670, succeed, steps:91, total reward:11.2000, 0.443927526473999 sec
Episode 11026, loss:8.1348, succeed, steps:110, total reward:14.9000, 0.44251465797424316 sec
Episode 11027, loss:4.7541, succeed, steps:86, total reward:12.6000, 0.4207932949066162 sec
Episode 11028, loss:0.7283, succeed, steps:89, total reward:9.4000, 0.351

Episode 11107, loss:1.0671, succeed, steps:92, total reward:9.3000, 0.3643612861633301 sec
Episode 11108, loss:3.2787, succeed, steps:71, total reward:10.0000, 0.356778621673584 sec
Episode 11109, loss:2.2635, succeed, steps:112, total reward:16.8000, 0.47042179107666016 sec
Episode 11110, loss:1.8829, succeed, steps:87, total reward:13.6000, 0.3494679927825928 sec
Episode 11111, loss:2.7422, succeed, steps:72, total reward:14.1000, 0.3492269515991211 sec
Episode 11112, loss:6.3178, succeed, steps:105, total reward:16.1000, 0.4746744632720947 sec
Episode 11113, loss:0.0623, succeed, steps:103, total reward:14.1000, 0.4164395332336426 sec
Episode 11114, loss:0.0123, succeed, steps:48, total reward:5.6000, 0.18641328811645508 sec
Episode 11115, loss:3.8974, succeed, steps:67, total reward:12.2000, 0.33380842208862305 sec
Episode 11116, loss:0.6008, succeed, steps:92, total reward:9.1000, 0.3593292236328125 sec
Episode 11117, loss:1.6846, succeed, steps:97, total reward:14.3000, 0.4580168

Episode 11196, loss:1.3909, succeed, steps:104, total reward:15.1000, 0.4177546501159668 sec
Episode 11197, loss:0.3050, succeed, steps:92, total reward:9.3000, 0.4090259075164795 sec
Episode 11198, loss:1.4643, succeed, steps:95, total reward:9.2000, 0.3724799156188965 sec
Episode 11199, loss:1.5613, succeed, steps:98, total reward:9.1000, 0.3681488037109375 sec
Episode 11200, loss:3.0810, succeed, steps:62, total reward:10.3000, 0.24624037742614746 sec
Episode 11201, loss:3.0846, succeed, steps:105, total reward:16.1000, 0.40874266624450684 sec
Episode 11202, loss:1.2164, succeed, steps:97, total reward:7.8000, 0.4293942451477051 sec
Episode 11203, loss:1.4513, succeed, steps:73, total reward:12.0000, 0.29247069358825684 sec
Episode 11204, loss:1.6282, succeed, steps:67, total reward:9.1000, 0.3612031936645508 sec
Episode 11205, loss:1.8823, succeed, steps:64, total reward:9.2000, 0.2503786087036133 sec
Episode 11206, loss:3.6918, succeed, steps:77, total reward:9.8000, 0.34424066543

Episode 11285, loss:10.8812, succeed, steps:161, total reward:16.3000, 0.7069628238677979 sec
Episode 11286, loss:0.4507, succeed, steps:94, total reward:8.2000, 0.431898832321167 sec
Episode 11287, loss:3.5768, fail, steps:161, total reward:11.2000, 0.6948468685150146 sec
Episode 11288, loss:0.8530, succeed, steps:49, total reward:6.6000, 0.20181632041931152 sec
Episode 11289, loss:2.1407, succeed, steps:108, total reward:9.8000, 0.47798657417297363 sec
Episode 11290, loss:1.0273, fail, steps:161, total reward:10.5000, 0.6866674423217773 sec
Episode 11291, loss:1.1193, succeed, steps:140, total reward:10.8000, 0.5682392120361328 sec
Episode 11292, loss:2.5102, succeed, steps:80, total reward:12.8000, 0.3270585536956787 sec
Episode 11293, loss:2.1067, succeed, steps:79, total reward:11.8000, 0.45820140838623047 sec
Episode 11294, loss:4.5685, succeed, steps:124, total reward:16.5000, 0.4882051944732666 sec
Episode 11295, loss:2.7948, succeed, steps:117, total reward:18.8000, 0.46330738

Episode 11374, loss:5.2725, succeed, steps:113, total reward:8.6000, 0.5117788314819336 sec
Episode 11375, loss:0.6696, succeed, steps:67, total reward:9.1000, 0.27103281021118164 sec
Episode 11376, loss:1.4870, succeed, steps:60, total reward:8.3000, 0.22378087043762207 sec
Episode 11377, loss:0.4390, fail, steps:161, total reward:12.2000, 0.7181251049041748 sec
Episode 11378, loss:3.1886, succeed, steps:137, total reward:13.9000, 0.6206891536712646 sec
Episode 11379, loss:2.7260, succeed, steps:97, total reward:14.3000, 0.4028310775756836 sec
Episode 11380, loss:10.0914, succeed, steps:98, total reward:15.3000, 0.42836833000183105 sec
Episode 11381, loss:2.1274, succeed, steps:132, total reward:15.2000, 0.5658242702484131 sec
Episode 11382, loss:5.1705, succeed, steps:145, total reward:18.9000, 0.6434531211853027 sec
Episode 11383, loss:1.5586, succeed, steps:39, total reward:5.9000, 0.16347312927246094 sec
Episode 11384, loss:2.2914, succeed, steps:108, total reward:12.7000, 0.42221

Episode 11463, loss:1.3869, succeed, steps:105, total reward:9.9000, 0.4126405715942383 sec
Episode 11464, loss:0.5139, fail, steps:162, total reward:13.2000, 0.6984233856201172 sec
Episode 11465, loss:-0.6303, fail, steps:160, total reward:6.1000, 0.7417221069335938 sec
Episode 11466, loss:2.0634, fail, steps:161, total reward:9.1000, 0.6890473365783691 sec
Episode 11467, loss:4.1165, succeed, steps:144, total reward:11.1000, 0.5899085998535156 sec
Episode 11468, loss:-0.3869, fail, steps:160, total reward:5.5000, 0.7412257194519043 sec
Episode 11469, loss:0.7986, succeed, steps:117, total reward:12.6000, 0.4770967960357666 sec
Episode 11470, loss:2.2058, succeed, steps:153, total reward:13.7000, 0.703782320022583 sec
Episode 11471, loss:2.5170, succeed, steps:86, total reward:9.5000, 0.3521847724914551 sec
Episode 11472, loss:1.0463, succeed, steps:84, total reward:10.6000, 0.3788337707519531 sec
Episode 11473, loss:3.2035, succeed, steps:111, total reward:9.7000, 0.4436676502227783 

Episode 11553, loss:3.3470, succeed, steps:95, total reward:15.3000, 0.38875746726989746 sec
Episode 11554, loss:1.5224, succeed, steps:121, total reward:13.5000, 0.5535099506378174 sec
Episode 11555, loss:-0.2836, succeed, steps:136, total reward:9.9000, 0.5588510036468506 sec
Episode 11556, loss:0.3428, succeed, steps:104, total reward:8.9000, 0.49492573738098145 sec
Episode 11557, loss:0.5567, succeed, steps:56, total reward:7.2000, 0.20868182182312012 sec
Episode 11558, loss:4.3615, succeed, steps:116, total reward:17.8000, 0.5004558563232422 sec
Episode 11559, loss:1.7550, succeed, steps:113, total reward:11.7000, 0.5098137855529785 sec
Episode 11560, loss:1.7046, succeed, steps:91, total reward:14.5000, 0.40917086601257324 sec
Episode 11561, loss:2.6181, succeed, steps:111, total reward:9.7000, 0.5150301456451416 sec
Episode 11562, loss:7.8576, succeed, steps:120, total reward:18.7000, 0.4755113124847412 sec
Episode 11563, loss:1.4436, succeed, steps:115, total reward:10.4000, 0.

Episode 11643, loss:0.2696, succeed, steps:99, total reward:12.8000, 0.37741875648498535 sec
Episode 11644, loss:-2.0403, fail, steps:161, total reward:9.1000, 0.6947212219238281 sec
Episode 11645, loss:4.3973, succeed, steps:81, total reward:10.7000, 0.37567877769470215 sec
Episode 11646, loss:4.8619, succeed, steps:92, total reward:9.2000, 0.36897945404052734 sec
Episode 11647, loss:1.9257, succeed, steps:162, total reward:17.3000, 0.7137281894683838 sec
Episode 11648, loss:0.4694, succeed, steps:73, total reward:8.9000, 0.2967233657836914 sec
Episode 11649, loss:0.6376, succeed, steps:115, total reward:13.7000, 0.46858811378479004 sec
Episode 11650, loss:-0.8044, fail, steps:160, total reward:5.0000, 0.6440682411193848 sec
Episode 11651, loss:7.4631, fail, steps:162, total reward:14.3000, 0.7786178588867188 sec
Episode 11652, loss:1.5256, succeed, steps:88, total reward:5.3000, 0.43398094177246094 sec
Episode 11653, loss:4.9375, succeed, steps:110, total reward:14.9000, 0.4333405494

Episode 11732, loss:0.9589, succeed, steps:70, total reward:12.1000, 0.27959251403808594 sec
Episode 11733, loss:6.6148, succeed, steps:82, total reward:14.8000, 0.4085042476654053 sec
Episode 11734, loss:2.5929, succeed, steps:123, total reward:9.3000, 0.42290258407592773 sec
Episode 11735, loss:3.6630, succeed, steps:93, total reward:10.3000, 0.43118715286254883 sec
Episode 11736, loss:0.8708, succeed, steps:104, total reward:14.9000, 0.4353647232055664 sec
Episode 11737, loss:1.6171, succeed, steps:56, total reward:10.5000, 0.24158406257629395 sec
Episode 11738, loss:0.6383, succeed, steps:85, total reward:14.7000, 0.3314046859741211 sec
Episode 11739, loss:3.5446, succeed, steps:87, total reward:10.5000, 0.40947723388671875 sec
Episode 11740, loss:2.9340, succeed, steps:105, total reward:13.0000, 0.4212071895599365 sec
Episode 11741, loss:1.3267, succeed, steps:98, total reward:9.1000, 0.47537899017333984 sec
Episode 11742, loss:0.8389, succeed, steps:101, total reward:9.0000, 0.39

Episode 11821, loss:4.1186, succeed, steps:75, total reward:10.9000, 0.3065943717956543 sec
Episode 11822, loss:0.4036, fail, steps:161, total reward:9.1000, 0.7037026882171631 sec
Episode 11823, loss:2.2597, succeed, steps:89, total reward:15.6000, 0.35546875 sec
Episode 11824, loss:0.8058, succeed, steps:60, total reward:8.3000, 0.33144235610961914 sec
Episode 11825, loss:0.5618, succeed, steps:60, total reward:8.3000, 0.23577046394348145 sec
Episode 11826, loss:3.2969, succeed, steps:94, total reward:14.3000, 0.3668360710144043 sec
Episode 11827, loss:0.7075, succeed, steps:58, total reward:9.4000, 0.25166869163513184 sec
Episode 11828, loss:0.4455, succeed, steps:103, total reward:11.0000, 0.4460721015930176 sec
Episode 11829, loss:3.0018, succeed, steps:113, total reward:17.8000, 0.4457871913909912 sec
Episode 11830, loss:0.9339, succeed, steps:102, total reward:10.0000, 0.49323153495788574 sec
Episode 11831, loss:6.4593, succeed, steps:126, total reward:15.0000, 0.499011993408203

Episode 11911, loss:0.3051, succeed, steps:24, total reward:3.2000, 0.09758543968200684 sec
Episode 11912, loss:4.1409, succeed, steps:86, total reward:12.5000, 0.3669300079345703 sec
Episode 11913, loss:0.9881, succeed, steps:98, total reward:15.3000, 0.39605236053466797 sec
Episode 11914, loss:1.7371, succeed, steps:110, total reward:18.0000, 0.45194554328918457 sec
Episode 11915, loss:0.1952, succeed, steps:41, total reward:4.8000, 0.16868019104003906 sec
Episode 11916, loss:0.2403, succeed, steps:54, total reward:8.5000, 0.21363258361816406 sec
Episode 11917, loss:0.9393, succeed, steps:97, total reward:14.3000, 0.5070228576660156 sec
Episode 11918, loss:0.8288, succeed, steps:69, total reward:11.0000, 0.2788536548614502 sec
Episode 11919, loss:1.7050, succeed, steps:92, total reward:9.3000, 0.4119110107421875 sec
Episode 11920, loss:0.8234, succeed, steps:52, total reward:6.2000, 0.20668697357177734 sec
Episode 11921, loss:0.1244, succeed, steps:77, total reward:6.7000, 0.29303026

Episode 12001, loss:1.4107, succeed, steps:155, total reward:12.7000, 0.6811857223510742 sec
Episode 12002, loss:6.8436, succeed, steps:101, total reward:15.2000, 0.46207618713378906 sec
Episode 12003, loss:4.8372, succeed, steps:99, total reward:16.3000, 0.39141273498535156 sec
Episode 12004, loss:3.2569, succeed, steps:53, total reward:7.5000, 0.21210169792175293 sec
Episode 12005, loss:2.1936, succeed, steps:75, total reward:10.9000, 0.37758755683898926 sec
Episode 12006, loss:1.9911, succeed, steps:115, total reward:10.6000, 0.4508171081542969 sec
Episode 12007, loss:0.3330, succeed, steps:100, total reward:14.2000, 0.45482683181762695 sec
Episode 12008, loss:1.9535, succeed, steps:92, total reward:12.4000, 0.3642451763153076 sec
Episode 12009, loss:1.2116, fail, steps:161, total reward:9.1000, 0.6877660751342773 sec
Episode 12010, loss:0.0857, succeed, steps:92, total reward:9.1000, 0.37610578536987305 sec
Episode 12011, loss:0.8355, succeed, steps:96, total reward:16.4000, 0.4934

Episode 12090, loss:1.1119, succeed, steps:89, total reward:12.5000, 0.4435560703277588 sec
Episode 12091, loss:4.8196, succeed, steps:87, total reward:13.6000, 0.3415858745574951 sec
Episode 12092, loss:0.5469, succeed, steps:87, total reward:13.6000, 0.3508486747741699 sec
Episode 12093, loss:1.2305, succeed, steps:78, total reward:7.7000, 0.41962122917175293 sec
Episode 12094, loss:2.0526, succeed, steps:78, total reward:7.7000, 0.2953474521636963 sec
Episode 12095, loss:1.9776, succeed, steps:66, total reward:11.2000, 0.27817583084106445 sec
Episode 12096, loss:0.4304, succeed, steps:109, total reward:7.7000, 0.4616219997406006 sec
Episode 12097, loss:3.2575, succeed, steps:120, total reward:15.6000, 0.479170560836792 sec
Episode 12098, loss:0.9606, succeed, steps:126, total reward:9.2000, 0.5866439342498779 sec
Episode 12099, loss:0.4854, succeed, steps:152, total reward:19.7000, 0.677056074142456 sec
Episode 12100, loss:3.5917, succeed, steps:61, total reward:9.3000, 0.2443630695

Episode 12179, loss:3.1116, fail, steps:162, total reward:13.1000, 0.6432664394378662 sec
Episode 12180, loss:1.4669, succeed, steps:82, total reward:8.6000, 0.4277212619781494 sec
Episode 12181, loss:2.0414, succeed, steps:101, total reward:12.1000, 0.3985426425933838 sec
Episode 12182, loss:3.8804, succeed, steps:74, total reward:9.8000, 0.29721736907958984 sec
Episode 12183, loss:1.5310, succeed, steps:23, total reward:2.2000, 0.19266939163208008 sec
Episode 12184, loss:2.2650, succeed, steps:76, total reward:11.9000, 0.3164982795715332 sec
Episode 12185, loss:0.8448, succeed, steps:67, total reward:9.1000, 0.262620210647583 sec
Episode 12186, loss:2.0930, succeed, steps:111, total reward:12.8000, 0.42380189895629883 sec
Episode 12187, loss:3.4240, succeed, steps:92, total reward:12.4000, 0.4456326961517334 sec
Episode 12188, loss:1.4730, succeed, steps:105, total reward:13.0000, 0.42969846725463867 sec
Episode 12189, loss:3.0508, succeed, steps:106, total reward:13.9000, 0.48859930

Episode 12269, loss:3.4885, succeed, steps:129, total reward:15.3000, 0.5639941692352295 sec
Episode 12270, loss:-0.5365, fail, steps:161, total reward:10.2000, 0.6867258548736572 sec
Episode 12271, loss:1.9465, succeed, steps:113, total reward:11.7000, 0.45587873458862305 sec
Episode 12272, loss:1.1049, succeed, steps:119, total reward:14.6000, 0.5470137596130371 sec
Episode 12273, loss:-0.7306, fail, steps:160, total reward:5.0000, 0.6725361347198486 sec
Episode 12274, loss:1.4732, succeed, steps:131, total reward:11.1000, 0.5375545024871826 sec
Episode 12275, loss:0.9162, succeed, steps:88, total reward:11.5000, 0.39467477798461914 sec
Episode 12276, loss:1.6827, succeed, steps:111, total reward:12.8000, 0.4443366527557373 sec
Episode 12277, loss:0.1196, succeed, steps:51, total reward:8.6000, 0.312546968460083 sec
Episode 12278, loss:3.5781, succeed, steps:141, total reward:14.9000, 0.5515551567077637 sec
Episode 12279, loss:5.8418, fail, steps:163, total reward:17.3000, 0.73977017

KeyboardInterrupt: 