In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import math
import random

# Bounds (inclusive) for the list length sampled by SortingEnv.reset().
MIN_LIST_LEN = 8
MAX_LIST_LEN = 8
# Stored as SortingEnv.max_steps; train() also uses it to cap the token sequence length.
MAX_STEPS = 210

# Reward shaping constants read by SortingEnv.step() and train().
SUCCESS_REWARD = 2.0  # Swap that leaves the list fully sorted (ends the episode).
STEP_REWARD = -0.2  # Cost of a comparison (doubled when the two elements are equal) or of a neutral swap.
COMPARISON_ENTROPY_MULTIPLIER = -0.05  # Scales compute_min_delta_entropy() into an extra comparison penalty.
SWAP_REWARD = 1.0  # Granted for a swap that fixes an inversion; negated for a swap that creates one.
INVALID_ACTION_REWARD = -10.0  # Malformed / out-of-range action; ends the episode.

# Exploration schedule used in train(): threshold = EPS_END + (EPS_START - EPS_END) * exp(-episode / EPS_DECAY).
EPS_START = 0.5
EPS_END = 0.05
EPS_DECAY = 1000
GAMMA = 0.9  # Discount factor applied when accumulating returns in train().
NUM_EPISODES = 100000  # Upper bound of the episode loop in train().
EPISODES_SAVE = 1000  # A checkpoint is written every EPISODES_SAVE episodes.
OUTPUT_DIR = 'datasets/rl_sort_transformer_easy/list8_transformer4_192_gamma09_step210_v2'  # Checkpoint folder.

# Define the vocabulary
# Token -> id mapping, laid out in three contiguous groups:
#   ids 0-4   : action tokens and comparison responses
#   ids 5-20  : list indices '0'..'15'
#   ids 21-36 : list-length markers 'len1'..'len16'
vocab = {
    token: token_id
    for token_id, token in enumerate(
        ['Comparison', 'Swap', 'less', 'equal', 'more']
        + [str(index) for index in range(16)]
        + ['len{}'.format(length) for length in range(1, 17)]
    )
}
# Reverse lookup: id -> token string.
inv_vocab = {token_id: token for token, token_id in vocab.items()}

def compute_entropy(N, alpha=1):
    """
    Build a per-integer information-content table for the integers 0 .. 2**N - 1.

    Each integer v is given probability proportional to exp(-alpha * v); the
    returned cost of v is -log2 of that normalised probability.

    Args:
        N (int): Number of bits; the table covers 2**N integers.
        alpha (float): Decay rate of the exponential weighting.

    Returns:
        tuple: (integers, costs) as two NumPy arrays of length 2**N.
    """
    support = np.arange(2**N)
    weights = np.exp(-alpha * support)
    distribution = weights / weights.sum()
    return support, -np.log2(distribution)

# Precomputed cost table for the 16 integers 0..15 (N=4); indexed by get_entropy_of_integer().
_, int_entropy = compute_entropy(4)

def get_entropy_of_integer(x):
    """
    Look up the information cost of an integer in the precomputed table.

    The sign of ``x`` is ignored and magnitudes beyond the end of the table
    are clamped to its last entry. The clamp is derived from the table size
    rather than hard-coded, so it stays correct if ``int_entropy`` is rebuilt
    with a different number of bits.

    Args:
        x (int): Any integer (may be negative).

    Returns:
        The entry of ``int_entropy`` for ``min(len(int_entropy) - 1, abs(x))``.
    """
    largest_index = len(int_entropy) - 1
    return int_entropy[min(largest_index, abs(x))]

def compute_min_delta_entropy(comparisons):
    """
    Return the cheapest encoding cost of the most recent comparison.

    The last ``(x, y)`` pair in ``comparisons`` is described in several
    alternative ways, each a triple of integers; the cost of a triple is the
    sum of ``get_entropy_of_integer`` over its members, and the smallest cost
    is returned. Candidate encodings:

    * direct:        ``(x, min(y, y - x), 0)``
    * first delta:   difference to the previous pair (needs >= 2 pairs)
    * second delta:  difference of consecutive differences (needs >= 3 pairs)
    * back-reference: difference to any earlier pair ``j``, plus the distance
      ``min(j, i - j)`` needed to address it (needs >= 2 pairs)

    Args:
        comparisons (list[tuple[int, int]]): Index pairs compared so far,
            oldest first.

    Returns:
        The minimal cost over all candidate encodings, or ``0.0`` when
        ``comparisons`` is empty (nothing to encode).
    """
    if not comparisons:
        return 0.0

    def cost(option):
        # Total cost of one candidate triple.
        return sum(get_entropy_of_integer(value) for value in option)

    i = len(comparisons) - 1
    xi, yi = comparisons[i]

    # Direct encoding is always available.
    options = [(xi, min(yi, yi - xi), 0)]

    if i > 0:
        # First-order delta against the immediately preceding comparison.
        xi_prev, yi_prev = comparisons[i - 1]
        options.append((xi - xi_prev, yi - yi_prev, 0))

        # Second-order delta needs two predecessors.
        if i > 1:
            xi_prev2, yi_prev2 = comparisons[i - 2]
            options.append((
                (xi - xi_prev) - (xi_prev - xi_prev2),
                (yi - yi_prev) - (yi_prev - yi_prev2),
                0,
            ))

        # Back-reference to every earlier comparison j (available for i >= 1).
        for j in range(i):
            xj, yj = comparisons[j]
            options.append((xi - xj, yi - yj, min(j, i - j)))

    # The minimum of the per-option costs equals the cost of the minimal
    # option, so there is no need to re-sum the winner afterwards.
    return min(cost(option) for option in options)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define the environment
class SortingEnv:
    """
    Token-driven environment in which an agent sorts a random integer list.

    The agent issues two kinds of actions, expressed as vocabulary token ids:

    * ``[Comparison, i, j]`` compares elements ``i`` and ``j`` and answers
      with the ``less`` / ``equal`` / ``more`` token; the pair is remembered.
    * ``[Swap]`` swaps the most recently compared pair.

    An episode ends when a swap leaves the list sorted, when an invalid
    action is issued, or after ``max_steps`` valid actions.
    """

    def __init__(self):
        self.max_steps = MAX_STEPS
        # Episode state is (re)populated by reset(). The placeholders below
        # make step() report an invalid action, rather than raise
        # AttributeError, if it is called before the first reset().
        self.length = 0
        self.list = []
        self.indices = None
        self.current_step = 0
        self.done = False

    def _sample_list(self):
        """Draw ``self.length`` integers uniformly from 1..100."""
        return [random.randint(1, 100) for _ in range(self.length)]

    def _invalid_action(self):
        """Terminate the episode and build the invalid-action step result."""
        self.done = True
        return None, INVALID_ACTION_REWARD, self.done, self.list.copy()

    def reset(self):
        """
        Start a new episode with a freshly sampled, not-yet-sorted list.

        Returns:
            tuple: (id of the ``len<N>`` token for the sampled length,
            copy of the list).
        """
        self.length = random.randint(MIN_LIST_LEN, MAX_LIST_LEN)
        self.list = self._sample_list()
        # Resample until the list actually needs sorting. A list with fewer
        # than two elements is always sorted, so skip the loop in that case
        # instead of spinning forever.
        while self.length > 1 and self.list == sorted(self.list):
            self.list = self._sample_list()
        self.indices = None
        self.current_step = 0
        self.done = False
        initial_token = 'len{}'.format(self.length)
        return vocab[initial_token], self.list.copy()

    def get_list(self):
        """Return the live internal list (not a copy)."""
        return self.list

    def get_list_len(self):
        """Return the number of elements in the current list."""
        return len(self.list)

    def step(self, action_tokens):
        """
        Apply one action.

        Args:
            action_tokens (list[int]): ``[Comparison, idx1_token, idx2_token]``
                or ``[Swap]``, as vocabulary ids.

        Returns:
            tuple: ``(response_token, reward, done, list_copy)`` where
            ``response_token`` is the comparison outcome token id, or
            ``None`` for swaps and invalid actions.
        """
        if not action_tokens:
            return self._invalid_action()

        action = action_tokens[0]
        response_token = None

        if action == vocab['Comparison']:
            if len(action_tokens) != 3:
                return self._invalid_action()
            index1 = action_tokens[1] - vocab['0']
            index2 = action_tokens[2] - vocab['0']
            if not (0 <= index1 < self.length and 0 <= index2 < self.length):
                return self._invalid_action()
            # Remember the pair so that a following Swap knows what to swap.
            self.indices = (index1, index2)
            left, right = self.list[index1], self.list[index2]
            if left < right:
                response_token = vocab['less']
                reward = STEP_REWARD
            elif left == right:
                # Comparing equal elements costs twice the usual step.
                response_token = vocab['equal']
                reward = STEP_REWARD * 2
            else:
                response_token = vocab['more']
                reward = STEP_REWARD
        elif action == vocab['Swap']:
            if self.indices is None:
                # Swap without a preceding comparison.
                return self._invalid_action()
            index1, index2 = self.indices
            low, high = min(index1, index2), max(index1, index2)
            low_before, high_before = self.list[low], self.list[high]
            self.list[index1], self.list[index2] = self.list[index2], self.list[index1]
            if self.list == sorted(self.list):
                reward = SUCCESS_REWARD
                self.done = True
            elif low_before > high_before:
                # The pair was inverted and the swap put it in order.
                reward = SWAP_REWARD
            elif low_before < high_before:
                # The pair was in order and the swap inverted it.
                reward = -SWAP_REWARD
            else:
                # Equal values or identical indices: the swap changed nothing.
                reward = STEP_REWARD
            # A swap consumes the remembered pair.
            self.indices = None
        else:
            reward = INVALID_ACTION_REWARD
            self.done = True

        self.current_step += 1
        if self.current_step >= self.max_steps:
            self.done = True
        return response_token, reward, self.done, self.list.copy()


Using device: cuda


In [2]:
# Positional Encoding for Transformer
class PositionalEncoding(nn.Module):
    """
    Add fixed sinusoidal position information to a sequence of embeddings.

    Even feature columns hold ``sin(pos * freq)`` and odd columns hold
    ``cos(pos * freq)``. Both even and odd ``d_model`` are supported; for an
    odd width the last sine column simply has no cosine partner.

    Args:
        d_model (int): Embedding width.
        max_len (int): Number of positions for which encodings are
            precomputed; inputs must not be longer than this.
    """

    def __init__(self, d_model, max_len=512):
        super().__init__()
        self.dropout = nn.Dropout(p=0.1)

        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)  # (max_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                             (-math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(max_len, d_model)  # (max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)  # Even indices
        # There are only floor(d_model / 2) odd columns, so trim the angle
        # table; for even d_model this slice is the whole table.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])  # Odd indices
        # Stored as (max_len, 1, d_model) so it broadcasts over the batch axis.
        self.register_buffer('pe', pe.unsqueeze(1))

    def forward(self, x):
        """
        Args:
            x (Tensor): Embeddings with the sequence axis first,
                i.e. (seq_len, batch, d_model).

        Returns:
            Tensor: ``x`` plus the encodings of its first ``seq_len``
            positions, passed through dropout.
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

# Transformer Model
class TransformerModel(nn.Module):
    """
    Token-level policy network: embedding, positional encoding, a stack of
    transformer encoder layers, and a linear projection back to the vocabulary.

    Args:
        vocab_size (int): Number of distinct token ids.
        d_model (int): Embedding / hidden width.
        nhead (int): Attention heads per encoder layer.
        num_layers (int): Number of stacked encoder layers.
    """

    def __init__(self, vocab_size, d_model=192, nhead=8, num_layers=4):
        super().__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model

        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model, nhead),
            num_layers,
        )
        self.decoder = nn.Linear(d_model, vocab_size)

        self.init_weights()

    def init_weights(self):
        """Uniformly initialise the embedding and output weights in [-0.1, 0.1]; zero the output bias."""
        bound = 0.1
        for weight in (self.embedding.weight, self.decoder.weight):
            nn.init.uniform_(weight, -bound, bound)
        nn.init.zeros_(self.decoder.bias)

    def forward(self, src):
        """
        Args:
            src (LongTensor): Token ids; train() passes them as (seq_len, batch).

        Returns:
            Tensor: Per-position vocabulary logits, with a trailing axis of
            size ``vocab_size``.
        """
        # Embeddings are scaled by sqrt(d_model) before positions are added.
        scaled = self.embedding(src) * math.sqrt(self.d_model)
        encoded = self.transformer_encoder(self.pos_encoder(scaled))
        return self.decoder(encoded)

def decode(input_tokens, inv_vocab):
    """
    Render a sequence of token ids as a space-separated string.

    Args:
        input_tokens (iterable[int]): Token ids to render.
        inv_vocab (dict): Mapping from token id to token string.

    Returns:
        str: The token strings joined by single spaces.
    """
    words = (inv_vocab[token_id] for token_id in input_tokens)
    return ' '.join(words)


def save_checkpoint(model, optimizer, episode, folder, filename):
    """
    Save the model and optimizer state to ``folder/filename``.

    Args:
        model (nn.Module): The model to save.
        optimizer (torch.optim.Optimizer): The optimizer whose state to save.
        episode (int): The current episode number.
        folder (str): Directory in which to place the checkpoint; created if
            missing. May be empty to save into the current working directory.
        filename (str): File name of the checkpoint inside ``folder``.
    """
    filepath = os.path.join(folder, filename)
    # Ensure the directory exists. os.makedirs('') raises, so skip it when
    # the path has no directory component.
    directory = os.path.dirname(filepath)
    if directory:
        os.makedirs(directory, exist_ok=True)
    # Save the checkpoint
    torch.save({
        'episode': episode,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, filepath)
    print(f"Checkpoint saved at episode {episode} to {filepath}")

def load_checkpoint(filepath, model, optimizer):
    """
    Restore model and optimizer state from a checkpoint file, if it exists.

    Args:
        filepath (str): Location of the checkpoint file.
        model (nn.Module): Receives the stored ``model_state_dict``.
        optimizer (torch.optim.Optimizer): Receives the stored
            ``optimizer_state_dict``.

    Returns:
        int: The episode number stored in the checkpoint, or 0 when no file
        exists at ``filepath``.
    """
    if not os.path.isfile(filepath):
        print(f"No checkpoint found at {filepath}, starting from scratch.")
        return 0

    # NOTE(review): torch.load unpickles the file; only load checkpoints
    # from a trusted source.
    state = torch.load(filepath, map_location=device)
    model.load_state_dict(state['model_state_dict'])
    optimizer.load_state_dict(state['optimizer_state_dict'])
    resume_episode = state['episode']
    print(f"Checkpoint loaded from {filepath}, resuming from episode {resume_episode}")
    return resume_episode

In [None]:
# Training Loop
def train(verbose=False):
    """
    Train the transformer policy on the sorting task with a REINFORCE-style
    policy-gradient update, one optimizer step per episode.

    Each episode the model emits tokens one at a time. A small state machine
    (``expect_action`` -> ``expect_index1`` -> ``expect_index2``) restricts
    the tokens that may be sampled; complete actions are forwarded to
    ``SortingEnv`` and the environment's response token is appended to the
    model input. The loss is ``-sum(log_prob * discounted_return)``.

    If the checkpoint file named below exists, training resumes from the
    episode number stored in it; otherwise it starts at episode 0. Every
    ``EPISODES_SAVE`` episodes a checkpoint is written whose file name
    carries the success rate and mean number of sampled tokens of the
    episodes trained since the previous checkpoint.

    Args:
        verbose (bool): Print the token history, the list, the comparison
            history and the rewards at every step.
    """
    vocab_size = len(vocab)
    model = TransformerModel(vocab_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)  # Reduced learning rate
    # Optionally resume from a checkpoint. The returned episode number drives
    # the loop below so the exploration schedule and checkpoint names
    # continue where they left off instead of restarting at 0.
    checkpoint_path = os.path.join(OUTPUT_DIR, "ckpt_52000_0.6910_169.16.pth")
    start_episode = load_checkpoint(checkpoint_path, model, optimizer)

    def get_valid_tokens(state, list_length, last_token):
        """Token ids the policy may emit in ``state``."""
        index_tokens = [vocab[str(i)] for i in range(list_length)]
        if state == 'expect_index1':
            # The first index must leave room for a strictly larger second one.
            return index_tokens[:-1]
        if state == 'expect_index2':
            # The second index must be greater than the one just emitted.
            return [tok for tok in index_tokens if tok > last_token]
        # 'expect_action' and any unexpected state.
        return [vocab['Comparison'], vocab['Swap']]

    # Statistics over the episodes trained since the last checkpoint.
    episode_cnt = 0
    total_reward = 0.0
    num_successes = 0
    total_steps = 0

    for episode in range(start_episode, NUM_EPISODES):
        t1 = time.time()
        model.train()  # Set model to training mode
        env = SortingEnv()
        initial_token_id, current_list = env.reset()
        input_tokens = [initial_token_id]
        log_probs = []
        rewards = []
        comparisons = []
        index1_id = None  # First index token of a comparison in progress

        state = 'expect_action'
        done = False
        success = False

        # Exploration probability; depends on the episode only.
        eps_threshold = EPS_END + (EPS_START - EPS_END) * np.exp(-1.0 * episode / EPS_DECAY)

        while not done and len(input_tokens) < env.max_steps:
            if verbose:
                print(decode(input_tokens, inv_vocab))
                print(env.get_list())
                print(comparisons)
            # Prepare input tensor
            input_seq = torch.tensor(input_tokens, dtype=torch.long, device=device).unsqueeze(1)  # (seq_len, batch_size)
            # Get model output
            with torch.autocast(device_type=device.type, dtype=torch.bfloat16):
                output = model(input_seq)  # (seq_len, batch_size, vocab_size)
                # Get logits for the last token
                logits = output[-1, 0, :]  # (vocab_size)

                # Check for NaNs in logits
                if torch.isnan(logits).any():
                    print(f"Episode {episode}, NaNs in logits before masking.")
                    break

                valid_token_ids = get_valid_tokens(state, env.length, input_tokens[-1])

                # Ensure valid_token_ids are within the vocab range
                if any(idx >= vocab_size or idx < 0 for idx in valid_token_ids):
                    print(f"Episode {episode}, invalid indices in valid_token_ids: {valid_token_ids}")
                    break

                # Mask invalid tokens
                mask_value = -1e9  # Use a large negative value instead of -inf
                mask = torch.full_like(logits, mask_value)
                mask[valid_token_ids] = 0
                masked_logits = logits + mask

                # Exploration: with probability eps_threshold flatten the
                # distribution by dividing the logits by 4.
                if random.random() < eps_threshold:
                    masked_logits = masked_logits / 4

                # Check for NaNs in masked_logits
                if torch.isnan(masked_logits).any():
                    print(f"Episode {episode}, NaNs in masked_logits after masking.")
                    break

                # Compute probabilities
                probs = F.softmax(masked_logits, dim=0)

                # Check for NaNs in probs
                if torch.isnan(probs).any():
                    print(f"Episode {episode}, NaNs in probs after softmax.")
                    break

                try:
                    m = torch.distributions.Categorical(probs)
                    action_token = m.sample()
                    log_prob = m.log_prob(action_token)
                except ValueError as e:
                    print(f"Episode {episode}, error in sampling action: {e}")
                    break

            action = action_token.item()
            log_probs.append(log_prob)
            input_tokens.append(action)

            # Every sampled token gets exactly one reward entry (0.0 for the
            # tokens that only build up a comparison).
            reward = 0.0
            if state == 'expect_action':
                if action == vocab['Comparison']:
                    state = 'expect_index1'
                elif action == vocab['Swap']:
                    if env.indices is None:
                        # Swap without a preceding comparison ends the episode.
                        reward = INVALID_ACTION_REWARD
                        done = True
                    else:
                        response_token, reward, done, current_list = env.step([vocab['Swap']])
                        if done and reward == SUCCESS_REWARD:
                            success = True
                        if verbose:
                            print("Reward:", reward)
                else:
                    reward = INVALID_ACTION_REWARD
                    done = True
            elif state == 'expect_index1':
                index1_id = action
                state = 'expect_index2'
            elif state == 'expect_index2':
                action_tokens = [vocab['Comparison'], index1_id, action]
                comparisons.append((int(inv_vocab[index1_id]),
                                    int(inv_vocab[action])))
                response_token, reward, done, current_list = env.step(action_tokens)
                if done and reward == SUCCESS_REWARD:
                    success = True
                else:
                    # Penalise comparisons that are expensive to describe
                    # relative to the comparison history.
                    reward += COMPARISON_ENTROPY_MULTIPLIER * compute_min_delta_entropy(comparisons)
                if verbose:
                    print("Reward:", reward)
                if response_token is not None:
                    input_tokens.append(response_token)
                state = 'expect_action'
            else:
                reward = INVALID_ACTION_REWARD
                done = True

            rewards.append(reward)

        # Save checkpoint. Skipped while the statistics window is empty
        # (e.g. right after resuming), which also avoids dividing by zero.
        if episode > 0 and episode % EPISODES_SAVE == 0 and episode_cnt > 0:
            avg_reward = total_reward / episode_cnt
            success_rate = num_successes / episode_cnt
            avg_steps = total_steps / episode_cnt
            print(f"Last {episode_cnt} trained episodes: avg reward {avg_reward:.4f}, "
                  f"success rate {success_rate:.4f}, avg steps {avg_steps:.2f}")
            save_checkpoint(model, optimizer, episode, OUTPUT_DIR, f"ckpt_{episode}_{success_rate:.4f}_{avg_steps:.2f}.pth")
            episode_cnt = 0
            total_reward = 0.0
            num_successes = 0
            total_steps = 0

        assert len(log_probs) == len(rewards), "log_probs and returns have different sizes!"

        if len(log_probs) == 0:
            continue  # Skip if no actions were taken

        # Compute returns and loss within autocast
        with torch.autocast(device_type=device.type, dtype=torch.bfloat16):
            # Discounted return for every step, accumulated from the end.
            returns = []
            R = 0
            for r in reversed(rewards):
                R = r + GAMMA * R
                returns.append(R)
            returns.reverse()
            returns = torch.tensor(returns).to(device)

            # Check for NaNs in returns
            if torch.isnan(returns).any():
                print(f"Episode {episode}, NaNs in returns.")
                continue

            # Compute loss
            loss = 0
            for log_prob, R in zip(log_probs, returns):
                loss -= log_prob * R

            # Check for NaNs in loss
            if torch.isnan(loss):
                print(f"Episode {episode}, NaN in loss.")
                continue

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()

        # Update all window statistics together so that the success rate is
        # computed over exactly the episodes counted in episode_cnt.
        episode_cnt += 1
        total_reward += sum(rewards)
        total_steps += len(rewards)
        if success:
            num_successes += 1
        t2 = time.time()
        print(f"Episode {episode}, loss:{loss.item():.4f}, {'succeed' if success else 'fail'}, steps:{len(rewards)}, total reward:{sum(rewards):.4f}, {t2-t1} sec")

# Entry point: start training with per-step debug printing disabled.
if __name__ == "__main__":
    train(verbose=False)




No checkpoint found at datasets/rl_sort_transformer_easy/list8_transformer4_192_gamma09_step210_v2/ckpt_52000_0.6910_169.16.pth, starting from scratch.
Episode 0, loss:-68.0615, fail, steps:12, total reward:-11.6913, 0.361278772354126 sec
Episode 1, loss:-14.7772, fail, steps:1, total reward:-10.0000, 0.0053904056549072266 sec
Episode 2, loss:-72.0592, fail, steps:8, total reward:-12.2477, 0.020350933074951172 sec
Episode 3, loss:-181.0044, fail, steps:159, total reward:-20.3830, 0.4952394962310791 sec
Episode 4, loss:-156.1559, fail, steps:158, total reward:-21.3937, 0.4323863983154297 sec
Episode 5, loss:-121.6667, fail, steps:158, total reward:-18.3224, 0.4314444065093994 sec
Episode 6, loss:-157.3369, fail, steps:158, total reward:-23.2601, 0.4921417236328125 sec
Episode 7, loss:-106.3106, fail, steps:158, total reward:-19.3838, 0.4371950626373291 sec
Episode 8, loss:-122.1537, fail, steps:78, total reward:-20.0783, 0.2270948886871338 sec
Episode 9, loss:-84.5488, fail, steps:158, 

Episode 89, loss:-70.4431, fail, steps:160, total reward:-10.3001, 0.4361541271209717 sec
Episode 90, loss:-116.6133, fail, steps:158, total reward:-18.1674, 0.4342975616455078 sec
Episode 91, loss:-131.2911, fail, steps:158, total reward:-17.8822, 0.43535399436950684 sec
Episode 92, loss:-103.5456, fail, steps:159, total reward:-14.7445, 0.44062185287475586 sec
Episode 93, loss:-119.9216, fail, steps:159, total reward:-13.5895, 0.4430429935455322 sec
Episode 94, loss:-97.5164, fail, steps:158, total reward:-15.3117, 0.44444799423217773 sec
Episode 95, loss:-97.9567, fail, steps:159, total reward:-14.6723, 0.44460630416870117 sec
Episode 96, loss:-146.0208, fail, steps:158, total reward:-19.1150, 0.5019218921661377 sec
Episode 97, loss:-110.6428, fail, steps:158, total reward:-15.9609, 0.44249820709228516 sec
Episode 98, loss:-91.6464, fail, steps:159, total reward:-13.0845, 0.4474372863769531 sec
Episode 99, loss:-119.9190, fail, steps:158, total reward:-18.5773, 0.4437685012817383 se

Episode 179, loss:-122.2065, fail, steps:160, total reward:-12.9699, 0.4394955635070801 sec
Episode 180, loss:-137.0694, fail, steps:159, total reward:-15.1666, 0.43956851959228516 sec
Episode 181, loss:-108.7911, fail, steps:160, total reward:-11.3459, 0.44884777069091797 sec
Episode 182, loss:-60.2971, fail, steps:160, total reward:-7.2181, 0.4455752372741699 sec
Episode 183, loss:-68.8557, fail, steps:160, total reward:-9.1459, 0.4458889961242676 sec
Episode 184, loss:-57.0074, fail, steps:160, total reward:-6.7131, 0.44843339920043945 sec
Episode 185, loss:-112.2681, fail, steps:159, total reward:-15.0330, 0.44401097297668457 sec
Episode 186, loss:-96.9544, fail, steps:160, total reward:-10.7952, 0.4511110782623291 sec
Episode 187, loss:-68.9890, fail, steps:160, total reward:-9.4246, 0.45188260078430176 sec
Episode 188, loss:-65.5376, fail, steps:161, total reward:-6.4860, 0.45093369483947754 sec
Episode 189, loss:-148.4350, fail, steps:155, total reward:-18.5483, 0.43459606170654

Episode 269, loss:-146.7460, fail, steps:159, total reward:-14.7895, 0.4132668972015381 sec
Episode 270, loss:-118.5549, succeed, steps:148, total reward:-12.3081, 0.4026782512664795 sec
Episode 271, loss:-97.2145, fail, steps:160, total reward:-8.9394, 0.44368672370910645 sec
Episode 272, loss:-133.9763, fail, steps:160, total reward:-13.0214, 0.4430713653564453 sec
Episode 273, loss:-120.4008, fail, steps:159, total reward:-12.7436, 0.44463396072387695 sec
Episode 274, loss:-109.5324, fail, steps:160, total reward:-10.3886, 0.4472992420196533 sec
Episode 275, loss:-135.4952, fail, steps:29, total reward:-12.8475, 0.09536147117614746 sec
Episode 276, loss:-116.6146, fail, steps:160, total reward:-11.5886, 0.41971874237060547 sec
Episode 277, loss:-64.0904, fail, steps:161, total reward:-6.1459, 0.43619275093078613 sec
Episode 278, loss:-90.0499, fail, steps:161, total reward:-8.5058, 0.44405484199523926 sec
Episode 279, loss:-129.2678, fail, steps:45, total reward:-10.6937, 0.19273614

Episode 359, loss:-66.8747, fail, steps:159, total reward:-10.9239, 0.4339718818664551 sec
Episode 360, loss:-40.6301, succeed, steps:116, total reward:-6.5342, 0.32627320289611816 sec
Episode 361, loss:-63.7337, fail, steps:160, total reward:-9.5075, 0.43323397636413574 sec
Episode 362, loss:-123.8121, fail, steps:159, total reward:-18.6101, 0.43907761573791504 sec
Episode 363, loss:-55.0605, succeed, steps:113, total reward:-6.3529, 0.3208746910095215 sec
Episode 364, loss:-98.7922, fail, steps:159, total reward:-12.5272, 0.4329383373260498 sec
Episode 365, loss:-112.5334, fail, steps:159, total reward:-15.5173, 0.4450092315673828 sec
Episode 366, loss:-40.4675, succeed, steps:111, total reward:-4.5029, 0.31476783752441406 sec
Episode 367, loss:-79.8624, fail, steps:160, total reward:-10.4082, 0.4344441890716553 sec
Episode 368, loss:-74.8429, fail, steps:160, total reward:-10.0124, 0.4420487880706787 sec
Episode 369, loss:-101.7374, fail, steps:159, total reward:-13.6616, 0.44271039

Episode 449, loss:-96.5222, fail, steps:159, total reward:-13.1666, 0.44763779640197754 sec
Episode 450, loss:-96.2822, fail, steps:160, total reward:-11.9600, 0.44559550285339355 sec
Episode 451, loss:-104.8001, fail, steps:159, total reward:-12.2387, 0.44540858268737793 sec
Episode 452, loss:-105.5455, fail, steps:159, total reward:-12.2288, 0.445110559463501 sec
Episode 453, loss:-84.5813, fail, steps:160, total reward:-9.3100, 0.45308780670166016 sec
Episode 454, loss:-46.2061, fail, steps:162, total reward:-3.3417, 0.45220279693603516 sec
Episode 455, loss:-60.8410, fail, steps:161, total reward:-6.5788, 0.45216894149780273 sec
Episode 456, loss:-84.3363, fail, steps:160, total reward:-10.2379, 0.4479086399078369 sec
Episode 457, loss:-61.2102, fail, steps:160, total reward:-8.3623, 0.5066168308258057 sec
Episode 458, loss:3.7854, succeed, steps:72, total reward:1.4072, 0.21843600273132324 sec
Episode 459, loss:-94.2429, fail, steps:159, total reward:-12.8059, 0.42293787002563477 

Episode 539, loss:-27.4071, succeed, steps:85, total reward:-2.8825, 0.24216032028198242 sec
Episode 540, loss:-75.0820, fail, steps:160, total reward:-9.0017, 0.42964792251586914 sec
Episode 541, loss:-18.7717, succeed, steps:75, total reward:-2.4798, 0.21465849876403809 sec
Episode 542, loss:-86.5647, succeed, steps:124, total reward:-10.5534, 0.3300650119781494 sec
Episode 543, loss:-88.0751, fail, steps:160, total reward:-11.3108, 0.43734025955200195 sec
Episode 544, loss:-36.5226, succeed, steps:158, total reward:-2.8459, 0.434983491897583 sec
Episode 545, loss:-8.9661, succeed, steps:82, total reward:0.6438, 0.2339479923248291 sec
Episode 546, loss:-25.6376, succeed, steps:109, total reward:-2.9872, 0.2939453125 sec
Episode 547, loss:-62.9637, fail, steps:161, total reward:-6.9918, 0.4319624900817871 sec
Episode 548, loss:-78.5804, fail, steps:161, total reward:-8.1459, 0.44315195083618164 sec
Episode 549, loss:-90.1952, fail, steps:159, total reward:-10.3632, 0.4465658664703369 

Episode 629, loss:-72.8789, fail, steps:160, total reward:-9.0025, 0.4447147846221924 sec
Episode 630, loss:-95.3502, fail, steps:160, total reward:-11.5590, 0.4467906951904297 sec
Episode 631, loss:-53.7215, succeed, steps:125, total reward:-6.2778, 0.3602912425994873 sec
Episode 632, loss:-50.4984, fail, steps:161, total reward:-5.2172, 0.4421703815460205 sec
Episode 633, loss:-35.8405, succeed, steps:129, total reward:-3.5763, 0.3652677536010742 sec
Episode 634, loss:-92.5093, fail, steps:160, total reward:-9.2246, 0.44129228591918945 sec
Episode 635, loss:-87.7051, succeed, steps:118, total reward:-10.8106, 0.3854494094848633 sec
Episode 636, loss:-85.4210, fail, steps:160, total reward:-12.0837, 0.4428744316101074 sec
Episode 637, loss:-28.5609, succeed, steps:144, total reward:-2.7340, 0.4029042720794678 sec
Episode 638, loss:-62.8037, fail, steps:160, total reward:-9.3001, 0.4442322254180908 sec
Episode 639, loss:-68.7847, fail, steps:160, total reward:-7.7754, 0.447383880615234

Episode 719, loss:-83.3145, fail, steps:160, total reward:-9.7952, 0.4512944221496582 sec
Episode 720, loss:-90.3305, fail, steps:160, total reward:-8.5788, 0.45024943351745605 sec
Episode 721, loss:-88.6714, fail, steps:160, total reward:-11.5994, 0.4528474807739258 sec
Episode 722, loss:-40.9575, succeed, steps:115, total reward:-4.0186, 0.3294486999511719 sec
Episode 723, loss:-44.2035, succeed, steps:125, total reward:-4.9262, 0.34717631340026855 sec
Episode 724, loss:-42.1079, fail, steps:161, total reward:-2.7745, 0.4377095699310303 sec
Episode 725, loss:-45.2505, fail, steps:161, total reward:-3.1352, 0.44736194610595703 sec
Episode 726, loss:-82.3359, fail, steps:160, total reward:-9.7329, 0.44557929039001465 sec
Episode 727, loss:-79.6035, fail, steps:160, total reward:-10.7230, 0.45290517807006836 sec
Episode 728, loss:-121.4911, fail, steps:159, total reward:-15.2494, 0.44763851165771484 sec
Episode 729, loss:-92.1603, fail, steps:159, total reward:-13.3009, 0.44830989837646

Episode 809, loss:-70.1525, fail, steps:161, total reward:-6.7230, 0.4490187168121338 sec
Episode 810, loss:-98.8813, fail, steps:160, total reward:-9.5886, 0.44943928718566895 sec
Episode 811, loss:-35.9874, succeed, steps:135, total reward:-4.5355, 0.38338589668273926 sec
Episode 812, loss:-21.4607, succeed, steps:128, total reward:-0.5557, 0.35878562927246094 sec
Episode 813, loss:-17.4926, succeed, steps:145, total reward:-0.5569, 0.39997315406799316 sec
Episode 814, loss:-101.2573, fail, steps:159, total reward:-15.6616, 0.43994951248168945 sec
Episode 815, loss:-96.4107, fail, steps:160, total reward:-11.0059, 0.4471406936645508 sec
Episode 816, loss:-109.3061, fail, steps:159, total reward:-14.8715, 0.45435237884521484 sec
Episode 817, loss:-114.8321, fail, steps:159, total reward:-13.8158, 0.44594693183898926 sec
Episode 818, loss:-32.3480, succeed, steps:133, total reward:-2.9477, 0.3763265609741211 sec
Episode 819, loss:-23.9916, succeed, steps:120, total reward:-1.5965, 0.33

Episode 899, loss:-89.5495, fail, steps:160, total reward:-9.8673, 0.4295988082885742 sec
Episode 900, loss:-81.5864, fail, steps:160, total reward:-8.1394, 0.4405088424682617 sec
Episode 901, loss:-43.5985, succeed, steps:138, total reward:-4.4019, 0.38610339164733887 sec
Episode 902, loss:-31.7782, succeed, steps:54, total reward:-3.5400, 0.15857315063476562 sec
Episode 903, loss:-90.2009, succeed, steps:143, total reward:-9.7325, 0.3770105838775635 sec
Episode 904, loss:-47.2904, fail, steps:161, total reward:-3.3516, 0.437469482421875 sec
Episode 905, loss:-81.2839, fail, steps:160, total reward:-8.4345, 0.44201016426086426 sec
Episode 906, loss:-132.1744, fail, steps:158, total reward:-15.6723, 0.4913294315338135 sec
Episode 907, loss:-83.0774, fail, steps:160, total reward:-8.2280, 0.44795751571655273 sec
Episode 908, loss:-126.1448, fail, steps:67, total reward:-9.0664, 0.19733166694641113 sec
Episode 909, loss:-102.0417, fail, steps:159, total reward:-11.8059, 0.432534694671630

Episode 989, loss:-73.5651, fail, steps:160, total reward:-10.2288, 0.44603514671325684 sec
Episode 990, loss:-34.5046, succeed, steps:117, total reward:-2.5956, 0.3323323726654053 sec
Episode 991, loss:1.8797, succeed, steps:51, total reward:0.4192, 0.14746403694152832 sec
Episode 992, loss:-6.2959, succeed, steps:112, total reward:0.0849, 0.29805588722229004 sec
Episode 993, loss:-52.4515, succeed, steps:141, total reward:-6.2160, 0.37563037872314453 sec
Episode 994, loss:-60.6669, fail, steps:5, total reward:-9.5157, 0.028262615203857422 sec
Episode 995, loss:-50.6348, fail, steps:161, total reward:-6.3623, 0.40302276611328125 sec
Episode 996, loss:-29.6841, fail, steps:161, total reward:-2.1253, 0.4289388656616211 sec
Episode 997, loss:-57.6428, fail, steps:161, total reward:-5.5680, 0.4379606246948242 sec
Episode 998, loss:-83.8707, fail, steps:160, total reward:-11.5994, 0.4468197822570801 sec
Episode 999, loss:-19.7760, succeed, steps:133, total reward:-1.8656, 0.371668338775634

Episode 1077, loss:-24.8332, succeed, steps:87, total reward:-3.4604, 0.24585652351379395 sec
Episode 1078, loss:-47.4227, succeed, steps:139, total reward:-7.0618, 0.3715808391571045 sec
Episode 1079, loss:-98.1218, fail, steps:159, total reward:-13.6624, 0.43717312812805176 sec
Episode 1080, loss:-71.3020, fail, steps:160, total reward:-9.8681, 0.4420607089996338 sec
Episode 1081, loss:-78.2040, fail, steps:160, total reward:-9.1361, 0.444171667098999 sec
Episode 1082, loss:-76.9430, fail, steps:159, total reward:-10.7960, 0.4996829032897949 sec
Episode 1083, loss:-42.5776, succeed, steps:128, total reward:-6.9378, 0.3662385940551758 sec
Episode 1084, loss:-67.8326, fail, steps:160, total reward:-10.4452, 0.4416317939758301 sec
Episode 1085, loss:-1.4289, succeed, steps:125, total reward:-0.1843, 0.35277652740478516 sec
Episode 1086, loss:-44.4427, succeed, steps:122, total reward:-5.9786, 0.3402400016784668 sec
Episode 1087, loss:-81.8890, fail, steps:159, total reward:-11.3304, 0.4

Episode 1166, loss:-48.6341, fail, steps:161, total reward:-5.7680, 0.44964027404785156 sec
Episode 1167, loss:-36.7299, succeed, steps:152, total reward:-3.9596, 0.4261007308959961 sec
Episode 1168, loss:-28.9371, succeed, steps:143, total reward:-3.4454, 0.39903736114501953 sec
Episode 1169, loss:-80.2592, fail, steps:160, total reward:-9.4738, 0.448077917098999 sec
Episode 1170, loss:-48.5841, succeed, steps:158, total reward:-5.0631, 0.4426758289337158 sec
Episode 1171, loss:-137.0880, fail, steps:158, total reward:-19.1674, 0.44133925437927246 sec
Episode 1172, loss:-130.2316, fail, steps:158, total reward:-16.1051, 0.4415018558502197 sec
Episode 1173, loss:-24.9876, succeed, steps:143, total reward:-2.7012, 0.40073299407958984 sec
Episode 1174, loss:-54.5412, succeed, steps:158, total reward:-7.7131, 0.4432797431945801 sec
Episode 1175, loss:-55.8525, succeed, steps:159, total reward:-7.6509, 0.44448089599609375 sec
Episode 1176, loss:7.9211, succeed, steps:98, total reward:2.725

Episode 1256, loss:-75.9885, fail, steps:160, total reward:-8.7853, 0.4183051586151123 sec
Episode 1257, loss:-98.5183, fail, steps:51, total reward:-7.7766, 0.14768052101135254 sec
Episode 1258, loss:3.4793, succeed, steps:125, total reward:3.2035, 0.3258829116821289 sec
Episode 1259, loss:-56.5472, fail, steps:160, total reward:-7.7853, 0.42823123931884766 sec
Episode 1260, loss:2.0360, succeed, steps:158, total reward:2.8339, 0.4383876323699951 sec
Episode 1261, loss:5.6911, succeed, steps:63, total reward:2.0886, 0.18144941329956055 sec
Episode 1262, loss:-66.1963, fail, steps:161, total reward:-6.8246, 0.4238283634185791 sec
Episode 1263, loss:-71.4642, fail, steps:160, total reward:-9.4345, 0.4368405342102051 sec
Episode 1264, loss:-9.1276, succeed, steps:128, total reward:-0.2671, 0.3577744960784912 sec
Episode 1265, loss:-94.6127, fail, steps:159, total reward:-12.2387, 0.43427491188049316 sec
Episode 1266, loss:-27.0111, succeed, steps:155, total reward:-2.3302, 0.432678222656

Episode 1345, loss:-61.6341, succeed, steps:132, total reward:-7.6076, 0.34994935989379883 sec
Episode 1346, loss:-11.1109, succeed, steps:81, total reward:-0.1397, 0.22671985626220703 sec
Episode 1347, loss:-58.3272, succeed, steps:137, total reward:-6.4347, 0.36530375480651855 sec
Episode 1348, loss:-101.6215, fail, steps:159, total reward:-14.3272, 0.4316427707672119 sec
Episode 1349, loss:-5.3887, succeed, steps:57, total reward:-0.5400, 0.1661524772644043 sec
Episode 1350, loss:2.2982, succeed, steps:125, total reward:2.8986, 0.33394455909729004 sec
Episode 1351, loss:-22.6453, succeed, steps:130, total reward:-1.4943, 0.3535120487213135 sec
Episode 1352, loss:-41.1941, succeed, steps:112, total reward:-3.7586, 0.3081855773925781 sec
Episode 1353, loss:-105.9723, fail, steps:159, total reward:-12.6715, 0.42934513092041016 sec
Episode 1354, loss:-72.1302, fail, steps:160, total reward:-7.7952, 0.4414675235748291 sec
Episode 1355, loss:-60.8080, fail, steps:160, total reward:-6.7131

Episode 1434, loss:-83.2048, fail, steps:159, total reward:-12.3830, 0.44379687309265137 sec
Episode 1435, loss:-51.0052, fail, steps:160, total reward:-7.6410, 0.4528055191040039 sec
Episode 1436, loss:-63.1530, fail, steps:161, total reward:-6.4345, 0.4537959098815918 sec
Episode 1437, loss:-44.1271, fail, steps:160, total reward:-7.0738, 0.44870901107788086 sec
Episode 1438, loss:-26.9350, succeed, steps:99, total reward:-2.8731, 0.28603529930114746 sec
Episode 1439, loss:-15.7867, succeed, steps:142, total reward:-0.9069, 0.3871018886566162 sec
Episode 1440, loss:-81.9275, succeed, steps:142, total reward:-10.7325, 0.4167478084564209 sec
Episode 1441, loss:-41.8519, fail, steps:162, total reward:-2.4238, 0.4477365016937256 sec
Episode 1442, loss:-61.5366, fail, steps:160, total reward:-6.4967, 0.4464874267578125 sec
Episode 1443, loss:-72.6783, fail, steps:159, total reward:-11.2288, 0.44611406326293945 sec
Episode 1444, loss:-9.6602, succeed, steps:134, total reward:1.4959, 0.3780

Episode 1523, loss:5.0460, succeed, steps:96, total reward:1.2304, 0.27434420585632324 sec
Episode 1524, loss:-62.0567, fail, steps:161, total reward:-5.5451, 0.4342520236968994 sec
Episode 1525, loss:-83.8807, fail, steps:160, total reward:-8.1459, 0.4413943290710449 sec
Episode 1526, loss:-13.7831, succeed, steps:153, total reward:0.1232, 0.4257023334503174 sec
Episode 1527, loss:-52.0508, succeed, steps:136, total reward:-6.3855, 0.3819143772125244 sec
Episode 1528, loss:-34.4054, succeed, steps:78, total reward:-3.7168, 0.22449278831481934 sec
Episode 1529, loss:-63.3638, fail, steps:161, total reward:-6.7230, 0.4300355911254883 sec
Episode 1530, loss:-45.2437, fail, steps:161, total reward:-4.0631, 0.44331860542297363 sec
Episode 1531, loss:-77.9756, fail, steps:160, total reward:-10.0845, 0.44282102584838867 sec
Episode 1532, loss:1.9868, succeed, steps:103, total reward:1.4876, 0.2958033084869385 sec
Episode 1533, loss:-54.8037, fail, steps:161, total reward:-5.6410, 0.441926956

Episode 1612, loss:-47.3892, fail, steps:161, total reward:-5.8574, 0.4492623805999756 sec
Episode 1613, loss:-75.1078, fail, steps:160, total reward:-8.2181, 0.4500391483306885 sec
Episode 1614, loss:-62.6188, fail, steps:160, total reward:-7.8673, 0.449113130569458 sec
Episode 1615, loss:-102.8781, fail, steps:160, total reward:-12.6158, 0.4510176181793213 sec
Episode 1616, loss:-13.8789, succeed, steps:94, total reward:-0.1204, 0.2758767604827881 sec
Episode 1617, loss:-58.6368, fail, steps:161, total reward:-5.7123, 0.4345273971557617 sec
Episode 1618, loss:-51.6034, succeed, steps:123, total reward:-6.2770, 0.40222644805908203 sec
Episode 1619, loss:-11.7336, succeed, steps:127, total reward:-0.9065, 0.3514442443847656 sec
Episode 1620, loss:-64.6052, succeed, steps:149, total reward:-9.1769, 0.40941429138183594 sec
Episode 1621, loss:-79.1711, fail, steps:160, total reward:-9.1459, 0.44369029998779297 sec
Episode 1622, loss:-72.5981, fail, steps:160, total reward:-10.3731, 0.4466

Episode 1701, loss:-93.5405, fail, steps:159, total reward:-13.7346, 0.44552135467529297 sec
Episode 1702, loss:-64.6957, fail, steps:159, total reward:-11.7140, 0.44347596168518066 sec
Episode 1703, loss:-91.0678, fail, steps:158, total reward:-15.3838, 0.44721293449401855 sec
Episode 1704, loss:-49.5097, fail, steps:160, total reward:-7.0639, 0.44918274879455566 sec
Episode 1705, loss:-33.9586, fail, steps:161, total reward:-3.4139, 0.45252466201782227 sec
Episode 1706, loss:-21.9224, fail, steps:161, total reward:-1.6826, 0.4580516815185547 sec
Episode 1707, loss:-95.2345, fail, steps:159, total reward:-11.5173, 0.448378324508667 sec
Episode 1708, loss:8.4331, succeed, steps:73, total reward:2.4794, 0.21625185012817383 sec
Episode 1709, loss:-38.2360, fail, steps:161, total reward:-5.7131, 0.48078489303588867 sec
Episode 1710, loss:-96.7882, fail, steps:159, total reward:-14.3117, 0.43920111656188965 sec
Episode 1711, loss:-54.4976, fail, steps:160, total reward:-8.2902, 0.447231054

Episode 1790, loss:-82.6014, fail, steps:158, total reward:-13.8690, 0.44860267639160156 sec
Episode 1791, loss:-56.3344, fail, steps:160, total reward:-9.2902, 0.4487926959991455 sec
Episode 1792, loss:-37.6835, succeed, steps:154, total reward:-4.3933, 0.43332886695861816 sec
Episode 1793, loss:-68.7075, fail, steps:160, total reward:-8.0639, 0.44892120361328125 sec
Episode 1794, loss:-16.3404, succeed, steps:121, total reward:-1.3900, 0.34474992752075195 sec
Episode 1795, loss:-59.2185, fail, steps:160, total reward:-8.2181, 0.44498729705810547 sec
Episode 1796, loss:-86.5063, fail, steps:159, total reward:-13.1567, 0.44462060928344727 sec
Episode 1797, loss:-60.3218, fail, steps:160, total reward:-7.5918, 0.4487636089324951 sec
Episode 1798, loss:-59.8569, fail, steps:160, total reward:-7.6410, 0.4489295482635498 sec
Episode 1799, loss:-39.0807, fail, steps:161, total reward:-5.6410, 0.45696282386779785 sec
Episode 1800, loss:-31.9961, fail, steps:161, total reward:-2.8467, 0.45114

Episode 1879, loss:-88.2480, fail, steps:160, total reward:-10.0845, 0.4521510601043701 sec
Episode 1880, loss:-87.2466, fail, steps:161, total reward:-9.6410, 0.44974184036254883 sec
Episode 1881, loss:-45.2026, fail, steps:161, total reward:-5.9295, 0.5043191909790039 sec
Episode 1882, loss:-40.8284, fail, steps:161, total reward:-6.6803, 0.449721097946167 sec
Episode 1883, loss:-20.4126, succeed, steps:113, total reward:-2.4308, 0.3224310874938965 sec
Episode 1884, loss:-13.4506, succeed, steps:108, total reward:-0.3994, 0.3001229763031006 sec
Episode 1885, loss:-65.5187, fail, steps:160, total reward:-8.1361, 0.43114256858825684 sec
Episode 1886, loss:-29.4865, succeed, steps:105, total reward:-3.2060, 0.29785943031311035 sec
Episode 1887, loss:-43.1784, fail, steps:160, total reward:-7.4147, 0.4299764633178711 sec
Episode 1888, loss:-24.8149, fail, steps:161, total reward:-2.7745, 0.44333505630493164 sec
Episode 1889, loss:-12.2403, succeed, steps:78, total reward:-2.6184, 0.22965

Episode 1968, loss:-38.6708, succeed, steps:112, total reward:-5.6480, 0.31901979446411133 sec
Episode 1969, loss:-14.8702, succeed, steps:150, total reward:0.4946, 0.4096682071685791 sec
Episode 1970, loss:-75.4034, succeed, steps:133, total reward:-10.2675, 0.3710312843322754 sec
Episode 1971, loss:-62.0049, fail, steps:160, total reward:-9.2181, 0.43639087677001953 sec
Episode 1972, loss:-88.3872, fail, steps:159, total reward:-11.3731, 0.44022536277770996 sec
Episode 1973, loss:-34.4680, succeed, steps:110, total reward:-4.9094, 0.3128068447113037 sec
Episode 1974, loss:-66.8987, succeed, steps:147, total reward:-8.8875, 0.39892578125 sec
Episode 1975, loss:-78.9402, fail, steps:159, total reward:-12.2288, 0.4361534118652344 sec
Episode 1976, loss:-36.3938, fail, steps:162, total reward:-5.2326, 0.4546010494232178 sec
Episode 1977, loss:-16.4584, succeed, steps:82, total reward:-2.2226, 0.238969087600708 sec
Episode 1978, loss:-81.8166, fail, steps:160, total reward:-11.5272, 0.427

Episode 2055, loss:-28.9890, succeed, steps:127, total reward:-2.4213, 0.3419649600982666 sec
Episode 2056, loss:-73.1825, fail, steps:160, total reward:-10.5886, 0.43406081199645996 sec
Episode 2057, loss:-61.5140, fail, steps:160, total reward:-7.4967, 0.447277307510376 sec
Episode 2058, loss:-97.4076, fail, steps:159, total reward:-13.3207, 0.4439980983734131 sec
Episode 2059, loss:-69.0835, succeed, steps:127, total reward:-9.5247, 0.35874295234680176 sec
Episode 2060, loss:-58.6159, succeed, steps:137, total reward:-8.9897, 0.37825632095336914 sec
Episode 2061, loss:-52.3570, fail, steps:161, total reward:-4.6402, 0.49325013160705566 sec
Episode 2062, loss:-54.4437, succeed, steps:147, total reward:-5.7325, 0.41452932357788086 sec
Episode 2063, loss:-20.9470, fail, steps:162, total reward:-1.0532, 0.44896602630615234 sec
Episode 2064, loss:11.9947, succeed, steps:84, total reward:2.9324, 0.24244379997253418 sec
Episode 2065, loss:-51.7618, fail, steps:160, total reward:-6.0639, 0.

Episode 2144, loss:-47.2115, fail, steps:161, total reward:-4.8475, 0.4332268238067627 sec
Episode 2145, loss:-70.2680, fail, steps:160, total reward:-8.7853, 0.4408235549926758 sec
Episode 2146, loss:-23.6008, succeed, steps:87, total reward:-4.6146, 0.2472245693206787 sec
Episode 2147, loss:-55.2186, succeed, steps:149, total reward:-7.7760, 0.39853405952453613 sec
Episode 2148, loss:-15.7275, succeed, steps:107, total reward:-1.1830, 0.31673359870910645 sec
Episode 2149, loss:-72.7154, fail, steps:159, total reward:-11.0124, 0.429685115814209 sec
Episode 2150, loss:-64.2507, fail, steps:160, total reward:-9.2181, 0.4423997402191162 sec
Episode 2151, loss:-97.0229, fail, steps:159, total reward:-14.9830, 0.44455695152282715 sec
Episode 2152, loss:12.3832, succeed, steps:142, total reward:3.5466, 0.3992118835449219 sec
Episode 2153, loss:-43.8034, fail, steps:161, total reward:-5.8631, 0.45159363746643066 sec
Episode 2154, loss:-57.2795, fail, steps:160, total reward:-7.7952, 0.449802

Episode 2233, loss:-34.2230, fail, steps:161, total reward:-4.1975, 0.441495418548584 sec
Episode 2234, loss:-6.2316, succeed, steps:94, total reward:-0.4089, 0.26993560791015625 sec
Episode 2235, loss:-24.2709, fail, steps:161, total reward:-2.4860, 0.43332386016845703 sec
Episode 2236, loss:-87.1548, fail, steps:159, total reward:-11.3731, 0.43946266174316406 sec
Episode 2237, loss:4.1266, succeed, steps:11, total reward:1.5251, 0.04745173454284668 sec
Episode 2238, loss:-72.2148, fail, steps:160, total reward:-9.7066, 0.40531420707702637 sec
Episode 2239, loss:-73.1507, fail, steps:160, total reward:-8.4246, 0.42731165885925293 sec
Episode 2240, loss:-75.0369, fail, steps:159, total reward:-10.2910, 0.4399893283843994 sec
Episode 2241, loss:-71.5542, fail, steps:160, total reward:-8.7952, 0.4400815963745117 sec
Episode 2242, loss:-79.0080, fail, steps:159, total reward:-11.3009, 0.44008421897888184 sec
Episode 2243, loss:-80.1792, fail, steps:159, total reward:-13.2189, 0.4421076774

Episode 2322, loss:-33.5190, succeed, steps:106, total reward:-3.9765, 0.2986176013946533 sec
Episode 2323, loss:-54.2297, succeed, steps:113, total reward:-6.8514, 0.3107478618621826 sec
Episode 2324, loss:-109.8459, fail, steps:159, total reward:-13.4780, 0.4289391040802002 sec
Episode 2325, loss:-63.2524, fail, steps:160, total reward:-7.8246, 0.4465494155883789 sec
Episode 2326, loss:-85.4601, fail, steps:160, total reward:-10.3001, 0.4440038204193115 sec
Episode 2327, loss:-8.2333, succeed, steps:101, total reward:-1.0895, 0.28934407234191895 sec
Episode 2328, loss:-89.4997, fail, steps:160, total reward:-9.5788, 0.4320552349090576 sec
Episode 2329, loss:-35.8467, succeed, steps:101, total reward:-4.1002, 0.28772473335266113 sec
Episode 2330, loss:-66.4897, fail, steps:160, total reward:-7.2902, 0.4384298324584961 sec
Episode 2331, loss:-76.5811, fail, steps:160, total reward:-8.1558, 0.4451470375061035 sec
Episode 2332, loss:-33.6256, succeed, steps:94, total reward:-4.7967, 0.26

Episode 2411, loss:-33.0602, fail, steps:162, total reward:-2.7123, 0.43975186347961426 sec
Episode 2412, loss:-71.1948, fail, steps:160, total reward:-10.1558, 0.44643712043762207 sec
Episode 2413, loss:-45.4271, succeed, steps:151, total reward:-6.0416, 0.4202902317047119 sec
Episode 2414, loss:-38.0056, succeed, steps:156, total reward:-4.1966, 0.4337494373321533 sec
Episode 2415, loss:-32.5701, succeed, steps:76, total reward:-4.7791, 0.22037911415100098 sec
Episode 2416, loss:-47.3767, succeed, steps:137, total reward:-6.5626, 0.36644649505615234 sec
Episode 2417, loss:-17.9566, succeed, steps:111, total reward:-0.6266, 0.3080153465270996 sec
Episode 2418, loss:-12.7023, succeed, steps:146, total reward:0.0824, 0.45176005363464355 sec
Episode 2419, loss:-9.0429, succeed, steps:71, total reward:-0.3862, 0.2034897804260254 sec
Episode 2420, loss:-73.8639, fail, steps:160, total reward:-10.0124, 0.42488908767700195 sec
Episode 2421, loss:-38.7383, succeed, steps:109, total reward:-6.

Episode 2500, loss:-55.5341, fail, steps:161, total reward:-5.9295, 0.4509432315826416 sec
Episode 2501, loss:-45.6365, succeed, steps:134, total reward:-7.8970, 0.3784956932067871 sec
Episode 2502, loss:-97.3840, fail, steps:158, total reward:-15.3117, 0.43802928924560547 sec
Episode 2503, loss:-12.3353, succeed, steps:52, total reward:-1.5915, 0.1563429832458496 sec
Episode 2504, loss:5.1456, succeed, steps:109, total reward:0.6006, 0.29186201095581055 sec
Episode 2505, loss:-49.6493, fail, steps:161, total reward:-6.7230, 0.4270942211151123 sec
Episode 2506, loss:-49.8355, fail, steps:161, total reward:-6.5788, 0.4406290054321289 sec
Episode 2507, loss:-21.2093, succeed, steps:155, total reward:-1.5367, 0.4300661087036133 sec
Episode 2508, loss:-41.0292, fail, steps:161, total reward:-5.4967, 0.4465334415435791 sec
Episode 2509, loss:-38.7132, succeed, steps:146, total reward:-3.8661, 0.41184163093566895 sec
Episode 2510, loss:-89.6233, fail, steps:160, total reward:-11.4116, 0.4441

Episode 2589, loss:-35.8169, succeed, steps:114, total reward:-5.3071, 0.313845157623291 sec
Episode 2590, loss:-39.2227, succeed, steps:146, total reward:-4.7317, 0.39279699325561523 sec
Episode 2591, loss:-27.8966, succeed, steps:151, total reward:-1.9718, 0.4124634265899658 sec
Episode 2592, loss:-75.9771, fail, steps:160, total reward:-8.9394, 0.4409980773925781 sec
Episode 2593, loss:-27.5352, succeed, steps:123, total reward:-0.5441, 0.3454873561859131 sec
Episode 2594, loss:-60.7109, fail, steps:161, total reward:-6.5066, 0.44421815872192383 sec
Episode 2595, loss:-67.7220, fail, steps:161, total reward:-7.0837, 0.4463202953338623 sec
Episode 2596, loss:-19.9326, succeed, steps:149, total reward:-1.2989, 0.416872501373291 sec
Episode 2597, loss:-9.2209, succeed, steps:146, total reward:0.5874, 0.4072721004486084 sec
Episode 2598, loss:19.6942, succeed, steps:117, total reward:4.7964, 0.32970333099365234 sec
Episode 2599, loss:-97.9560, fail, steps:159, total reward:-14.3117, 0.4

Episode 2678, loss:-55.1424, fail, steps:160, total reward:-9.2902, 0.43656110763549805 sec
Episode 2679, loss:-6.9706, fail, steps:1, total reward:-10.0000, 0.019469022750854492 sec
Episode 2680, loss:-18.5393, fail, steps:1, total reward:-10.0000, 0.004128932952880859 sec
Episode 2681, loss:-49.2505, succeed, steps:112, total reward:-10.1736, 0.2759685516357422 sec
Episode 2682, loss:-69.4255, fail, steps:159, total reward:-13.0223, 0.4132845401763916 sec
Episode 2683, loss:-20.8606, succeed, steps:127, total reward:-2.2606, 0.3561666011810303 sec
Episode 2684, loss:-33.5697, fail, steps:161, total reward:-3.1253, 0.4328339099884033 sec
Episode 2685, loss:-33.5619, succeed, steps:122, total reward:-6.6278, 0.34151268005371094 sec
Episode 2686, loss:-89.7101, fail, steps:159, total reward:-13.5272, 0.4317781925201416 sec
Episode 2687, loss:-33.3542, fail, steps:161, total reward:-5.5689, 0.4465610980987549 sec
Episode 2688, loss:-62.0892, fail, steps:160, total reward:-8.3623, 0.45065

Episode 2767, loss:-35.5711, fail, steps:162, total reward:-3.0008, 0.4478030204772949 sec
Episode 2768, loss:-63.9627, succeed, steps:155, total reward:-8.9287, 0.4337637424468994 sec
Episode 2769, loss:-71.8042, fail, steps:161, total reward:-9.0116, 0.44974637031555176 sec
Episode 2770, loss:-42.0584, succeed, steps:125, total reward:-4.5656, 0.3579251766204834 sec
Episode 2771, loss:-106.5044, fail, steps:158, total reward:-16.2395, 0.43535566329956055 sec
Episode 2772, loss:-38.2741, fail, steps:161, total reward:-3.6303, 0.44946718215942383 sec
Episode 2773, loss:-17.8494, succeed, steps:76, total reward:-2.2241, 0.2218916416168213 sec
Episode 2774, loss:-16.5048, succeed, steps:94, total reward:-1.0581, 0.253756046295166 sec
Episode 2775, loss:-25.9321, fail, steps:162, total reward:-2.7844, 0.4292325973510742 sec
Episode 2776, loss:-16.1387, succeed, steps:72, total reward:-2.6134, 0.20869708061218262 sec
Episode 2777, loss:-25.9787, succeed, steps:98, total reward:-3.0074, 0.2

Episode 2856, loss:-38.4642, fail, steps:161, total reward:-6.2181, 0.4288032054901123 sec
Episode 2857, loss:-58.8741, succeed, steps:145, total reward:-7.9054, 0.3996608257293701 sec
Episode 2858, loss:-49.9226, fail, steps:161, total reward:-5.5680, 0.4438941478729248 sec
Episode 2859, loss:-6.7085, succeed, steps:115, total reward:-0.4308, 0.33089590072631836 sec
Episode 2860, loss:-48.8774, succeed, steps:133, total reward:-6.3912, 0.36466503143310547 sec
Episode 2861, loss:-29.8164, succeed, steps:144, total reward:-3.2881, 0.3942124843597412 sec
Episode 2862, loss:-110.3861, fail, steps:160, total reward:-12.8978, 0.4400606155395508 sec
Episode 2863, loss:-45.6790, fail, steps:160, total reward:-7.1459, 0.4992983341217041 sec
Episode 2864, loss:-10.7010, succeed, steps:115, total reward:-0.3586, 0.3306715488433838 sec
Episode 2865, loss:-77.6207, fail, steps:159, total reward:-10.7960, 0.4368743896484375 sec
Episode 2866, loss:-67.3352, fail, steps:160, total reward:-7.7230, 0.4

Episode 2945, loss:-11.6821, succeed, steps:85, total reward:-0.1604, 0.23684239387512207 sec
Episode 2946, loss:-58.2499, succeed, steps:151, total reward:-8.0425, 0.40751218795776367 sec
Episode 2947, loss:-3.3243, succeed, steps:83, total reward:1.0668, 0.2348160743713379 sec
Episode 2948, loss:-1.8375, succeed, steps:87, total reward:2.4889, 0.23525667190551758 sec
Episode 2949, loss:-54.4445, fail, steps:160, total reward:-7.2902, 0.4248192310333252 sec
Episode 2950, loss:-27.3541, succeed, steps:158, total reward:-1.9081, 0.43285202980041504 sec
Episode 2951, loss:-68.2078, fail, steps:160, total reward:-8.2280, 0.44524264335632324 sec
Episode 2952, loss:-31.3896, succeed, steps:152, total reward:-1.8538, 0.42569756507873535 sec
Episode 2953, loss:-76.8521, succeed, steps:138, total reward:-9.1439, 0.3888401985168457 sec
Episode 2954, loss:-15.2655, succeed, steps:121, total reward:-1.2457, 0.338930606842041 sec
Episode 2955, loss:-2.6929, succeed, steps:100, total reward:0.9934,

Episode 3032, loss:-48.5302, succeed, steps:134, total reward:-5.8961, 0.35973525047302246 sec
Episode 3033, loss:-74.2387, fail, steps:160, total reward:-10.6616, 0.43536949157714844 sec
Episode 3034, loss:-47.3918, fail, steps:160, total reward:-7.6509, 0.4430873394012451 sec
Episode 3035, loss:-52.1294, fail, steps:160, total reward:-9.0936, 0.44396543502807617 sec
Episode 3036, loss:-59.1907, fail, steps:160, total reward:-8.3623, 0.45172929763793945 sec
Episode 3037, loss:-71.4638, fail, steps:159, total reward:-11.7337, 0.4457669258117676 sec
Episode 3038, loss:-30.6182, succeed, steps:114, total reward:-4.7300, 0.3776969909667969 sec
Episode 3039, loss:-84.4054, fail, steps:160, total reward:-12.6814, 0.43766164779663086 sec
Episode 3040, loss:-26.1231, succeed, steps:116, total reward:-3.0186, 0.3263363838195801 sec
Episode 3041, loss:-56.8230, succeed, steps:139, total reward:-6.7733, 0.3850126266479492 sec
Episode 3042, loss:-65.0397, fail, steps:160, total reward:-7.7853, 0.

Episode 3121, loss:-67.1220, succeed, steps:136, total reward:-8.1332, 0.374218225479126 sec
Episode 3122, loss:-66.7170, succeed, steps:144, total reward:-9.1653, 0.3950998783111572 sec
Episode 3123, loss:-85.2043, fail, steps:159, total reward:-12.8780, 0.44344449043273926 sec
Episode 3124, loss:-75.4865, fail, steps:160, total reward:-10.3001, 0.4457998275756836 sec
Episode 3125, loss:-50.7499, succeed, steps:109, total reward:-7.2242, 0.30809831619262695 sec
Episode 3126, loss:-81.7004, fail, steps:159, total reward:-12.5272, 0.4350159168243408 sec
Episode 3127, loss:-43.6229, fail, steps:161, total reward:-6.4345, 0.4451479911804199 sec
Episode 3128, loss:-64.1439, fail, steps:160, total reward:-9.0116, 0.4492001533508301 sec
Episode 3129, loss:-52.4413, succeed, steps:152, total reward:-4.3924, 0.42605042457580566 sec
Episode 3130, loss:-65.4040, fail, steps:161, total reward:-7.3722, 0.448760986328125 sec
Episode 3131, loss:6.1210, succeed, steps:97, total reward:2.6632, 0.27709

Episode 3210, loss:-16.1163, succeed, steps:119, total reward:-2.9801, 0.33823156356811523 sec
Episode 3211, loss:-80.3510, fail, steps:159, total reward:-12.6223, 0.4350159168243408 sec
Episode 3212, loss:-61.1497, fail, steps:160, total reward:-8.4345, 0.44467854499816895 sec
Episode 3213, loss:18.8730, succeed, steps:65, total reward:4.8821, 0.19055557250976562 sec
Episode 3214, loss:-58.1349, fail, steps:160, total reward:-7.8673, 0.42701292037963867 sec
Episode 3215, loss:-45.3695, fail, steps:162, total reward:-2.8566, 0.44248318672180176 sec
Episode 3216, loss:-48.6821, fail, steps:160, total reward:-6.7131, 0.4434540271759033 sec
Episode 3217, loss:-35.3893, succeed, steps:132, total reward:-0.5041, 0.3694727420806885 sec
Episode 3218, loss:3.7277, succeed, steps:145, total reward:3.2473, 0.40047335624694824 sec
Episode 3219, loss:-59.0489, fail, steps:161, total reward:-6.0967, 0.5081963539123535 sec
Episode 3220, loss:-5.0434, succeed, steps:61, total reward:0.5214, 0.1802878

Episode 3299, loss:-48.9030, fail, steps:161, total reward:-5.2074, 0.4233403205871582 sec
Episode 3300, loss:-9.8780, succeed, steps:90, total reward:-1.9589, 0.25309133529663086 sec
Episode 3301, loss:-58.1939, succeed, steps:155, total reward:-7.1975, 0.41534972190856934 sec
Episode 3302, loss:-72.4358, fail, steps:160, total reward:-8.8509, 0.4417412281036377 sec
Episode 3303, loss:-76.4377, fail, steps:159, total reward:-12.6616, 0.4415853023529053 sec
Episode 3304, loss:-33.6505, succeed, steps:147, total reward:-5.9489, 0.40938854217529297 sec
Episode 3305, loss:-21.8884, fail, steps:163, total reward:-0.0524, 0.5118408203125 sec
Episode 3306, loss:-54.4108, fail, steps:160, total reward:-7.7230, 0.4524717330932617 sec
Episode 3307, loss:-11.9206, succeed, steps:76, total reward:-1.9848, 0.22146105766296387 sec
Episode 3308, loss:-99.9628, fail, steps:159, total reward:-12.8780, 0.4244658946990967 sec
Episode 3309, loss:-32.7874, fail, steps:161, total reward:-4.7745, 0.44196105

Episode 3388, loss:-127.3602, fail, steps:93, total reward:-10.8310, 0.24068188667297363 sec
Episode 3389, loss:-83.9178, fail, steps:159, total reward:-13.2780, 0.41745638847351074 sec
Episode 3390, loss:-49.7070, succeed, steps:154, total reward:-6.3409, 0.42055320739746094 sec
Episode 3391, loss:-71.5461, fail, steps:160, total reward:-9.1459, 0.4405651092529297 sec
Episode 3392, loss:-29.1586, succeed, steps:88, total reward:-3.1818, 0.254833459854126 sec
Episode 3393, loss:-55.3625, fail, steps:160, total reward:-8.9295, 0.4280264377593994 sec
Episode 3394, loss:-20.6303, succeed, steps:160, total reward:0.3083, 0.4383265972137451 sec
Episode 3395, loss:-21.3225, fail, steps:162, total reward:-1.9188, 0.44928884506225586 sec
Episode 3396, loss:-62.6376, fail, steps:160, total reward:-9.8509, 0.4454002380371094 sec
Episode 3397, loss:-21.8333, succeed, steps:159, total reward:-0.3310, 0.45070719718933105 sec
Episode 3398, loss:-52.0901, fail, steps:161, total reward:-6.9394, 0.4508

Episode 3477, loss:-68.2020, fail, steps:160, total reward:-9.7952, 0.43077850341796875 sec
Episode 3478, loss:-19.8500, succeed, steps:145, total reward:-0.7569, 0.40167689323425293 sec
Episode 3479, loss:-38.4143, fail, steps:161, total reward:-5.0631, 0.44814109802246094 sec
Episode 3480, loss:-61.3328, fail, steps:160, total reward:-8.4967, 0.501276969909668 sec
Episode 3481, loss:-13.7611, succeed, steps:143, total reward:-0.9167, 0.40023016929626465 sec
Episode 3482, loss:-85.9025, fail, steps:160, total reward:-11.0944, 0.4440314769744873 sec
Episode 3483, loss:-24.5480, succeed, steps:139, total reward:-3.0412, 0.3902146816253662 sec
Episode 3484, loss:-35.7199, fail, steps:161, total reward:-3.7745, 0.4485013484954834 sec
Episode 3485, loss:-40.5493, fail, steps:161, total reward:-4.9188, 0.4482712745666504 sec
Episode 3486, loss:-34.6486, fail, steps:161, total reward:-4.4139, 0.44936084747314453 sec
Episode 3487, loss:-32.8766, fail, steps:161, total reward:-2.2696, 0.450244

Episode 3566, loss:-34.2667, fail, steps:161, total reward:-3.4139, 0.43769145011901855 sec
Episode 3567, loss:-61.0641, fail, steps:161, total reward:-7.6608, 0.4477419853210449 sec
Episode 3568, loss:-54.1511, fail, steps:161, total reward:-6.5066, 0.4477047920227051 sec
Episode 3569, loss:-77.7225, fail, steps:160, total reward:-9.8673, 0.4535562992095947 sec
Episode 3570, loss:-23.3544, succeed, steps:118, total reward:-1.0186, 0.3329432010650635 sec
Episode 3571, loss:-42.2496, fail, steps:161, total reward:-3.5680, 0.44213271141052246 sec
Episode 3572, loss:-65.0344, fail, steps:160, total reward:-8.5788, 0.44782233238220215 sec
Episode 3573, loss:-47.1766, fail, steps:161, total reward:-4.3615, 0.5046796798706055 sec
Episode 3574, loss:-51.5571, fail, steps:160, total reward:-7.0738, 0.447751522064209 sec
Episode 3575, loss:-21.1909, succeed, steps:121, total reward:-1.3178, 0.3434183597564697 sec
Episode 3576, loss:-43.3536, fail, steps:161, total reward:-3.5581, 0.443011999130

Episode 3655, loss:-38.4788, fail, steps:158, total reward:-14.5083, 0.4429795742034912 sec
Episode 3656, loss:-45.4085, fail, steps:158, total reward:-14.7247, 0.44124794006347656 sec
Episode 3657, loss:-20.1856, fail, steps:159, total reward:-8.9827, 0.44491052627563477 sec
Episode 3658, loss:-39.2466, fail, steps:158, total reward:-13.0033, 0.44610095024108887 sec
Episode 3659, loss:-40.9773, fail, steps:159, total reward:-11.2812, 0.44391393661499023 sec
Episode 3660, loss:-31.3327, fail, steps:160, total reward:-7.6311, 0.4485588073730469 sec
Episode 3661, loss:-53.6763, fail, steps:159, total reward:-12.6526, 0.4453723430633545 sec
Episode 3662, loss:-60.4425, fail, steps:158, total reward:-13.9312, 0.4451591968536377 sec
Episode 3663, loss:-41.5688, fail, steps:158, total reward:-13.7869, 0.44261837005615234 sec
Episode 3664, loss:-45.4362, fail, steps:160, total reward:-7.4246, 0.4495224952697754 sec
Episode 3665, loss:-36.7494, fail, steps:160, total reward:-6.2082, 0.44905042

Episode 3744, loss:-46.7758, fail, steps:161, total reward:-7.2280, 0.45278215408325195 sec
Episode 3745, loss:-18.8217, fail, steps:162, total reward:-1.5581, 0.45178723335266113 sec
Episode 3746, loss:-7.6560, succeed, steps:102, total reward:0.0548, 0.2934131622314453 sec
Episode 3747, loss:-11.7030, succeed, steps:151, total reward:-0.8669, 0.4081885814666748 sec
Episode 3748, loss:-42.5324, fail, steps:161, total reward:-5.5680, 0.4435744285583496 sec
Episode 3749, loss:10.9267, succeed, steps:101, total reward:2.5705, 0.28868889808654785 sec
Episode 3750, loss:-14.6164, succeed, steps:125, total reward:-0.6171, 0.3442225456237793 sec
Episode 3751, loss:9.5711, succeed, steps:82, total reward:4.5202, 0.2296910285949707 sec
Episode 3752, loss:-55.6132, succeed, steps:158, total reward:-7.2082, 0.4188518524169922 sec
Episode 3753, loss:-19.1100, succeed, steps:109, total reward:-2.3380, 0.3055562973022461 sec
Episode 3754, loss:-15.6829, succeed, steps:106, total reward:-3.2388, 0.2

Episode 3833, loss:-50.8331, succeed, steps:137, total reward:-5.9069, 0.38062596321105957 sec
Episode 3834, loss:-71.4679, fail, steps:160, total reward:-10.6608, 0.43938326835632324 sec
Episode 3835, loss:-70.7189, fail, steps:160, total reward:-10.5623, 0.44949960708618164 sec
Episode 3836, loss:-106.4752, fail, steps:159, total reward:-11.8059, 0.442990779876709 sec
Episode 3837, loss:-23.0856, succeed, steps:87, total reward:-4.6146, 0.2497708797454834 sec
Episode 3838, loss:-56.7607, fail, steps:160, total reward:-7.4345, 0.42876124382019043 sec
Episode 3839, loss:-30.2645, fail, steps:161, total reward:-3.6303, 0.44365620613098145 sec
Episode 3840, loss:-89.7067, succeed, steps:157, total reward:-11.1657, 0.44002342224121094 sec
Episode 3841, loss:-54.3302, fail, steps:161, total reward:-6.1123, 0.4477694034576416 sec
Episode 3842, loss:-53.1997, fail, steps:160, total reward:-8.1459, 0.4464881420135498 sec
Episode 3843, loss:-27.3032, succeed, steps:126, total reward:-2.8442, 0

Episode 3922, loss:-40.1856, fail, steps:161, total reward:-5.8574, 0.4419546127319336 sec
Episode 3923, loss:-41.4444, fail, steps:161, total reward:-5.2467, 0.4465365409851074 sec
Episode 3924, loss:-40.5775, fail, steps:161, total reward:-4.7123, 0.4466838836669922 sec
Episode 3925, loss:-22.1364, succeed, steps:141, total reward:-1.0970, 0.4014925956726074 sec
Episode 3926, loss:-18.2592, succeed, steps:151, total reward:-1.5161, 0.4172229766845703 sec
Episode 3927, loss:-57.9721, fail, steps:161, total reward:-7.0902, 0.4469630718231201 sec
Episode 3928, loss:-52.4950, succeed, steps:150, total reward:-6.3203, 0.4708273410797119 sec
Episode 3929, loss:-60.6207, fail, steps:160, total reward:-9.1468, 0.4443502426147461 sec
Episode 3930, loss:-91.4310, fail, steps:160, total reward:-9.6738, 0.45151805877685547 sec
Episode 3931, loss:-20.7776, succeed, steps:141, total reward:-2.3397, 0.3945181369781494 sec
Episode 3932, loss:-36.6508, succeed, steps:149, total reward:-4.6276, 0.4134

Episode 4009, loss:-54.4613, succeed, steps:159, total reward:-9.0017, 0.42937350273132324 sec
Episode 4010, loss:-52.8960, fail, steps:160, total reward:-10.3525, 0.43976569175720215 sec
Episode 4011, loss:-100.7689, fail, steps:159, total reward:-15.0330, 0.44116973876953125 sec
Episode 4012, loss:-11.5013, succeed, steps:132, total reward:-0.5763, 0.36963844299316406 sec
Episode 4013, loss:-16.5722, succeed, steps:155, total reward:-0.8383, 0.4870572090148926 sec
Episode 4014, loss:-23.5909, fail, steps:162, total reward:-0.6917, 0.44935083389282227 sec
Episode 4015, loss:-56.6089, fail, steps:160, total reward:-7.3623, 0.44405388832092285 sec
Episode 4016, loss:-33.0561, succeed, steps:151, total reward:-6.1661, 0.42238831520080566 sec
Episode 4017, loss:-62.2324, fail, steps:160, total reward:-11.0017, 0.4493904113769531 sec
Episode 4018, loss:-32.9070, succeed, steps:138, total reward:-4.6183, 0.388599157333374 sec
Episode 4019, loss:-15.7054, succeed, steps:110, total reward:-0.

Episode 4098, loss:-37.9876, fail, steps:161, total reward:-5.6410, 0.4429759979248047 sec
Episode 4099, loss:-57.9125, fail, steps:160, total reward:-8.0017, 0.4430866241455078 sec
Episode 4100, loss:-93.0996, fail, steps:160, total reward:-10.1558, 0.4475696086883545 sec
Episode 4101, loss:-36.6087, fail, steps:161, total reward:-3.4860, 0.45343971252441406 sec
Episode 4102, loss:-28.3082, succeed, steps:147, total reward:-1.5676, 0.4176957607269287 sec
Episode 4103, loss:-82.9462, fail, steps:159, total reward:-12.7108, 0.4496581554412842 sec
Episode 4104, loss:-65.1360, fail, steps:160, total reward:-7.9066, 0.4490509033203125 sec
Episode 4105, loss:-68.7523, fail, steps:160, total reward:-8.7230, 0.4503636360168457 sec
Episode 4106, loss:-28.5735, fail, steps:161, total reward:-2.1975, 0.4523756504058838 sec
Episode 4107, loss:-85.5825, fail, steps:160, total reward:-12.1043, 0.4522850513458252 sec
Episode 4108, loss:-47.4077, fail, steps:161, total reward:-4.9910, 0.4516267776489

Episode 4187, loss:-7.7305, succeed, steps:96, total reward:-1.4918, 0.2548832893371582 sec
Episode 4188, loss:-39.3052, fail, steps:161, total reward:-5.7131, 0.42553234100341797 sec
Episode 4189, loss:-27.2406, succeed, steps:118, total reward:-1.2350, 0.3337104320526123 sec
Episode 4190, loss:-87.8192, fail, steps:159, total reward:-12.1567, 0.4865119457244873 sec
Episode 4191, loss:-80.1993, fail, steps:159, total reward:-11.5173, 0.44458580017089844 sec
Episode 4192, loss:-39.6140, succeed, steps:154, total reward:-2.0318, 0.42731642723083496 sec
Episode 4193, loss:15.9575, succeed, steps:79, total reward:4.5309, 0.22812294960021973 sec
Episode 4194, loss:0.5353, succeed, steps:43, total reward:-0.3331, 0.11960673332214355 sec
Episode 4195, loss:-49.8945, fail, steps:160, total reward:-8.5066, 0.41145753860473633 sec
Episode 4196, loss:-7.4664, succeed, steps:149, total reward:0.0717, 0.4038386344909668 sec
Episode 4197, loss:-56.8012, fail, steps:160, total reward:-8.9295, 0.4422

Episode 4276, loss:-15.6344, succeed, steps:136, total reward:0.9901, 0.37764716148376465 sec
Episode 4277, loss:2.3891, succeed, steps:101, total reward:2.6590, 0.2827925682067871 sec
Episode 4278, loss:-13.4964, succeed, steps:121, total reward:-0.9571, 0.3303530216217041 sec
Episode 4279, loss:-33.8648, fail, steps:161, total reward:-2.4139, 0.43735170364379883 sec
Episode 4280, loss:-43.7168, fail, steps:161, total reward:-5.7131, 0.4434778690338135 sec
Episode 4281, loss:-74.3203, fail, steps:160, total reward:-12.5371, 0.4446380138397217 sec
Episode 4282, loss:-75.7838, fail, steps:159, total reward:-12.5895, 0.4433770179748535 sec
Episode 4283, loss:-74.6932, fail, steps:159, total reward:-12.9731, 0.44819188117980957 sec
Episode 4284, loss:-51.0104, succeed, steps:158, total reward:-7.2803, 0.4428074359893799 sec
Episode 4285, loss:-2.9770, succeed, steps:110, total reward:2.6104, 0.3128800392150879 sec
Episode 4286, loss:-4.6267, succeed, steps:102, total reward:0.9926, 0.2823

Episode 4365, loss:-35.2998, succeed, steps:138, total reward:-4.1855, 0.37113189697265625 sec
Episode 4366, loss:-7.0195, succeed, steps:110, total reward:2.1055, 0.3031501770019531 sec
Episode 4367, loss:-21.5362, succeed, steps:111, total reward:-3.1881, 0.3069636821746826 sec
Episode 4368, loss:-51.0700, fail, steps:160, total reward:-6.9295, 0.43049001693725586 sec
Episode 4369, loss:-29.1217, fail, steps:162, total reward:-1.6303, 0.4449944496154785 sec
Episode 4370, loss:-46.9458, fail, steps:161, total reward:-3.9417, 0.44693803787231445 sec
Episode 4371, loss:-39.3652, fail, steps:161, total reward:-2.2696, 0.44719409942626953 sec
Episode 4372, loss:-45.7203, fail, steps:161, total reward:-4.4139, 0.4545269012451172 sec
Episode 4373, loss:2.5024, succeed, steps:146, total reward:7.1137, 0.40938258171081543 sec
Episode 4374, loss:-32.9938, succeed, steps:143, total reward:-2.3405, 0.3988616466522217 sec
Episode 4375, loss:-55.6886, fail, steps:160, total reward:-8.0017, 0.44327

Episode 4454, loss:-46.4070, succeed, steps:138, total reward:-4.8347, 0.3586299419403076 sec
Episode 4455, loss:-4.9394, succeed, steps:145, total reward:1.8046, 0.3909292221069336 sec
Episode 4456, loss:-34.0233, succeed, steps:157, total reward:-6.2795, 0.42894911766052246 sec
Episode 4457, loss:-15.6185, succeed, steps:105, total reward:-3.6716, 0.29573678970336914 sec
Episode 4458, loss:-15.5793, succeed, steps:137, total reward:-2.6076, 0.37735533714294434 sec
Episode 4459, loss:-61.0774, fail, steps:159, total reward:-13.3009, 0.43495607376098633 sec
Episode 4460, loss:-40.0526, succeed, steps:121, total reward:-8.5393, 0.34092283248901367 sec
Episode 4461, loss:-71.0535, fail, steps:160, total reward:-8.7329, 0.43643975257873535 sec
Episode 4462, loss:-15.9036, succeed, steps:131, total reward:1.4344, 0.36603689193725586 sec
Episode 4463, loss:-40.6876, succeed, steps:158, total reward:-4.8467, 0.4396841526031494 sec
Episode 4464, loss:-47.9482, fail, steps:161, total reward:-6

Episode 4543, loss:-47.2113, succeed, steps:143, total reward:-3.3733, 0.37648463249206543 sec
Episode 4544, loss:-68.0020, succeed, steps:148, total reward:-10.6679, 0.4054381847381592 sec
Episode 4545, loss:-3.6987, succeed, steps:104, total reward:1.4056, 0.292177677154541 sec
Episode 4546, loss:-38.7155, succeed, steps:154, total reward:-2.6810, 0.4157581329345703 sec
Episode 4547, loss:-29.1639, succeed, steps:146, total reward:-3.2168, 0.4044511318206787 sec
Episode 4548, loss:-47.0880, succeed, steps:150, total reward:-6.2482, 0.415360689163208 sec
Episode 4549, loss:-29.9595, succeed, steps:154, total reward:-2.1039, 0.43078136444091797 sec
Episode 4550, loss:-66.8671, fail, steps:160, total reward:-8.7952, 0.445695161819458 sec
Episode 4551, loss:-20.2764, succeed, steps:106, total reward:-3.6979, 0.30341339111328125 sec
Episode 4552, loss:-12.9001, succeed, steps:129, total reward:-0.2049, 0.3518357276916504 sec
Episode 4553, loss:-6.0763, succeed, steps:104, total reward:-1.

Episode 4632, loss:-12.8513, succeed, steps:136, total reward:-0.1870, 0.36330652236938477 sec
Episode 4633, loss:-2.5033, succeed, steps:126, total reward:4.9085, 0.34482884407043457 sec
Episode 4634, loss:-66.1328, fail, steps:159, total reward:-11.6517, 0.4323391914367676 sec
Episode 4635, loss:-18.7076, succeed, steps:148, total reward:-4.2276, 0.4116330146789551 sec
Episode 4636, loss:1.0318, succeed, steps:98, total reward:0.5082, 0.2787027359008789 sec
Episode 4637, loss:-29.3400, succeed, steps:155, total reward:-0.7432, 0.42351675033569336 sec
Episode 4638, loss:-30.5112, succeed, steps:125, total reward:-5.0770, 0.3505244255065918 sec
Episode 4639, loss:15.4038, succeed, steps:94, total reward:6.9831, 0.2634584903717041 sec
Episode 4640, loss:-16.4973, succeed, steps:94, total reward:-3.5639, 0.25763559341430664 sec
Episode 4641, loss:7.2141, succeed, steps:97, total reward:3.8174, 0.26293277740478516 sec
Episode 4642, loss:-2.4356, succeed, steps:54, total reward:0.4806, 0.1

Episode 4721, loss:-28.8392, fail, steps:160, total reward:-6.3426, 0.45091938972473145 sec
Episode 4722, loss:3.2741, succeed, steps:53, total reward:2.7799, 0.15828561782836914 sec
Episode 4723, loss:-7.9246, succeed, steps:96, total reward:1.1288, 0.25519251823425293 sec
Episode 4724, loss:-20.6839, fail, steps:161, total reward:-5.2803, 0.4797348976135254 sec
Episode 4725, loss:-56.9872, fail, steps:159, total reward:-12.4452, 0.4352540969848633 sec
Episode 4726, loss:-4.7213, fail, steps:162, total reward:-1.3417, 0.44867467880249023 sec
Episode 4727, loss:-5.5302, fail, steps:161, total reward:-1.7548, 0.44861388206481934 sec
Episode 4728, loss:-0.2665, succeed, steps:106, total reward:1.6212, 0.30109500885009766 sec
Episode 4729, loss:-16.8858, fail, steps:161, total reward:-1.6925, 0.4341106414794922 sec
Episode 4730, loss:-37.5269, fail, steps:160, total reward:-8.2803, 0.4447956085205078 sec
Episode 4731, loss:-28.9388, succeed, steps:153, total reward:-6.4753, 0.426791191101

Episode 4810, loss:1.6673, succeed, steps:82, total reward:4.5202, 0.2389078140258789 sec
Episode 4811, loss:6.3039, succeed, steps:85, total reward:4.8701, 0.230118989944458 sec
Episode 4812, loss:-57.0004, fail, steps:161, total reward:-5.4860, 0.42575740814208984 sec
Episode 4813, loss:-23.8043, fail, steps:161, total reward:-2.6204, 0.4389512538909912 sec
Episode 4814, loss:-14.7123, succeed, steps:85, total reward:-2.0891, 0.2442641258239746 sec
Episode 4815, loss:-49.0340, fail, steps:160, total reward:-7.2803, 0.42603278160095215 sec
Episode 4816, loss:13.1629, succeed, steps:86, total reward:5.9423, 0.2506392002105713 sec
Episode 4817, loss:-47.2778, fail, steps:161, total reward:-6.1459, 0.43031930923461914 sec
Episode 4818, loss:13.3738, succeed, steps:120, total reward:5.4349, 0.3347208499908447 sec
Episode 4819, loss:-17.9660, succeed, steps:111, total reward:0.6719, 0.3056526184082031 sec
Episode 4820, loss:-42.3414, fail, steps:161, total reward:-3.3417, 0.433067083358764

Episode 4899, loss:-59.7627, fail, steps:160, total reward:-9.3459, 0.44520020484924316 sec
Episode 4900, loss:-4.7456, succeed, steps:130, total reward:1.8050, 0.3677337169647217 sec
Episode 4901, loss:-46.6334, fail, steps:161, total reward:-4.8467, 0.4419715404510498 sec
Episode 4902, loss:-48.0770, fail, steps:160, total reward:-6.0639, 0.4450860023498535 sec
Episode 4903, loss:-43.1462, succeed, steps:137, total reward:-5.3298, 0.38477134704589844 sec
Episode 4904, loss:6.6865, succeed, steps:47, total reward:3.5391, 0.1391301155090332 sec
Episode 4905, loss:-73.7065, fail, steps:159, total reward:-11.7337, 0.41252589225769043 sec
Episode 4906, loss:5.9124, succeed, steps:63, total reward:2.6657, 0.18008756637573242 sec
Episode 4907, loss:-83.9038, fail, steps:159, total reward:-12.6616, 0.4224975109100342 sec
Episode 4908, loss:-30.5690, fail, steps:162, total reward:0.1541, 0.4419677257537842 sec
Episode 4909, loss:-33.8011, fail, steps:161, total reward:-2.9089, 0.4435114860534

Episode 4988, loss:-1.0126, succeed, steps:81, total reward:-2.5013, 0.22960209846496582 sec
Episode 4989, loss:-27.9675, fail, steps:161, total reward:-2.0532, 0.4299960136413574 sec
Episode 4990, loss:-28.9376, fail, steps:162, total reward:-0.2269, 0.4430980682373047 sec
Episode 4991, loss:-36.2497, fail, steps:161, total reward:-3.1876, 0.445908784866333 sec
Episode 4992, loss:-27.5959, succeed, steps:140, total reward:-2.6970, 0.3915886878967285 sec
Episode 4993, loss:-55.1586, succeed, steps:143, total reward:-6.4332, 0.3992116451263428 sec
Episode 4994, loss:-11.5424, succeed, steps:104, total reward:-0.6674, 0.2951209545135498 sec
Episode 4995, loss:5.0601, succeed, steps:93, total reward:2.3231, 0.25730323791503906 sec
Episode 4996, loss:-20.8461, succeed, steps:143, total reward:-3.0618, 0.3821289539337158 sec
Episode 4997, loss:-4.6713, succeed, steps:162, total reward:3.2461, 0.4393730163574219 sec
Episode 4998, loss:-25.3286, succeed, steps:116, total reward:-5.7407, 0.325

Episode 5076, loss:-1.4318, succeed, steps:78, total reward:0.4481, 0.20026707649230957 sec
Episode 5077, loss:-14.9285, succeed, steps:90, total reward:-1.1818, 0.23600029945373535 sec
Episode 5078, loss:18.7806, succeed, steps:85, total reward:6.9431, 0.22658801078796387 sec
Episode 5079, loss:-47.3594, fail, steps:161, total reward:-5.9295, 0.4224064350128174 sec
Episode 5080, loss:-3.5988, succeed, steps:104, total reward:-1.3887, 0.294766902923584 sec
Episode 5081, loss:-35.7574, succeed, steps:139, total reward:-3.4741, 0.37400102615356445 sec
Episode 5082, loss:-27.8890, succeed, steps:147, total reward:-1.7840, 0.4001922607421875 sec
Episode 5083, loss:1.2782, succeed, steps:100, total reward:5.5189, 0.2789645195007324 sec
Episode 5084, loss:-15.5211, succeed, steps:153, total reward:3.0618, 0.41235923767089844 sec
Episode 5085, loss:-18.4617, succeed, steps:118, total reward:-1.0022, 0.32726049423217773 sec
Episode 5086, loss:-35.9622, succeed, steps:131, total reward:-5.6690,

Episode 5165, loss:8.7587, succeed, steps:92, total reward:4.4781, 0.26271557807922363 sec
Episode 5166, loss:-34.4923, succeed, steps:112, total reward:-9.4522, 0.30502772331237793 sec
Episode 5167, loss:5.9968, succeed, steps:70, total reward:-0.1599, 0.19761013984680176 sec
Episode 5168, loss:-1.7278, succeed, steps:77, total reward:3.7015, 0.20547008514404297 sec
Episode 5169, loss:2.5998, succeed, steps:71, total reward:0.9843, 0.18843889236450195 sec
Episode 5170, loss:-31.4149, fail, steps:161, total reward:-3.9811, 0.4199526309967041 sec
Episode 5171, loss:-0.4590, succeed, steps:56, total reward:-0.3137, 0.16149139404296875 sec
Episode 5172, loss:-31.7009, succeed, steps:155, total reward:-3.6818, 0.4053773880004883 sec
Episode 5173, loss:-61.1385, fail, steps:159, total reward:-10.5075, 0.4324479103088379 sec
Episode 5174, loss:-33.0731, succeed, steps:149, total reward:-3.2997, 0.4156181812286377 sec
Episode 5175, loss:-23.2491, fail, steps:162, total reward:0.6067, 0.446786

Episode 5255, loss:-23.9002, fail, steps:162, total reward:0.8953, 0.4230368137359619 sec
Episode 5256, loss:2.9712, succeed, steps:65, total reward:1.7271, 0.1846456527709961 sec
Episode 5257, loss:-74.7437, fail, steps:160, total reward:-8.7853, 0.42119860649108887 sec
Episode 5258, loss:-8.6864, succeed, steps:117, total reward:2.2906, 0.3243682384490967 sec
Episode 5259, loss:-22.6836, succeed, steps:148, total reward:-1.3611, 0.40689730644226074 sec
Episode 5260, loss:-0.8307, succeed, steps:123, total reward:1.4757, 0.3406951427459717 sec
Episode 5261, loss:-33.4106, succeed, steps:154, total reward:-4.5933, 0.41844820976257324 sec
Episode 5262, loss:-2.7563, succeed, steps:133, total reward:2.0107, 0.3697700500488281 sec
Episode 5263, loss:-9.9807, succeed, steps:94, total reward:0.1682, 0.2636723518371582 sec
Episode 5264, loss:1.9198, succeed, steps:83, total reward:5.8087, 0.22680902481079102 sec
Episode 5265, loss:3.1080, succeed, steps:88, total reward:4.5709, 0.23649430274

Episode 5344, loss:-50.8006, fail, steps:160, total reward:-6.9918, 0.4436178207397461 sec
Episode 5345, loss:-40.2243, fail, steps:161, total reward:-4.3417, 0.44736719131469727 sec
Episode 5346, loss:-34.9116, succeed, steps:119, total reward:-5.8957, 0.33725690841674805 sec
Episode 5347, loss:-10.4896, succeed, steps:84, total reward:-0.2226, 0.23462462425231934 sec
Episode 5348, loss:-6.0968, succeed, steps:116, total reward:0.5856, 0.3108196258544922 sec
Episode 5349, loss:4.8774, succeed, steps:90, total reward:6.0659, 0.24876737594604492 sec
Episode 5350, loss:-14.2769, succeed, steps:150, total reward:2.2980, 0.402543306350708 sec
Episode 5351, loss:-28.8495, succeed, steps:146, total reward:0.2267, 0.399310827255249 sec
Episode 5352, loss:-0.4635, succeed, steps:93, total reward:1.3854, 0.26258277893066406 sec
Episode 5353, loss:-8.5180, succeed, steps:125, total reward:2.9314, 0.33631134033203125 sec
Episode 5354, loss:-9.9270, succeed, steps:97, total reward:2.7124, 0.266755

Episode 5433, loss:-2.4230, succeed, steps:149, total reward:4.3808, 0.39941883087158203 sec
Episode 5434, loss:-2.9792, succeed, steps:104, total reward:3.3532, 0.2896895408630371 sec
Episode 5435, loss:-14.6727, succeed, steps:76, total reward:-2.2733, 0.21054553985595703 sec
Episode 5436, loss:-8.7049, succeed, steps:141, total reward:2.9072, 0.37295961380004883 sec
Episode 5437, loss:-19.9258, succeed, steps:111, total reward:-0.4823, 0.3079519271850586 sec
Episode 5438, loss:-28.9689, fail, steps:161, total reward:-1.8990, 0.4870297908782959 sec
Episode 5439, loss:-33.7352, fail, steps:161, total reward:-3.6925, 0.4421262741088867 sec
Episode 5440, loss:17.4976, succeed, steps:106, total reward:4.9204, 0.3009932041168213 sec
Episode 5441, loss:-11.8443, succeed, steps:119, total reward:1.0241, 0.3244495391845703 sec
Episode 5442, loss:-10.9165, succeed, steps:84, total reward:-3.8104, 0.2323458194732666 sec
Episode 5443, loss:-58.5717, fail, steps:160, total reward:-6.2803, 0.4311

Episode 5522, loss:-15.1884, succeed, steps:132, total reward:0.3222, 0.36486196517944336 sec
Episode 5523, loss:0.9803, succeed, steps:112, total reward:5.0433, 0.30953145027160645 sec
Episode 5524, loss:-27.3091, fail, steps:161, total reward:-4.7754, 0.4352273941040039 sec
Episode 5525, loss:-53.2124, fail, steps:160, total reward:-11.6124, 0.44074010848999023 sec
Episode 5526, loss:-39.1354, fail, steps:160, total reward:-9.3623, 0.44852685928344727 sec
Episode 5527, loss:-21.7048, succeed, steps:124, total reward:-3.6179, 0.3505382537841797 sec
Episode 5528, loss:-12.4231, succeed, steps:128, total reward:-3.7107, 0.35344672203063965 sec
Episode 5529, loss:-3.0939, succeed, steps:94, total reward:0.3124, 0.26334261894226074 sec
Episode 5530, loss:-31.1196, fail, steps:161, total reward:-7.3295, 0.42977309226989746 sec
Episode 5531, loss:-25.4861, succeed, steps:84, total reward:-7.6047, 0.24035429954528809 sec
Episode 5532, loss:-23.3302, fail, steps:161, total reward:-4.7032, 0.4

Episode 5611, loss:-12.1269, succeed, steps:127, total reward:-0.6900, 0.3444797992706299 sec
Episode 5612, loss:-66.5399, fail, steps:161, total reward:-6.8673, 0.4363117218017578 sec
Episode 5613, loss:-12.1992, succeed, steps:102, total reward:-2.6674, 0.2894313335418701 sec
Episode 5614, loss:-55.0332, fail, steps:160, total reward:-8.8574, 0.4888484477996826 sec
Episode 5615, loss:-45.8884, fail, steps:161, total reward:-4.7417, 0.4458274841308594 sec
Episode 5616, loss:-18.5699, succeed, steps:142, total reward:-4.3897, 0.39623403549194336 sec
Episode 5617, loss:1.7496, succeed, steps:119, total reward:3.7856, 0.33278751373291016 sec
Episode 5618, loss:-11.6415, succeed, steps:141, total reward:1.4416, 0.3870701789855957 sec
Episode 5619, loss:9.0551, succeed, steps:82, total reward:4.7366, 0.2330799102783203 sec
Episode 5620, loss:-3.1042, succeed, steps:78, total reward:0.6645, 0.21783661842346191 sec
Episode 5621, loss:-12.5401, succeed, steps:160, total reward:1.6789, 0.42289

Episode 5700, loss:-63.2596, succeed, steps:158, total reward:-11.1567, 0.4313504695892334 sec
Episode 5701, loss:-14.0111, succeed, steps:105, total reward:-3.1109, 0.3025350570678711 sec
Episode 5702, loss:7.9345, succeed, steps:113, total reward:8.2605, 0.3094446659088135 sec
Episode 5703, loss:-54.5722, fail, steps:161, total reward:-4.2696, 0.4338524341583252 sec
Episode 5704, loss:-3.4668, succeed, steps:117, total reward:5.3734, 0.32761287689208984 sec
Episode 5705, loss:-16.6242, succeed, steps:106, total reward:-1.6060, 0.29199695587158203 sec
Episode 5706, loss:-3.3224, succeed, steps:95, total reward:1.1682, 0.26048946380615234 sec
Episode 5707, loss:8.7501, succeed, steps:93, total reward:8.6331, 0.2519345283508301 sec
Episode 5708, loss:2.5557, succeed, steps:55, total reward:5.8619, 0.15311098098754883 sec
Episode 5709, loss:2.4929, succeed, steps:79, total reward:4.8194, 0.2069225311279297 sec
Episode 5710, loss:-27.4590, succeed, steps:154, total reward:-0.3727, 0.40121

Episode 5790, loss:-17.5514, succeed, steps:114, total reward:-0.5651, 0.3002748489379883 sec
Episode 5791, loss:-54.2878, fail, steps:161, total reward:-6.5788, 0.42917418479919434 sec
Episode 5792, loss:-0.3758, succeed, steps:84, total reward:3.4537, 0.236663818359375 sec
Episode 5793, loss:-20.6230, succeed, steps:124, total reward:-0.8236, 0.3317873477935791 sec
Episode 5794, loss:-25.9844, succeed, steps:139, total reward:-2.2478, 0.3775448799133301 sec
Episode 5795, loss:5.1167, succeed, steps:88, total reward:4.2102, 0.24535012245178223 sec
Episode 5796, loss:-69.6170, fail, steps:159, total reward:-15.8231, 0.4240105152130127 sec
Episode 5797, loss:-49.2291, fail, steps:160, total reward:-7.9295, 0.4941842555999756 sec
Episode 5798, loss:-66.8120, fail, steps:159, total reward:-11.6616, 0.441392183303833 sec
Episode 5799, loss:-28.0425, succeed, steps:106, total reward:-2.9502, 0.3075413703918457 sec
Episode 5800, loss:-17.2488, fail, steps:163, total reward:3.9674, 0.44100499

Episode 5880, loss:2.4590, succeed, steps:82, total reward:-2.0783, 0.22219467163085938 sec
Episode 5881, loss:-2.1196, succeed, steps:144, total reward:3.6179, 0.37865519523620605 sec
Episode 5882, loss:-6.1617, succeed, steps:107, total reward:2.3097, 0.29547810554504395 sec
Episode 5883, loss:1.7467, succeed, steps:113, total reward:5.7547, 0.30597639083862305 sec
Episode 5884, loss:10.0159, succeed, steps:98, total reward:7.9724, 0.2724485397338867 sec
Episode 5885, loss:2.7219, succeed, steps:73, total reward:3.1450, 0.19866371154785156 sec
Episode 5886, loss:-3.3336, succeed, steps:79, total reward:2.3137, 0.2099766731262207 sec
Episode 5887, loss:8.7256, succeed, steps:93, total reward:5.1896, 0.24618029594421387 sec
Episode 5888, loss:3.6358, succeed, steps:117, total reward:5.4620, 0.3105330467224121 sec
Episode 5889, loss:-37.9052, fail, steps:161, total reward:-5.8574, 0.4307687282562256 sec
Episode 5890, loss:1.4143, succeed, steps:86, total reward:2.3545, 0.242109537124633

Episode 5970, loss:-11.3295, succeed, steps:121, total reward:3.0635, 0.33486151695251465 sec
Episode 5971, loss:-22.4349, succeed, steps:131, total reward:-1.7927, 0.35579419136047363 sec
Episode 5972, loss:-9.6345, succeed, steps:136, total reward:1.2065, 0.3712279796600342 sec
Episode 5973, loss:2.1424, succeed, steps:60, total reward:3.5420, 0.17091012001037598 sec
Episode 5974, loss:1.1356, succeed, steps:117, total reward:2.0742, 0.3075141906738281 sec
Episode 5975, loss:-32.3219, fail, steps:161, total reward:-2.6925, 0.427945613861084 sec
Episode 5976, loss:-3.1405, succeed, steps:83, total reward:-1.4390, 0.23450803756713867 sec
Episode 5977, loss:-6.4304, succeed, steps:95, total reward:1.3846, 0.2605314254760742 sec
Episode 5978, loss:7.2670, succeed, steps:97, total reward:6.9002, 0.26017141342163086 sec
Episode 5979, loss:-30.1461, fail, steps:161, total reward:-5.3910, 0.4254758358001709 sec
Episode 5980, loss:-37.9137, succeed, steps:131, total reward:-4.1607, 0.36228799

Episode 6058, loss:-29.4140, succeed, steps:155, total reward:-4.1375, 0.40727734565734863 sec
Episode 6059, loss:3.3635, succeed, steps:103, total reward:5.5804, 0.29056811332702637 sec
Episode 6060, loss:-52.9543, fail, steps:160, total reward:-7.0738, 0.42758870124816895 sec
Episode 6061, loss:-17.4210, fail, steps:162, total reward:0.4067, 0.4460744857788086 sec
Episode 6062, loss:-61.9217, fail, steps:159, total reward:-10.1468, 0.4416379928588867 sec
Episode 6063, loss:-56.9440, fail, steps:160, total reward:-10.3754, 0.44431328773498535 sec
Episode 6064, loss:-6.8373, succeed, steps:152, total reward:4.0816, 0.42929649353027344 sec
Episode 6065, loss:-28.7184, fail, steps:161, total reward:-3.4040, 0.44817376136779785 sec
Episode 6066, loss:-56.6139, fail, steps:159, total reward:-10.7960, 0.4461350440979004 sec
Episode 6067, loss:-0.5434, succeed, steps:82, total reward:1.2209, 0.23612713813781738 sec
Episode 6068, loss:2.4612, succeed, steps:131, total reward:5.7535, 0.3515565

Episode 6147, loss:-7.2555, succeed, steps:121, total reward:-0.3079, 0.33708953857421875 sec
Episode 6148, loss:-26.3153, fail, steps:160, total reward:-5.9918, 0.43569350242614746 sec
Episode 6149, loss:-49.6819, fail, steps:160, total reward:-7.6311, 0.4448404312133789 sec
Episode 6150, loss:1.1323, succeed, steps:70, total reward:2.9229, 0.20422148704528809 sec
Episode 6151, loss:3.9198, succeed, steps:81, total reward:3.7366, 0.21741795539855957 sec
Episode 6152, loss:-14.2593, succeed, steps:101, total reward:-3.3788, 0.2681725025177002 sec
Episode 6153, loss:-33.3879, fail, steps:161, total reward:-1.4761, 0.4270174503326416 sec
Episode 6154, loss:-5.5458, succeed, steps:119, total reward:4.1463, 0.33240747451782227 sec
Episode 6155, loss:1.3763, succeed, steps:48, total reward:1.9448, 0.1387331485748291 sec
Episode 6156, loss:-25.7248, fail, steps:161, total reward:-5.1361, 0.420257568359375 sec
Episode 6157, loss:-16.5449, succeed, steps:115, total reward:-0.3323, 0.3223044872

Episode 6237, loss:-6.7038, succeed, steps:142, total reward:0.9686, 0.3707122802734375 sec
Episode 6238, loss:-21.5868, succeed, steps:111, total reward:-7.0644, 0.3042623996734619 sec
Episode 6239, loss:-1.5648, succeed, steps:66, total reward:2.2222, 0.18181538581848145 sec
Episode 6240, loss:-28.5957, fail, steps:161, total reward:-3.6204, 0.42173099517822266 sec
Episode 6241, loss:-51.4251, fail, steps:160, total reward:-7.6311, 0.44020557403564453 sec
Episode 6242, loss:-1.8421, succeed, steps:119, total reward:4.6513, 0.33327817916870117 sec
Episode 6243, loss:-19.8260, fail, steps:161, total reward:-3.6204, 0.43704724311828613 sec
Episode 6244, loss:0.5368, succeed, steps:56, total reward:2.7692, 0.16469788551330566 sec
Episode 6245, loss:2.8792, succeed, steps:63, total reward:3.6756, 0.1674339771270752 sec
Episode 6246, loss:-6.3170, succeed, steps:96, total reward:-1.7082, 0.25171613693237305 sec
Episode 6247, loss:2.4854, succeed, steps:87, total reward:3.4987, 0.2323858737

Episode 6327, loss:-0.7491, succeed, steps:74, total reward:4.2729, 0.20705556869506836 sec
Episode 6328, loss:1.7138, succeed, steps:82, total reward:5.0022, 0.22065448760986328 sec
Episode 6329, loss:0.7580, succeed, steps:130, total reward:6.0420, 0.3424060344696045 sec
Episode 6330, loss:-12.3069, succeed, steps:102, total reward:1.7139, 0.28452157974243164 sec
Episode 6331, loss:-7.5891, succeed, steps:134, total reward:3.1550, 0.3596537113189697 sec
Episode 6332, loss:8.9570, succeed, steps:96, total reward:9.0552, 0.26644301414489746 sec
Episode 6333, loss:-47.1378, fail, steps:159, total reward:-10.7239, 0.4264488220214844 sec
Episode 6334, loss:-30.1208, fail, steps:162, total reward:-0.4040, 0.446178674697876 sec
Episode 6335, loss:-2.1461, succeed, steps:57, total reward:3.5692, 0.16961908340454102 sec
Episode 6336, loss:1.0496, succeed, steps:83, total reward:6.1694, 0.22092127799987793 sec
Episode 6337, loss:-28.2929, succeed, steps:145, total reward:-2.4135, 0.38313817977

Episode 6417, loss:-18.5144, fail, steps:160, total reward:-8.6418, 0.4391639232635498 sec
Episode 6418, loss:-28.7091, fail, steps:158, total reward:-12.6427, 0.43784594535827637 sec
Episode 6419, loss:-12.0120, fail, steps:160, total reward:-6.4048, 0.4446413516998291 sec
Episode 6420, loss:-22.5976, fail, steps:159, total reward:-10.7762, 0.44791126251220703 sec
Episode 6421, loss:-14.0430, fail, steps:161, total reward:-0.8170, 0.44890451431274414 sec
Episode 6422, loss:1.0120, succeed, steps:31, total reward:1.8739, 0.1004188060760498 sec
Episode 6423, loss:-17.0064, fail, steps:160, total reward:-6.9106, 0.4133796691894531 sec
Episode 6424, loss:-13.4694, fail, steps:161, total reward:0.2551, 0.43610668182373047 sec
Episode 6425, loss:2.0790, succeed, steps:94, total reward:7.9208, 0.2667057514190674 sec
Episode 6426, loss:-22.9035, fail, steps:160, total reward:-6.8574, 0.4303319454193115 sec
Episode 6427, loss:-19.4910, fail, steps:161, total reward:-3.1884, 0.4433720111846924 

Episode 6507, loss:-11.4166, succeed, steps:147, total reward:1.7480, 0.3814694881439209 sec
Episode 6508, loss:-29.5457, succeed, steps:141, total reward:-3.0421, 0.38160037994384766 sec
Episode 6509, loss:-23.1086, succeed, steps:128, total reward:-6.6328, 0.35703182220458984 sec
Episode 6510, loss:8.8934, succeed, steps:96, total reward:9.3438, 0.2665705680847168 sec
Episode 6511, loss:6.8659, succeed, steps:133, total reward:8.0321, 0.3581204414367676 sec
Episode 6512, loss:3.8189, succeed, steps:58, total reward:8.1406, 0.16524314880371094 sec
Episode 6513, loss:-40.1532, fail, steps:161, total reward:-3.9155, 0.4216151237487793 sec
Episode 6514, loss:2.0475, succeed, steps:73, total reward:6.7885, 0.20766878128051758 sec
Episode 6515, loss:-40.5618, fail, steps:160, total reward:-6.2082, 0.42613840103149414 sec
Episode 6516, loss:-2.0310, succeed, steps:90, total reward:-0.5162, 0.2559645175933838 sec
Episode 6517, loss:1.6414, succeed, steps:61, total reward:1.7477, 0.1685521602

Episode 6597, loss:-33.9195, fail, steps:160, total reward:-8.2803, 0.4487905502319336 sec
Episode 6598, loss:-59.3543, fail, steps:159, total reward:-12.2288, 0.5024380683898926 sec
Episode 6599, loss:-53.0012, fail, steps:159, total reward:-10.2910, 0.4477074146270752 sec
Episode 6600, loss:-49.2383, fail, steps:159, total reward:-11.3632, 0.4521610736846924 sec
Episode 6601, loss:-38.0904, fail, steps:160, total reward:-7.1262, 0.45165276527404785 sec
Episode 6602, loss:-47.7426, fail, steps:160, total reward:-7.8475, 0.45150160789489746 sec
Episode 6603, loss:-16.3269, fail, steps:161, total reward:-2.8269, 0.4538123607635498 sec
Episode 6604, loss:6.0905, succeed, steps:81, total reward:0.6537, 0.23744440078735352 sec
Episode 6605, loss:-65.6688, fail, steps:159, total reward:-12.8189, 0.4304838180541992 sec
Episode 6606, loss:-37.1491, fail, steps:161, total reward:-3.5483, 0.4464430809020996 sec
Episode 6607, loss:-59.3577, fail, steps:159, total reward:-10.5468, 0.4407441616058

Episode 6687, loss:-65.8591, fail, steps:160, total reward:-7.6082, 0.44057631492614746 sec
Episode 6688, loss:-48.2855, fail, steps:160, total reward:-10.1567, 0.5001428127288818 sec
Episode 6689, loss:-50.5877, fail, steps:160, total reward:-7.1361, 0.44618844985961914 sec
Episode 6690, loss:-56.6694, fail, steps:160, total reward:-8.3525, 0.44828057289123535 sec
Episode 6691, loss:-51.7173, fail, steps:160, total reward:-8.0738, 0.44844841957092285 sec
Episode 6692, loss:-51.9416, fail, steps:160, total reward:-8.8574, 0.44897890090942383 sec
Episode 6693, loss:-60.0519, fail, steps:160, total reward:-9.6902, 0.45388078689575195 sec
Episode 6694, loss:-55.7121, fail, steps:160, total reward:-10.8780, 0.450181245803833 sec
Episode 6695, loss:-60.2838, fail, steps:160, total reward:-8.2803, 0.44867658615112305 sec
Episode 6696, loss:-56.6280, fail, steps:159, total reward:-11.5895, 0.446486234664917 sec
Episode 6697, loss:-28.8571, fail, steps:161, total reward:-2.2696, 0.454934120178

Episode 6776, loss:-28.7504, succeed, steps:126, total reward:-2.2671, 0.3375875949859619 sec
Episode 6777, loss:-18.7419, succeed, steps:124, total reward:-1.4007, 0.33847928047180176 sec
Episode 6778, loss:-1.7778, succeed, steps:85, total reward:0.9938, 0.2367410659790039 sec
Episode 6779, loss:-64.2796, fail, steps:161, total reward:-4.7745, 0.43310117721557617 sec
Episode 6780, loss:-7.7868, succeed, steps:108, total reward:0.2498, 0.30365967750549316 sec
Episode 6781, loss:-22.2769, succeed, steps:116, total reward:-3.5956, 0.31867527961730957 sec
Episode 6782, loss:3.6540, succeed, steps:109, total reward:3.8441, 0.29939746856689453 sec
Episode 6783, loss:1.6122, succeed, steps:83, total reward:1.7488, 0.22850871086120605 sec
Episode 6784, loss:2.1318, succeed, steps:91, total reward:0.2510, 0.2454698085784912 sec
Episode 6785, loss:3.7410, succeed, steps:105, total reward:6.5705, 0.2808520793914795 sec
Episode 6786, loss:6.2223, succeed, steps:75, total reward:3.9744, 0.2044403

Episode 6866, loss:2.3308, succeed, steps:83, total reward:2.0767, 0.22788500785827637 sec
Episode 6867, loss:-16.8597, succeed, steps:147, total reward:-2.6496, 0.38957762718200684 sec
Episode 6868, loss:-40.4260, fail, steps:161, total reward:-6.2902, 0.43923187255859375 sec
Episode 6869, loss:-48.9554, fail, steps:160, total reward:-9.0574, 0.44959139823913574 sec
Episode 6870, loss:-21.3915, succeed, steps:105, total reward:-3.1830, 0.2983107566833496 sec
Episode 6871, loss:-19.8439, succeed, steps:131, total reward:-1.0713, 0.3579401969909668 sec
Episode 6872, loss:6.2797, succeed, steps:36, total reward:2.5811, 0.10752010345458984 sec
Episode 6873, loss:-1.9827, succeed, steps:141, total reward:1.9695, 0.36473536491394043 sec
Episode 6874, loss:-37.7771, fail, steps:160, total reward:-7.2902, 0.4817664623260498 sec
Episode 6875, loss:-21.4834, fail, steps:161, total reward:-5.7853, 0.4456300735473633 sec
Episode 6876, loss:-59.8287, fail, steps:160, total reward:-9.2181, 0.444015

Episode 6955, loss:4.5004, succeed, steps:91, total reward:3.3339, 0.26153111457824707 sec
Episode 6956, loss:-5.5221, succeed, steps:82, total reward:-1.7177, 0.22571802139282227 sec
Episode 6957, loss:1.0000, succeed, steps:113, total reward:1.8784, 0.3016793727874756 sec
Episode 6958, loss:-3.1618, succeed, steps:43, total reward:-0.4052, 0.12316060066223145 sec
Episode 6959, loss:0.2678, succeed, steps:70, total reward:2.9950, 0.1851215362548828 sec
Episode 6960, loss:-10.0596, succeed, steps:120, total reward:-1.8850, 0.31426095962524414 sec
Episode 6961, loss:-6.3946, succeed, steps:115, total reward:3.3734, 0.30934786796569824 sec
Episode 6962, loss:-10.1969, succeed, steps:113, total reward:-1.2766, 0.30657362937927246 sec
Episode 6963, loss:-41.6208, fail, steps:160, total reward:-6.9295, 0.42869114875793457 sec
Episode 6964, loss:3.5793, succeed, steps:35, total reward:1.2040, 0.1062459945678711 sec
Episode 6965, loss:3.0943, succeed, steps:72, total reward:4.9950, 0.18618273

Episode 7043, loss:-24.9511, succeed, steps:159, total reward:-2.3319, 0.4355652332305908 sec
Episode 7044, loss:-47.7408, fail, steps:159, total reward:-11.8582, 0.4459197521209717 sec
Episode 7045, loss:-16.5625, fail, steps:161, total reward:-0.8990, 0.44799375534057617 sec
Episode 7046, loss:-35.7436, fail, steps:160, total reward:-6.7655, 0.4486095905303955 sec
Episode 7047, loss:-25.4987, succeed, steps:153, total reward:-1.9399, 0.42843103408813477 sec
Episode 7048, loss:10.9671, succeed, steps:87, total reward:7.1587, 0.25224757194519043 sec
Episode 7049, loss:-2.6074, succeed, steps:81, total reward:-0.4119, 0.22171711921691895 sec
Episode 7050, loss:-12.4733, succeed, steps:136, total reward:2.4328, 0.3716249465942383 sec
Episode 7051, loss:-25.9359, fail, steps:160, total reward:-5.7032, 0.4333028793334961 sec
Episode 7052, loss:3.2544, succeed, steps:76, total reward:9.7885, 0.2192823886871338 sec
Episode 7053, loss:-1.5510, succeed, steps:112, total reward:4.7547, 0.298634

Episode 7134, loss:-13.8038, succeed, steps:115, total reward:-1.9267, 0.29815030097961426 sec
Episode 7135, loss:-6.9304, succeed, steps:143, total reward:2.4015, 0.43441319465637207 sec
Episode 7136, loss:-65.2574, fail, steps:160, total reward:-8.5623, 0.43469905853271484 sec
Episode 7137, loss:10.4497, succeed, steps:88, total reward:7.9423, 0.25063514709472656 sec
Episode 7138, loss:0.8786, succeed, steps:39, total reward:-0.3125, 0.1110527515411377 sec
Episode 7139, loss:7.1511, succeed, steps:112, total reward:4.9711, 0.2900838851928711 sec
Episode 7140, loss:-5.2225, succeed, steps:106, total reward:2.6311, 0.2880058288574219 sec
Episode 7141, loss:9.2155, succeed, steps:75, total reward:8.7885, 0.20516490936279297 sec
Episode 7142, loss:6.7507, succeed, steps:102, total reward:7.8239, 0.27059292793273926 sec
Episode 7143, loss:-48.8041, fail, steps:160, total reward:-7.9918, 0.42592716217041016 sec
Episode 7144, loss:-24.3832, fail, steps:161, total reward:-5.1361, 0.439668178

Episode 7224, loss:-2.2255, succeed, steps:97, total reward:3.3846, 0.27851057052612305 sec
Episode 7225, loss:0.6632, succeed, steps:112, total reward:8.9360, 0.30574631690979004 sec
Episode 7226, loss:-28.3195, fail, steps:160, total reward:-4.8376, 0.43142008781433105 sec
Episode 7227, loss:0.9959, succeed, steps:66, total reward:2.7271, 0.19068217277526855 sec
Episode 7228, loss:2.4565, succeed, steps:94, total reward:10.6430, 0.2528376579284668 sec
Episode 7229, loss:-0.3499, succeed, steps:108, total reward:3.6933, 0.2883870601654053 sec
Episode 7230, loss:-7.5073, succeed, steps:97, total reward:3.0239, 0.2637593746185303 sec
Episode 7231, loss:-6.0755, succeed, steps:97, total reward:-1.3475, 0.2679483890533447 sec
Episode 7232, loss:1.7903, succeed, steps:105, total reward:5.0746, 0.28284621238708496 sec
Episode 7233, loss:2.8701, succeed, steps:79, total reward:6.2621, 0.21607375144958496 sec
Episode 7234, loss:-0.6875, succeed, steps:67, total reward:1.0049, 0.18069744110107

Episode 7314, loss:-5.9387, succeed, steps:120, total reward:2.0799, 0.3251802921295166 sec
Episode 7315, loss:-2.0903, succeed, steps:114, total reward:7.4039, 0.3117058277130127 sec
Episode 7316, loss:-41.9059, fail, steps:158, total reward:-15.0231, 0.42659950256347656 sec
Episode 7317, loss:2.1066, succeed, steps:87, total reward:6.3652, 0.24700331687927246 sec
Episode 7318, loss:8.7074, succeed, steps:88, total reward:7.9423, 0.23986601829528809 sec
Episode 7319, loss:-23.6987, fail, steps:161, total reward:-2.4139, 0.4474794864654541 sec
Episode 7320, loss:-19.7795, fail, steps:160, total reward:-6.2704, 0.4391748905181885 sec
Episode 7321, loss:-4.8398, succeed, steps:87, total reward:-0.3055, 0.24800562858581543 sec
Episode 7322, loss:-1.3243, succeed, steps:131, total reward:3.3100, 0.3523597717285156 sec
Episode 7323, loss:4.1931, succeed, steps:144, total reward:3.5457, 0.39113593101501465 sec
Episode 7324, loss:-6.2856, succeed, steps:118, total reward:3.4349, 0.32909083366

Episode 7405, loss:-3.0635, succeed, steps:149, total reward:4.7415, 0.38741540908813477 sec
Episode 7406, loss:-4.4749, succeed, steps:134, total reward:0.2885, 0.36481165885925293 sec
Episode 7407, loss:-0.5230, succeed, steps:57, total reward:0.7584, 0.16145896911621094 sec
Episode 7408, loss:-19.7077, succeed, steps:120, total reward:-5.2564, 0.31429362297058105 sec
Episode 7409, loss:-11.0375, fail, steps:162, total reward:-0.5483, 0.4325129985809326 sec
Episode 7410, loss:-9.7463, succeed, steps:152, total reward:0.9266, 0.4309875965118408 sec
Episode 7411, loss:-4.4530, succeed, steps:126, total reward:-2.4835, 0.4075920581817627 sec
Episode 7412, loss:-45.3448, fail, steps:160, total reward:-9.9403, 0.43867993354797363 sec
Episode 7413, loss:-0.4523, succeed, steps:113, total reward:2.0948, 0.3203108310699463 sec
Episode 7414, loss:-0.5413, succeed, steps:86, total reward:2.0659, 0.23876047134399414 sec
Episode 7415, loss:-22.7197, fail, steps:161, total reward:-4.9910, 0.42803

Episode 7494, loss:2.6320, succeed, steps:111, total reward:6.4769, 0.2970256805419922 sec
Episode 7495, loss:-1.0956, succeed, steps:84, total reward:0.2987, 0.2296445369720459 sec
Episode 7496, loss:12.6973, succeed, steps:116, total reward:12.3425, 0.3100714683532715 sec
Episode 7497, loss:2.3571, succeed, steps:65, total reward:5.6034, 0.1791362762451172 sec
Episode 7498, loss:2.0972, succeed, steps:83, total reward:7.1793, 0.22493600845336914 sec
Episode 7499, loss:-20.4858, succeed, steps:97, total reward:2.1583, 0.2583882808685303 sec
Episode 7500, loss:-9.5829, succeed, steps:118, total reward:2.9463, 0.314985990524292 sec
Episode 7501, loss:-0.9437, succeed, steps:148, total reward:2.9480, 0.3974912166595459 sec
Episode 7502, loss:-12.5741, succeed, steps:121, total reward:-3.2465, 0.3354671001434326 sec
Episode 7503, loss:-5.0911, succeed, steps:101, total reward:3.7246, 0.27969884872436523 sec
Episode 7504, loss:-1.2053, succeed, steps:132, total reward:7.8256, 0.35893821716

Episode 7585, loss:3.0598, succeed, steps:61, total reward:4.6863, 0.15700769424438477 sec
Episode 7586, loss:-5.9307, succeed, steps:75, total reward:0.5309, 0.19382619857788086 sec
Episode 7587, loss:-0.6625, succeed, steps:87, total reward:3.4430, 0.2262868881225586 sec
Episode 7588, loss:-27.3802, fail, steps:161, total reward:-3.5483, 0.4251978397369385 sec
Episode 7589, loss:0.9161, succeed, steps:54, total reward:7.8726, 0.15453767776489258 sec
Episode 7590, loss:-24.7725, succeed, steps:146, total reward:0.9480, 0.37938356399536133 sec
Episode 7591, loss:-16.7132, succeed, steps:106, total reward:-4.7609, 0.2922050952911377 sec
Episode 7592, loss:3.1118, succeed, steps:43, total reward:3.6154, 0.1210927963256836 sec
Episode 7593, loss:-24.8453, fail, steps:161, total reward:-1.4761, 0.41575098037719727 sec
Episode 7594, loss:-1.2243, succeed, steps:73, total reward:-0.3149, 0.20571470260620117 sec
Episode 7595, loss:-27.4404, fail, steps:161, total reward:-5.2803, 0.42659091949

Episode 7675, loss:3.9141, succeed, steps:112, total reward:5.1154, 0.30292224884033203 sec
Episode 7676, loss:3.2899, succeed, steps:100, total reward:2.7246, 0.27411842346191406 sec
Episode 7677, loss:-4.8230, succeed, steps:160, total reward:4.9781, 0.42675161361694336 sec
Episode 7678, loss:-1.2592, succeed, steps:97, total reward:0.1574, 0.2732977867126465 sec
Episode 7679, loss:-16.8234, succeed, steps:131, total reward:-0.2450, 0.3532888889312744 sec
Episode 7680, loss:-15.7082, succeed, steps:156, total reward:1.4109, 0.42337942123413086 sec
Episode 7681, loss:-1.6555, succeed, steps:92, total reward:5.1274, 0.26024651527404785 sec
Episode 7682, loss:1.8026, succeed, steps:105, total reward:8.1574, 0.2895827293395996 sec
Episode 7683, loss:-47.2278, fail, steps:160, total reward:-7.2868, 0.4276401996612549 sec
Episode 7684, loss:-7.2817, succeed, steps:152, total reward:4.2980, 0.41929149627685547 sec
Episode 7685, loss:-2.8807, succeed, steps:124, total reward:2.9085, 0.345353

Episode 7765, loss:-32.4243, fail, steps:161, total reward:-4.1777, 0.45259571075439453 sec
Episode 7766, loss:-7.2538, succeed, steps:124, total reward:1.0519, 0.3545222282409668 sec
Episode 7767, loss:-18.6856, fail, steps:160, total reward:-6.3426, 0.44049572944641113 sec
Episode 7768, loss:-12.8169, fail, steps:160, total reward:-5.0540, 0.44661712646484375 sec
Episode 7769, loss:-10.5175, fail, steps:162, total reward:0.1731, 0.4578838348388672 sec
Episode 7770, loss:3.8862, succeed, steps:87, total reward:3.4987, 0.2535417079925537 sec
Episode 7771, loss:1.9772, succeed, steps:79, total reward:2.3137, 0.21665573120117188 sec
Episode 7772, loss:3.0018, succeed, steps:74, total reward:3.5450, 0.19846391677856445 sec
Episode 7773, loss:-24.1701, fail, steps:161, total reward:-4.6311, 0.42221570014953613 sec
Episode 7774, loss:-30.6496, fail, steps:161, total reward:-0.5384, 0.4397714138031006 sec
Episode 7775, loss:-16.0769, succeed, steps:149, total reward:-2.7226, 0.41754293441772

Episode 7855, loss:-24.5518, fail, steps:161, total reward:-4.4147, 0.41951608657836914 sec
Episode 7856, loss:-4.7088, succeed, steps:136, total reward:5.5878, 0.37393999099731445 sec
Episode 7857, loss:-20.8507, fail, steps:161, total reward:-1.7647, 0.4389522075653076 sec
Episode 7858, loss:-25.2522, fail, steps:161, total reward:-2.5384, 0.4436652660369873 sec
Episode 7859, loss:-1.9036, succeed, steps:122, total reward:0.9806, 0.34388256072998047 sec
Episode 7860, loss:-12.9021, fail, steps:162, total reward:-0.5483, 0.443096399307251 sec
Episode 7861, loss:-0.9954, succeed, steps:67, total reward:3.6156, 0.19438767433166504 sec
Episode 7862, loss:-22.7630, fail, steps:160, total reward:-8.7131, 0.4250357151031494 sec
Episode 7863, loss:-7.2129, fail, steps:161, total reward:-2.0433, 0.43988823890686035 sec
Episode 7864, loss:11.3047, succeed, steps:107, total reward:9.2918, 0.3015902042388916 sec
Episode 7865, loss:2.7791, succeed, steps:106, total reward:5.7860, 0.29016518592834

Episode 7945, loss:1.7717, succeed, steps:88, total reward:3.7053, 0.2317368984222412 sec
Episode 7946, loss:3.1604, succeed, steps:74, total reward:3.9843, 0.19756817817687988 sec
Episode 7947, loss:-8.9272, succeed, steps:129, total reward:-2.9271, 0.3420088291168213 sec
Episode 7948, loss:-18.9570, fail, steps:160, total reward:-6.2803, 0.43068814277648926 sec
Episode 7949, loss:1.1320, succeed, steps:44, total reward:4.3269, 0.13045167922973633 sec
Episode 7950, loss:-0.7587, succeed, steps:74, total reward:0.6129, 0.19329571723937988 sec
Episode 7951, loss:1.7582, succeed, steps:58, total reward:1.9748, 0.1531202793121338 sec
Episode 7952, loss:0.9490, succeed, steps:75, total reward:4.7679, 0.1952202320098877 sec
Episode 7953, loss:-13.6437, fail, steps:160, total reward:-6.2803, 0.4156625270843506 sec
Episode 7954, loss:1.9138, succeed, steps:93, total reward:6.5602, 0.2591114044189453 sec
Episode 7955, loss:2.0666, succeed, steps:86, total reward:6.2308, 0.23734450340270996 sec

Episode 8034, loss:2.2026, succeed, steps:99, total reward:5.4567, 0.26571130752563477 sec
Episode 8035, loss:-19.4090, fail, steps:161, total reward:-2.1975, 0.42525219917297363 sec
Episode 8036, loss:2.2997, succeed, steps:117, total reward:5.7341, 0.3266174793243408 sec
Episode 8037, loss:2.9177, succeed, steps:156, total reward:1.6995, 0.4254143238067627 sec
Episode 8038, loss:-0.0603, succeed, steps:91, total reward:4.1995, 0.2581925392150879 sec
Episode 8039, loss:-3.2939, succeed, steps:127, total reward:2.7535, 0.34374117851257324 sec
Episode 8040, loss:-0.1364, succeed, steps:72, total reward:5.6607, 0.2018742561340332 sec
Episode 8041, loss:-6.3627, succeed, steps:120, total reward:2.5684, 0.31890392303466797 sec
Episode 8042, loss:-26.2793, fail, steps:160, total reward:-8.3533, 0.43175792694091797 sec
Episode 8043, loss:-10.8913, succeed, steps:116, total reward:-2.7300, 0.32864880561828613 sec
Episode 8044, loss:6.0035, succeed, steps:96, total reward:8.4781, 0.26507377624

Episode 8124, loss:-22.9925, fail, steps:161, total reward:-3.2696, 0.4479987621307373 sec
Episode 8125, loss:-4.5153, fail, steps:161, total reward:-1.2597, 0.4483299255371094 sec
Episode 8126, loss:-2.2845, succeed, steps:115, total reward:-2.1431, 0.32876086235046387 sec
Episode 8127, loss:2.2855, succeed, steps:59, total reward:5.8413, 0.16869044303894043 sec
Episode 8128, loss:0.2277, succeed, steps:50, total reward:4.0890, 0.1355588436126709 sec
Episode 8129, loss:-4.9442, succeed, steps:128, total reward:3.1764, 0.3313724994659424 sec
Episode 8130, loss:-0.6737, succeed, steps:103, total reward:3.5074, 0.28243541717529297 sec
Episode 8131, loss:-20.2990, fail, steps:161, total reward:-1.8990, 0.433093786239624 sec
Episode 8132, loss:-0.7322, succeed, steps:101, total reward:-1.3124, 0.28365564346313477 sec
Episode 8133, loss:-21.0603, fail, steps:160, total reward:-5.6311, 0.43094730377197266 sec
Episode 8134, loss:-15.2255, fail, steps:162, total reward:-2.9876, 0.4437425136566

Episode 8214, loss:0.0838, succeed, steps:34, total reward:4.8017, 0.09565329551696777 sec
Episode 8215, loss:1.2785, succeed, steps:131, total reward:2.8771, 0.3337550163269043 sec
Episode 8216, loss:-0.2292, succeed, steps:113, total reward:4.3120, 0.3021049499511719 sec
Episode 8217, loss:1.9787, succeed, steps:101, total reward:4.0853, 0.27218198776245117 sec
Episode 8218, loss:-55.9746, fail, steps:1, total reward:-10.0000, 0.012155294418334961 sec
Episode 8219, loss:-2.2682, succeed, steps:79, total reward:2.0251, 0.19341492652893066 sec
Episode 8220, loss:-3.3237, succeed, steps:69, total reward:-1.4714, 0.17696237564086914 sec
Episode 8221, loss:1.3383, succeed, steps:73, total reward:3.3450, 0.1893162727355957 sec
Episode 8222, loss:2.8539, succeed, steps:48, total reward:1.7283, 0.18160343170166016 sec
Episode 8223, loss:0.5247, succeed, steps:77, total reward:7.2007, 0.1978299617767334 sec
Episode 8224, loss:-27.5905, fail, steps:160, total reward:-9.0639, 0.4187166690826416

Episode 8304, loss:-21.3069, fail, steps:159, total reward:-9.7861, 0.43167757987976074 sec
Episode 8305, loss:-14.9545, fail, steps:162, total reward:-0.1876, 0.44562792778015137 sec
Episode 8306, loss:-26.2943, fail, steps:160, total reward:-9.6517, 0.4453611373901367 sec
Episode 8307, loss:-6.8297, succeed, steps:145, total reward:-3.2791, 0.40700674057006836 sec
Episode 8308, loss:4.6274, succeed, steps:32, total reward:2.8017, 0.10216569900512695 sec
Episode 8309, loss:-16.9645, fail, steps:161, total reward:-6.9197, 0.4170382022857666 sec
Episode 8310, loss:-15.8908, fail, steps:160, total reward:-6.8475, 0.49466896057128906 sec
Episode 8311, loss:-32.2188, fail, steps:160, total reward:-6.0868, 0.43762803077697754 sec
Episode 8312, loss:-19.3770, fail, steps:160, total reward:-8.5689, 0.4430270195007324 sec
Episode 8313, loss:-20.2106, fail, steps:161, total reward:-4.2704, 0.44622015953063965 sec
Episode 8314, loss:2.5645, succeed, steps:79, total reward:5.6293, 0.2298862934112

Episode 8394, loss:0.1974, succeed, steps:59, total reward:2.8306, 0.15949463844299316 sec
Episode 8395, loss:-31.9747, fail, steps:162, total reward:1.3281, 0.42124509811401367 sec
Episode 8396, loss:-2.2385, succeed, steps:83, total reward:2.9423, 0.2340087890625 sec
Episode 8397, loss:2.3739, succeed, steps:95, total reward:8.3438, 0.25680017471313477 sec
Episode 8398, loss:1.2828, succeed, steps:104, total reward:3.5696, 0.27869510650634766 sec
Episode 8399, loss:2.5652, succeed, steps:76, total reward:9.2836, 0.20762896537780762 sec
Episode 8400, loss:-19.0288, fail, steps:161, total reward:-2.4860, 0.4301116466522217 sec
Episode 8401, loss:3.6371, succeed, steps:92, total reward:7.9938, 0.2574920654296875 sec
Episode 8402, loss:-26.0520, fail, steps:161, total reward:-3.7647, 0.4295949935913086 sec
Episode 8403, loss:0.1915, succeed, steps:50, total reward:0.2848, 0.14646124839782715 sec
Episode 8404, loss:-18.1635, succeed, steps:130, total reward:-2.1107, 0.3377835750579834 sec

Episode 8484, loss:-26.7443, fail, steps:161, total reward:-4.6860, 0.4348783493041992 sec
Episode 8485, loss:-0.6125, succeed, steps:85, total reward:-1.3677, 0.24528050422668457 sec
Episode 8486, loss:-24.6005, fail, steps:161, total reward:-2.5581, 0.4312560558319092 sec
Episode 8487, loss:-2.7430, succeed, steps:84, total reward:3.3652, 0.23895692825317383 sec
Episode 8488, loss:-1.3981, succeed, steps:116, total reward:2.0841, 0.3190188407897949 sec
Episode 8489, loss:-10.4864, succeed, steps:141, total reward:3.1958, 0.380220890045166 sec
Episode 8490, loss:0.8083, succeed, steps:135, total reward:3.0008, 0.3708484172821045 sec
Episode 8491, loss:-0.0798, succeed, steps:105, total reward:4.2811, 0.2936279773712158 sec
Episode 8492, loss:-69.2610, fail, steps:29, total reward:-5.3112, 0.08882522583007812 sec
Episode 8493, loss:1.2884, succeed, steps:55, total reward:5.1406, 0.1412029266357422 sec
Episode 8494, loss:-2.0721, succeed, steps:87, total reward:-3.0998, 0.22355031967163

Episode 8574, loss:-0.9242, succeed, steps:136, total reward:-5.0314, 0.38120031356811523 sec
Episode 8575, loss:-10.8650, fail, steps:161, total reward:-1.1155, 0.44404006004333496 sec
Episode 8576, loss:-4.6123, succeed, steps:125, total reward:-3.4114, 0.3539562225341797 sec
Episode 8577, loss:-0.4830, succeed, steps:127, total reward:2.7535, 0.35277867317199707 sec
Episode 8578, loss:-6.6193, succeed, steps:121, total reward:-4.5187, 0.334791898727417 sec
Episode 8579, loss:-11.7490, fail, steps:161, total reward:-3.5483, 0.4381406307220459 sec
Episode 8580, loss:-24.2118, fail, steps:161, total reward:-5.5918, 0.4496805667877197 sec
Episode 8581, loss:-19.0473, fail, steps:160, total reward:-9.0746, 0.4469432830810547 sec
Episode 8582, loss:-0.8552, succeed, steps:101, total reward:0.6418, 0.29051756858825684 sec
Episode 8583, loss:-17.6684, fail, steps:159, total reward:-9.4254, 0.4325847625732422 sec
Episode 8584, loss:-25.1926, fail, steps:160, total reward:-6.8475, 0.443213462

Episode 8664, loss:-7.6876, succeed, steps:127, total reward:-1.9885, 0.3599693775177002 sec
Episode 8665, loss:0.2247, succeed, steps:121, total reward:6.4349, 0.3364231586456299 sec
Episode 8666, loss:1.0686, succeed, steps:70, total reward:3.9328, 0.19691085815429688 sec
Episode 8667, loss:-19.7717, fail, steps:161, total reward:-1.8990, 0.4268622398376465 sec
Episode 8668, loss:-38.4121, fail, steps:159, total reward:-13.1567, 0.43312716484069824 sec
Episode 8669, loss:-27.0741, fail, steps:160, total reward:-7.4967, 0.44146156311035156 sec
Episode 8670, loss:-20.9796, fail, steps:160, total reward:-8.9304, 0.4430220127105713 sec
Episode 8671, loss:2.1010, succeed, steps:108, total reward:3.5490, 0.31000423431396484 sec
Episode 8672, loss:-20.3761, fail, steps:160, total reward:-5.8147, 0.4443080425262451 sec
Episode 8673, loss:-10.3562, succeed, steps:125, total reward:-2.4015, 0.3521077632904053 sec
Episode 8674, loss:-26.8003, fail, steps:159, total reward:-11.1861, 0.4852566719

Episode 8754, loss:-0.1045, succeed, steps:130, total reward:2.3821, 0.33196449279785156 sec
Episode 8755, loss:2.5567, succeed, steps:77, total reward:7.4171, 0.21180200576782227 sec
Episode 8756, loss:4.6654, succeed, steps:63, total reward:6.4699, 0.16986703872680664 sec
Episode 8757, loss:5.5188, succeed, steps:89, total reward:5.9316, 0.23203802108764648 sec
Episode 8758, loss:2.8798, succeed, steps:72, total reward:5.4279, 0.19180583953857422 sec
Episode 8759, loss:0.2774, succeed, steps:49, total reward:2.9448, 0.13057208061218262 sec
Episode 8760, loss:-19.3618, succeed, steps:142, total reward:0.5358, 0.3645937442779541 sec
Episode 8761, loss:-47.8039, fail, steps:160, total reward:-7.5590, 0.42804574966430664 sec
Episode 8762, loss:-9.1355, succeed, steps:122, total reward:3.9192, 0.3393101692199707 sec
Episode 8763, loss:-27.0921, fail, steps:162, total reward:-1.4597, 0.4426412582397461 sec
Episode 8764, loss:3.5701, succeed, steps:101, total reward:7.3846, 0.28481960296630

Episode 8844, loss:-1.4951, succeed, steps:95, total reward:7.2781, 0.26102757453918457 sec
Episode 8845, loss:-25.8336, fail, steps:162, total reward:-0.4032, 0.4317805767059326 sec
Episode 8846, loss:-3.7652, succeed, steps:122, total reward:1.3413, 0.340526819229126 sec
Episode 8847, loss:-0.8961, succeed, steps:76, total reward:-0.9421, 0.21269941329956055 sec
Episode 8848, loss:4.5775, succeed, steps:120, total reward:8.3013, 0.3197591304779053 sec
Episode 8849, loss:-13.3641, succeed, steps:100, total reward:-3.4501, 0.2749931812286377 sec
Episode 8850, loss:10.3830, succeed, steps:84, total reward:4.4472, 0.23031949996948242 sec
Episode 8851, loss:-0.7897, succeed, steps:88, total reward:3.9545, 0.23979401588439941 sec
Episode 8852, loss:-0.6086, succeed, steps:63, total reward:-0.2729, 0.17125272750854492 sec
Episode 8853, loss:0.0566, succeed, steps:100, total reward:6.4731, 0.2638835906982422 sec
Episode 8854, loss:0.0725, succeed, steps:86, total reward:2.4266, 0.23273706436

Episode 8934, loss:-6.2110, succeed, steps:147, total reward:1.9480, 0.4613673686981201 sec
Episode 8935, loss:-2.6809, succeed, steps:95, total reward:-5.3582, 0.26894211769104004 sec
Episode 8936, loss:0.2026, fail, steps:162, total reward:-0.5047, 0.4396655559539795 sec
Episode 8937, loss:-0.4255, succeed, steps:130, total reward:6.2584, 0.3646225929260254 sec
Episode 8938, loss:-7.8168, succeed, steps:139, total reward:-6.7733, 0.38161301612854004 sec
Episode 8939, loss:-4.0116, fail, steps:162, total reward:-1.0532, 0.4441046714782715 sec
Episode 8940, loss:-4.0790, fail, steps:161, total reward:-2.9089, 0.44644689559936523 sec
Episode 8941, loss:-19.7490, fail, steps:161, total reward:-2.6826, 0.452923059463501 sec
Episode 8942, loss:0.1639, succeed, steps:111, total reward:3.6269, 0.3143596649169922 sec
Episode 8943, loss:-18.2867, succeed, steps:129, total reward:-4.0943, 0.35445404052734375 sec
Episode 8944, loss:-3.9100, succeed, steps:92, total reward:1.3560, 0.2569980621337

Episode 9022, loss:-6.3612, succeed, steps:117, total reward:0.2897, 0.3118469715118408 sec
Episode 9023, loss:-1.8313, succeed, steps:138, total reward:3.9278, 0.37348508834838867 sec
Episode 9024, loss:-5.7160, fail, steps:162, total reward:0.6059, 0.43987345695495605 sec
Episode 9025, loss:-1.6433, succeed, steps:64, total reward:0.5992, 0.1849660873413086 sec
Episode 9026, loss:1.9994, succeed, steps:99, total reward:5.4731, 0.2622795104980469 sec
Episode 9027, loss:-5.0419, succeed, steps:125, total reward:0.7535, 0.3377668857574463 sec
Episode 9028, loss:-8.8745, succeed, steps:135, total reward:-2.0829, 0.3647134304046631 sec
Episode 9029, loss:-26.7464, fail, steps:161, total reward:-5.7853, 0.4367496967315674 sec
Episode 9030, loss:0.1957, succeed, steps:94, total reward:7.4880, 0.2658817768096924 sec
Episode 9031, loss:-19.4420, fail, steps:161, total reward:-3.7089, 0.4325284957885742 sec
Episode 9032, loss:-1.3042, succeed, steps:99, total reward:2.1574, 0.2801370620727539 

Episode 9113, loss:3.6110, succeed, steps:81, total reward:7.9015, 0.21861886978149414 sec
Episode 9114, loss:-13.4193, fail, steps:161, total reward:-2.2597, 0.42206788063049316 sec
Episode 9115, loss:1.3050, succeed, steps:82, total reward:8.9736, 0.22977852821350098 sec
Episode 9116, loss:-0.1843, succeed, steps:110, total reward:5.9097, 0.29406213760375977 sec
Episode 9117, loss:-5.0354, succeed, steps:110, total reward:-0.7609, 0.29668688774108887 sec
Episode 9118, loss:0.6485, succeed, steps:120, total reward:9.2391, 0.3238246440887451 sec
Episode 9119, loss:-0.9447, succeed, steps:102, total reward:0.9204, 0.27878594398498535 sec
Episode 9120, loss:-9.6353, fail, steps:161, total reward:-2.6204, 0.4346778392791748 sec
Episode 9121, loss:-6.6753, succeed, steps:122, total reward:1.0527, 0.34043312072753906 sec
Episode 9122, loss:-20.5257, fail, steps:161, total reward:-3.9188, 0.43919920921325684 sec
Episode 9123, loss:-2.4988, succeed, steps:97, total reward:3.8895, 0.2747991085

Episode 9203, loss:1.5157, succeed, steps:99, total reward:5.0960, 0.25982046127319336 sec
Episode 9204, loss:1.5896, succeed, steps:105, total reward:3.6483, 0.27963757514953613 sec
Episode 9205, loss:1.5478, succeed, steps:84, total reward:7.1694, 0.22811460494995117 sec
Episode 9206, loss:-1.0722, succeed, steps:55, total reward:2.0577, 0.1495358943939209 sec
Episode 9207, loss:-1.4617, succeed, steps:99, total reward:4.8796, 0.25867795944213867 sec
Episode 9208, loss:1.5461, succeed, steps:139, total reward:8.0107, 0.3671879768371582 sec
Episode 9209, loss:2.2693, succeed, steps:77, total reward:7.2729, 0.21520018577575684 sec
Episode 9210, loss:-2.3282, succeed, steps:125, total reward:3.4757, 0.33587193489074707 sec
Episode 9211, loss:-1.6332, succeed, steps:117, total reward:3.0119, 0.3170452117919922 sec
Episode 9212, loss:-1.6948, succeed, steps:134, total reward:-0.3607, 0.361403226852417 sec
Episode 9213, loss:-0.2692, succeed, steps:77, total reward:0.0579, 0.21629500389099

Episode 9294, loss:-3.0610, succeed, steps:85, total reward:8.0973, 0.22086620330810547 sec
Episode 9295, loss:-0.7348, succeed, steps:126, total reward:1.7535, 0.3803565502166748 sec
Episode 9296, loss:0.9935, succeed, steps:72, total reward:5.6443, 0.2022414207458496 sec
Episode 9297, loss:11.8975, succeed, steps:140, total reward:9.4435, 0.3696024417877197 sec
Episode 9298, loss:-30.2531, fail, steps:159, total reward:-13.0944, 0.4306023120880127 sec
Episode 9299, loss:0.0967, succeed, steps:47, total reward:4.0997, 0.13977575302124023 sec
Episode 9300, loss:1.7894, succeed, steps:64, total reward:4.0264, 0.16936039924621582 sec
Episode 9301, loss:-0.0969, succeed, steps:76, total reward:2.9015, 0.19823002815246582 sec
Episode 9302, loss:0.6445, succeed, steps:153, total reward:-1.6612, 0.3993189334869385 sec
Episode 9303, loss:0.3504, succeed, steps:104, total reward:3.5696, 0.28737878799438477 sec
Episode 9304, loss:-3.7641, succeed, steps:119, total reward:-1.6587, 0.325555324554

Episode 9384, loss:0.3973, succeed, steps:109, total reward:5.1261, 0.3131074905395508 sec
Episode 9385, loss:0.0226, succeed, steps:83, total reward:5.9530, 0.2294921875 sec
Episode 9386, loss:-1.9347, succeed, steps:112, total reward:0.8784, 0.299328088760376 sec
Episode 9387, loss:-2.2648, succeed, steps:75, total reward:5.3450, 0.20734739303588867 sec
Episode 9388, loss:-3.2835, succeed, steps:138, total reward:-0.2370, 0.4189150333404541 sec
Episode 9389, loss:-0.9133, succeed, steps:129, total reward:8.4134, 0.3516244888305664 sec
Episode 9390, loss:-2.5494, succeed, steps:75, total reward:5.0565, 0.21074271202087402 sec
Episode 9391, loss:-2.2642, succeed, steps:79, total reward:1.5137, 0.21183180809020996 sec
Episode 9392, loss:-16.6334, fail, steps:162, total reward:1.4625, 0.4302253723144531 sec
Episode 9393, loss:-11.6749, succeed, steps:144, total reward:2.4637, 0.39436888694763184 sec
Episode 9394, loss:-18.8431, fail, steps:160, total reward:-8.9295, 0.4386913776397705 se

Episode 9474, loss:-0.9388, succeed, steps:92, total reward:5.3438, 0.25057411193847656 sec
Episode 9475, loss:-3.5445, succeed, steps:136, total reward:4.6500, 0.3625025749206543 sec
Episode 9476, loss:1.5589, succeed, steps:126, total reward:4.5085, 0.3451576232910156 sec
Episode 9477, loss:1.3604, succeed, steps:91, total reward:7.2987, 0.25336456298828125 sec
Episode 9478, loss:2.0923, succeed, steps:112, total reward:8.1261, 0.30188465118408203 sec
Episode 9479, loss:0.2662, succeed, steps:62, total reward:5.9748, 0.17179083824157715 sec
Episode 9480, loss:-0.6197, succeed, steps:59, total reward:-0.9736, 0.15807318687438965 sec
Episode 9481, loss:-1.7064, succeed, steps:88, total reward:4.4266, 0.2295064926147461 sec
Episode 9482, loss:-26.6059, fail, steps:160, total reward:-9.6509, 0.47577357292175293 sec
Episode 9483, loss:-0.4958, succeed, steps:78, total reward:4.9736, 0.22433233261108398 sec
Episode 9484, loss:-0.7589, succeed, steps:55, total reward:1.9856, 0.1497898101806

Episode 9564, loss:-7.1157, fail, steps:161, total reward:-1.4040, 0.4260096549987793 sec
Episode 9565, loss:3.2156, succeed, steps:135, total reward:0.7836, 0.37145447731018066 sec
Episode 9566, loss:0.2155, succeed, steps:57, total reward:4.4905, 0.21822047233581543 sec
Episode 9567, loss:0.8030, succeed, steps:120, total reward:6.3005, 0.3143632411956787 sec
Episode 9568, loss:0.2105, succeed, steps:64, total reward:0.6550, 0.17738628387451172 sec
Episode 9569, loss:-13.5940, fail, steps:161, total reward:-4.2704, 0.42679309844970703 sec
Episode 9570, loss:1.7793, succeed, steps:100, total reward:2.9410, 0.27841925621032715 sec
Episode 9571, loss:-10.3590, fail, steps:161, total reward:-1.8925, 0.43134570121765137 sec
Episode 9572, loss:2.0927, succeed, steps:65, total reward:1.7042, 0.1865081787109375 sec
Episode 9573, loss:0.3983, succeed, steps:75, total reward:2.1900, 0.20028972625732422 sec
Episode 9574, loss:-12.8881, fail, steps:161, total reward:-2.2498, 0.42061519622802734 

Episode 9654, loss:-15.1644, fail, steps:161, total reward:-2.4433, 0.4191622734069824 sec
Episode 9655, loss:0.8998, succeed, steps:112, total reward:5.7646, 0.3085472583770752 sec
Episode 9656, loss:-0.0971, succeed, steps:106, total reward:5.7860, 0.2959003448486328 sec
Episode 9657, loss:0.5848, succeed, steps:76, total reward:6.6335, 0.20737743377685547 sec
Episode 9658, loss:-17.3175, succeed, steps:148, total reward:3.9579, 0.39018702507019043 sec
Episode 9659, loss:7.3376, succeed, steps:104, total reward:10.9616, 0.28764891624450684 sec
Episode 9660, loss:-11.8269, succeed, steps:112, total reward:5.1154, 0.30556750297546387 sec
Episode 9661, loss:-0.8188, succeed, steps:83, total reward:6.3858, 0.22867059707641602 sec
Episode 9662, loss:-9.8521, fail, steps:161, total reward:-0.2498, 0.42923974990844727 sec
Episode 9663, loss:-11.2935, succeed, steps:149, total reward:-1.2799, 0.41501569747924805 sec
Episode 9664, loss:-29.6646, fail, steps:159, total reward:-14.1674, 0.44074

Episode 9744, loss:5.0280, succeed, steps:122, total reward:6.6414, 0.31820082664489746 sec
Episode 9745, loss:0.0912, succeed, steps:80, total reward:3.3465, 0.21877670288085938 sec
Episode 9746, loss:1.1922, succeed, steps:112, total reward:5.6203, 0.2984018325805664 sec
Episode 9747, loss:-7.2248, succeed, steps:145, total reward:-2.7741, 0.38912153244018555 sec
Episode 9748, loss:-5.9137, succeed, steps:73, total reward:3.4335, 0.20446085929870605 sec
Episode 9749, loss:-0.2447, succeed, steps:80, total reward:0.1587, 0.21359539031982422 sec
Episode 9750, loss:-24.3338, fail, steps:158, total reward:-14.8067, 0.4155588150024414 sec
Episode 9751, loss:-11.3078, fail, steps:160, total reward:-9.9861, 0.4358229637145996 sec
Episode 9752, loss:-0.1110, succeed, steps:57, total reward:-2.5408, 0.16593647003173828 sec
Episode 9753, loss:-6.7344, fail, steps:161, total reward:-3.2826, 0.42696046829223633 sec
Episode 9754, loss:-1.6116, succeed, steps:118, total reward:4.6611, 0.3266539573

Episode 9834, loss:-8.7238, fail, steps:162, total reward:3.6888, 0.503962516784668 sec
Episode 9835, loss:2.6550, succeed, steps:110, total reward:6.2704, 0.3155632019042969 sec
Episode 9836, loss:-17.4378, fail, steps:160, total reward:-6.3426, 0.43781065940856934 sec
Episode 9837, loss:-2.7182, fail, steps:161, total reward:-1.6826, 0.451906681060791 sec
Episode 9838, loss:1.8407, succeed, steps:84, total reward:3.6537, 0.24431872367858887 sec
Episode 9839, loss:2.1715, succeed, steps:102, total reward:5.8067, 0.275632381439209 sec
Episode 9840, loss:-4.0222, succeed, steps:118, total reward:4.6611, 0.31653332710266113 sec
Episode 9841, loss:-0.0344, succeed, steps:101, total reward:4.1574, 0.27689027786254883 sec
Episode 9842, loss:-0.4059, succeed, steps:72, total reward:2.2893, 0.19784832000732422 sec
Episode 9843, loss:-0.3607, succeed, steps:28, total reward:1.8124, 0.08118486404418945 sec
Episode 9844, loss:0.9704, succeed, steps:92, total reward:5.0159, 0.23444128036499023 se

Episode 9924, loss:0.5800, succeed, steps:125, total reward:7.9291, 0.3339991569519043 sec
Episode 9925, loss:0.4683, succeed, steps:93, total reward:6.3438, 0.25535106658935547 sec
Episode 9926, loss:1.9293, succeed, steps:98, total reward:8.0445, 0.2637453079223633 sec
Episode 9927, loss:0.6501, succeed, steps:147, total reward:2.3808, 0.39156365394592285 sec
Episode 9928, loss:-9.4652, succeed, steps:124, total reward:-4.7786, 0.3408510684967041 sec
Episode 9929, loss:-0.4120, succeed, steps:76, total reward:3.1179, 0.21100330352783203 sec
Episode 9930, loss:0.0261, succeed, steps:105, total reward:0.5097, 0.28549909591674805 sec
Episode 9931, loss:2.5633, succeed, steps:72, total reward:5.5000, 0.1956615447998047 sec
Episode 9932, loss:0.3383, succeed, steps:89, total reward:5.2823, 0.23638629913330078 sec
Episode 9933, loss:-5.9194, succeed, steps:103, total reward:0.1360, 0.2736654281616211 sec
Episode 9934, loss:0.3621, succeed, steps:91, total reward:4.3438, 0.24619507789611816

Episode 10012, loss:1.7109, succeed, steps:118, total reward:4.0841, 0.3011295795440674 sec
Episode 10013, loss:0.3402, succeed, steps:99, total reward:5.8174, 0.26456403732299805 sec
Episode 10014, loss:0.4285, succeed, steps:59, total reward:2.9027, 0.15961766242980957 sec
Episode 10015, loss:0.4537, succeed, steps:106, total reward:9.2296, 0.2751810550689697 sec
Episode 10016, loss:-1.0260, succeed, steps:105, total reward:1.5589, 0.2798633575439453 sec
Episode 10017, loss:1.1275, succeed, steps:67, total reward:7.0985, 0.18194961547851562 sec
Episode 10018, loss:2.1402, succeed, steps:86, total reward:9.6022, 0.2253551483154297 sec
Episode 10019, loss:0.0868, succeed, steps:127, total reward:6.2691, 0.3893873691558838 sec
Episode 10020, loss:1.7311, succeed, steps:125, total reward:7.2405, 0.33729982376098633 sec
Episode 10021, loss:-4.3563, succeed, steps:105, total reward:4.1368, 0.2870345115661621 sec
Episode 10022, loss:-2.2506, succeed, steps:132, total reward:0.5779, 0.354485

Episode 10101, loss:-11.6066, fail, steps:159, total reward:-13.6624, 0.4511730670928955 sec
Episode 10102, loss:0.1702, succeed, steps:40, total reward:4.1311, 0.12621641159057617 sec
Episode 10103, loss:-6.6601, fail, steps:159, total reward:-13.3762, 0.4122312068939209 sec
Episode 10104, loss:-4.0851, fail, steps:161, total reward:-3.3327, 0.43773913383483887 sec
Episode 10105, loss:-5.2778, fail, steps:160, total reward:-5.6934, 0.4421241283416748 sec
Episode 10106, loss:-6.2350, fail, steps:159, total reward:-11.0648, 0.4404261112213135 sec
Episode 10107, loss:1.3397, succeed, steps:65, total reward:5.6034, 0.19695448875427246 sec
Episode 10108, loss:-4.1671, succeed, steps:90, total reward:-0.0112, 0.2445216178894043 sec
Episode 10109, loss:-2.3763, fail, steps:161, total reward:-0.9712, 0.42595338821411133 sec
Episode 10110, loss:-3.9066, fail, steps:160, total reward:-6.8048, 0.43761658668518066 sec
Episode 10111, loss:1.9753, succeed, steps:53, total reward:6.6562, 0.158260822

Episode 10191, loss:0.2263, succeed, steps:63, total reward:6.9027, 0.16353154182434082 sec
Episode 10192, loss:0.8326, succeed, steps:71, total reward:4.8607, 0.18335676193237305 sec
Episode 10193, loss:-1.2042, succeed, steps:150, total reward:9.1850, 0.3873751163482666 sec
Episode 10194, loss:0.6584, succeed, steps:77, total reward:0.6743, 0.21392011642456055 sec
Episode 10195, loss:4.4593, succeed, steps:32, total reward:3.0181, 0.0897359848022461 sec
Episode 10196, loss:5.0095, succeed, steps:119, total reward:5.1562, 0.30396509170532227 sec
Episode 10197, loss:0.4190, succeed, steps:81, total reward:4.6186, 0.22887635231018066 sec
Episode 10198, loss:-1.1045, succeed, steps:141, total reward:-0.3756, 0.3695187568664551 sec
Episode 10199, loss:1.6937, succeed, steps:129, total reward:0.6607, 0.3497178554534912 sec
Episode 10200, loss:0.2917, succeed, steps:58, total reward:1.6863, 0.16360831260681152 sec
Episode 10201, loss:-1.0597, succeed, steps:126, total reward:1.1043, 0.32946

Episode 10280, loss:-4.4136, succeed, steps:91, total reward:0.1560, 0.2470548152923584 sec
Episode 10281, loss:-4.0605, succeed, steps:82, total reward:-2.2947, 0.22222471237182617 sec
Episode 10282, loss:1.9124, succeed, steps:134, total reward:3.7484, 0.35384345054626465 sec
Episode 10283, loss:-2.5985, succeed, steps:125, total reward:0.8256, 0.34031152725219727 sec
Episode 10284, loss:-0.0355, succeed, steps:76, total reward:2.7572, 0.21074891090393066 sec
Episode 10285, loss:-0.0405, succeed, steps:51, total reward:1.3734, 0.13912677764892578 sec
Episode 10286, loss:-4.5337, succeed, steps:160, total reward:-4.1983, 0.41893458366394043 sec
Episode 10287, loss:2.6711, succeed, steps:90, total reward:-0.1719, 0.2554957866668701 sec
Episode 10288, loss:-1.8306, succeed, steps:98, total reward:4.4567, 0.269625186920166 sec
Episode 10289, loss:-7.8716, succeed, steps:132, total reward:-0.2943, 0.35207366943359375 sec
Episode 10290, loss:2.5464, succeed, steps:65, total reward:8.9748, 

Episode 10369, loss:-0.8058, succeed, steps:124, total reward:3.5577, 0.3339710235595703 sec
Episode 10370, loss:0.4049, succeed, steps:117, total reward:2.7234, 0.3181154727935791 sec
Episode 10371, loss:-9.6305, succeed, steps:160, total reward:1.3510, 0.43477940559387207 sec
Episode 10372, loss:0.5915, succeed, steps:97, total reward:3.0239, 0.2738187313079834 sec
Episode 10373, loss:0.0395, succeed, steps:89, total reward:5.4987, 0.2439861297607422 sec
Episode 10374, loss:2.3640, succeed, steps:90, total reward:9.8701, 0.24155545234680176 sec
Episode 10375, loss:0.8637, succeed, steps:87, total reward:7.4472, 0.23317766189575195 sec
Episode 10376, loss:0.0033, succeed, steps:103, total reward:6.2296, 0.27408552169799805 sec
Episode 10377, loss:-22.5207, fail, steps:160, total reward:-6.2082, 0.4250965118408203 sec
Episode 10378, loss:-10.0931, succeed, steps:106, total reward:-4.9773, 0.2985196113586426 sec
Episode 10379, loss:-3.1154, succeed, steps:122, total reward:-2.6793, 0.33

Episode 10458, loss:3.8729, succeed, steps:72, total reward:5.8607, 0.2087244987487793 sec
Episode 10459, loss:-1.6989, succeed, steps:82, total reward:5.6022, 0.21820974349975586 sec
Episode 10460, loss:0.4768, succeed, steps:52, total reward:1.6356, 0.1404109001159668 sec
Episode 10461, loss:0.2644, succeed, steps:114, total reward:2.7440, 0.29497766494750977 sec
Episode 10462, loss:-0.2351, succeed, steps:83, total reward:6.8907, 0.22494816780090332 sec
Episode 10463, loss:0.1155, succeed, steps:144, total reward:2.6801, 0.3779575824737549 sec
Episode 10464, loss:0.4156, succeed, steps:65, total reward:1.5828, 0.18481922149658203 sec
Episode 10465, loss:0.7233, succeed, steps:92, total reward:5.4880, 0.24298310279846191 sec
Episode 10466, loss:-3.2913, succeed, steps:142, total reward:-2.1863, 0.3797626495361328 sec
Episode 10467, loss:0.2658, succeed, steps:75, total reward:2.1179, 0.21097970008850098 sec
Episode 10468, loss:0.2339, succeed, steps:107, total reward:6.2090, 0.283713

Episode 10547, loss:-15.6267, fail, steps:161, total reward:-1.7548, 0.4395146369934082 sec
Episode 10548, loss:-7.2504, fail, steps:160, total reward:-7.1262, 0.45000672340393066 sec
Episode 10549, loss:-1.1023, fail, steps:160, total reward:-4.7556, 0.44586634635925293 sec
Episode 10550, loss:-1.6462, succeed, steps:130, total reward:3.0313, 0.36601829528808594 sec
Episode 10551, loss:-10.5626, fail, steps:160, total reward:-5.2704, 0.4395437240600586 sec
Episode 10552, loss:-3.4062, fail, steps:161, total reward:-3.0433, 0.44679951667785645 sec
Episode 10553, loss:-7.9530, succeed, steps:108, total reward:1.3318, 0.310488224029541 sec
Episode 10554, loss:-8.8559, fail, steps:160, total reward:-6.6212, 0.43288612365722656 sec
Episode 10555, loss:-10.3692, fail, steps:159, total reward:-12.9205, 0.4942502975463867 sec
Episode 10556, loss:-0.7945, succeed, steps:102, total reward:1.8582, 0.29114270210266113 sec
Episode 10557, loss:-2.5153, fail, steps:161, total reward:0.1108, 0.432846

Episode 10636, loss:-1.3396, succeed, steps:112, total reward:2.1047, 0.31740856170654297 sec
Episode 10637, loss:-4.3611, fail, steps:161, total reward:-0.6105, 0.4384887218475342 sec
Episode 10638, loss:0.0864, succeed, steps:98, total reward:1.1574, 0.28223371505737305 sec
Episode 10639, loss:2.2433, succeed, steps:96, total reward:2.8895, 0.26370787620544434 sec
Episode 10640, loss:-5.8942, fail, steps:161, total reward:-2.4662, 0.4287846088409424 sec
Episode 10641, loss:-6.2259, succeed, steps:151, total reward:0.9365, 0.41588497161865234 sec
Episode 10642, loss:-4.7032, succeed, steps:156, total reward:-0.7342, 0.48939037322998047 sec
Episode 10643, loss:-9.5807, fail, steps:161, total reward:-1.8170, 0.4510927200317383 sec
Episode 10644, loss:0.3937, succeed, steps:128, total reward:0.9592, 0.35881948471069336 sec
Episode 10645, loss:-4.0260, fail, steps:161, total reward:-2.2498, 0.4412245750427246 sec
Episode 10646, loss:-4.6644, succeed, steps:86, total reward:-0.2234, 0.2470

Episode 10725, loss:-11.3693, fail, steps:162, total reward:1.4330, 0.41866278648376465 sec
Episode 10726, loss:-6.4618, succeed, steps:109, total reward:-1.7609, 0.30091238021850586 sec
Episode 10727, loss:-18.7440, fail, steps:161, total reward:-5.4540, 0.4307889938354492 sec
Episode 10728, loss:2.1053, succeed, steps:100, total reward:10.1166, 0.2847111225128174 sec
Episode 10729, loss:1.5815, succeed, steps:67, total reward:3.5828, 0.18816566467285156 sec
Episode 10730, loss:0.5866, succeed, steps:112, total reward:4.6104, 0.2950258255004883 sec
Episode 10731, loss:1.7376, succeed, steps:62, total reward:5.3978, 0.17020392417907715 sec
Episode 10732, loss:-0.2973, succeed, steps:70, total reward:3.5000, 0.18554067611694336 sec
Episode 10733, loss:0.4504, succeed, steps:109, total reward:5.0540, 0.2843940258026123 sec
Episode 10734, loss:-12.7808, fail, steps:161, total reward:-2.1876, 0.42647361755371094 sec
Episode 10735, loss:-0.1633, succeed, steps:51, total reward:1.2127, 0.148

Episode 10814, loss:-3.9237, fail, steps:162, total reward:2.9773, 0.45204854011535645 sec
Episode 10815, loss:-5.4138, fail, steps:161, total reward:-3.6934, 0.44968628883361816 sec
Episode 10816, loss:-1.7789, succeed, steps:99, total reward:-0.9975, 0.28389620780944824 sec
Episode 10817, loss:0.8575, succeed, steps:123, total reward:2.4855, 0.33371496200561523 sec
Episode 10818, loss:1.8292, succeed, steps:111, total reward:4.1875, 0.30577921867370605 sec
Episode 10819, loss:-10.4391, fail, steps:161, total reward:-1.8170, 0.43650054931640625 sec
Episode 10820, loss:1.4331, succeed, steps:77, total reward:3.6851, 0.22128844261169434 sec
Episode 10821, loss:-3.1401, succeed, steps:130, total reward:-1.4286, 0.3464357852935791 sec
Episode 10822, loss:-4.3735, fail, steps:162, total reward:1.9052, 0.438976526260376 sec
Episode 10823, loss:1.7393, succeed, steps:115, total reward:8.3318, 0.3218982219696045 sec
Episode 10824, loss:-8.9465, succeed, steps:154, total reward:0.2044, 0.41875

Episode 10903, loss:-5.1559, fail, steps:161, total reward:-4.2761, 0.4351942539215088 sec
Episode 10904, loss:-16.9898, fail, steps:160, total reward:-7.4868, 0.4417240619659424 sec
Episode 10905, loss:-6.1863, fail, steps:161, total reward:-2.7548, 0.44703149795532227 sec
Episode 10906, loss:0.1664, succeed, steps:78, total reward:1.0251, 0.22674250602722168 sec
Episode 10907, loss:-4.3146, fail, steps:161, total reward:-1.0433, 0.434985876083374 sec
Episode 10908, loss:0.4293, succeed, steps:124, total reward:2.6199, 0.34450721740722656 sec
Episode 10909, loss:-1.7256, succeed, steps:156, total reward:-3.8719, 0.4238133430480957 sec
Episode 10910, loss:0.2286, succeed, steps:85, total reward:4.6537, 0.24331426620483398 sec
Episode 10911, loss:1.8502, succeed, steps:118, total reward:3.5070, 0.3172576427459717 sec
Episode 10912, loss:1.5029, succeed, steps:97, total reward:2.8075, 0.2662780284881592 sec
Episode 10913, loss:-0.0904, succeed, steps:61, total reward:0.5936, 0.1692273616

Episode 10992, loss:0.0481, succeed, steps:106, total reward:5.4975, 0.28405165672302246 sec
Episode 10993, loss:5.0049, succeed, steps:122, total reward:4.1356, 0.32848095893859863 sec
Episode 10994, loss:-4.5797, fail, steps:161, total reward:-2.1056, 0.4371311664581299 sec
Episode 10995, loss:-0.8901, succeed, steps:61, total reward:1.2428, 0.17526721954345703 sec
Episode 10996, loss:-2.0563, succeed, steps:113, total reward:-1.3487, 0.2966320514678955 sec
Episode 10997, loss:11.1306, succeed, steps:63, total reward:6.1814, 0.17325091361999512 sec
Episode 10998, loss:-4.5231, fail, steps:161, total reward:-2.9712, 0.42093586921691895 sec
Episode 10999, loss:-0.0758, succeed, steps:91, total reward:7.7152, 0.25533437728881836 sec
Checkpoint saved at episode 11000 to datasets/rl_sort_transformer_easy/list8_transformer4_192_gamma09_step210_v2/ckpt_11000_0.7160_113.69.pth
Episode 11000, loss:0.9449, succeed, steps:70, total reward:-0.2321, 0.22321796417236328 sec
Episode 11001, loss:1.4

Episode 11080, loss:-10.5572, fail, steps:161, total reward:-2.7647, 0.4428095817565918 sec
Episode 11081, loss:-5.8928, fail, steps:159, total reward:-10.0025, 0.43938517570495605 sec
Episode 11082, loss:1.1434, succeed, steps:106, total reward:1.6933, 0.2988777160644531 sec
Episode 11083, loss:-1.6713, succeed, steps:77, total reward:-0.1913, 0.21323251724243164 sec
Episode 11084, loss:5.4014, succeed, steps:102, total reward:4.1476, 0.27246880531311035 sec
Episode 11085, loss:-0.6337, succeed, steps:104, total reward:3.5696, 0.2800765037536621 sec
Episode 11086, loss:1.5739, succeed, steps:140, total reward:-1.3920, 0.3739786148071289 sec
Episode 11087, loss:-0.2044, succeed, steps:123, total reward:5.1356, 0.3412449359893799 sec
Episode 11088, loss:-4.1156, fail, steps:161, total reward:-4.1253, 0.4352152347564697 sec
Episode 11089, loss:-4.7104, succeed, steps:98, total reward:-3.8796, 0.27821898460388184 sec
Episode 11090, loss:-1.1711, succeed, steps:103, total reward:-1.6674, 0

Episode 11169, loss:-0.5032, succeed, steps:95, total reward:1.0239, 0.25562429428100586 sec
Episode 11170, loss:1.5531, succeed, steps:89, total reward:5.6430, 0.24061918258666992 sec
Episode 11171, loss:-1.4704, succeed, steps:83, total reward:2.6537, 0.22519612312316895 sec
Episode 11172, loss:-7.6925, succeed, steps:159, total reward:-2.3319, 0.4204840660095215 sec
Episode 11173, loss:-23.7944, fail, steps:160, total reward:-9.1468, 0.4404261112213135 sec
Episode 11174, loss:-7.4926, fail, steps:160, total reward:-6.4868, 0.44426679611206055 sec
Episode 11175, loss:0.3633, succeed, steps:54, total reward:0.5692, 0.16013288497924805 sec
Episode 11176, loss:1.2850, succeed, steps:49, total reward:1.5742, 0.13246917724609375 sec
Episode 11177, loss:-2.7932, fail, steps:161, total reward:-3.1975, 0.4140489101409912 sec
Episode 11178, loss:-0.1974, succeed, steps:62, total reward:2.0264, 0.1762387752532959 sec
Episode 11179, loss:-4.1998, succeed, steps:81, total reward:0.8701, 0.213987

Episode 11258, loss:-8.7104, fail, steps:160, total reward:-5.4868, 0.4119079113006592 sec
Episode 11259, loss:-1.0983, succeed, steps:126, total reward:-1.9065, 0.3442037105560303 sec
Episode 11260, loss:0.2006, succeed, steps:46, total reward:3.0276, 0.14320755004882812 sec
Episode 11261, loss:-8.3244, fail, steps:161, total reward:-3.9483, 0.41872429847717285 sec
Episode 11262, loss:0.8016, succeed, steps:95, total reward:0.9518, 0.26495909690856934 sec
Episode 11263, loss:2.1084, succeed, steps:80, total reward:2.1595, 0.21839165687561035 sec
Episode 11264, loss:-7.0881, succeed, steps:154, total reward:-3.6555, 0.40536952018737793 sec
Episode 11265, loss:1.6361, succeed, steps:72, total reward:4.6344, 0.20297908782958984 sec
Episode 11266, loss:1.3518, succeed, steps:134, total reward:-0.5771, 0.408827543258667 sec
Episode 11267, loss:-5.7024, fail, steps:162, total reward:-1.0532, 0.4417572021484375 sec
Episode 11268, loss:0.4138, succeed, steps:69, total reward:5.1500, 0.1977157

Episode 11347, loss:2.5758, succeed, steps:79, total reward:6.1179, 0.21801066398620605 sec
Episode 11348, loss:0.2583, succeed, steps:57, total reward:3.7856, 0.1551342010498047 sec
Episode 11349, loss:1.2260, succeed, steps:76, total reward:2.0851, 0.20092368125915527 sec
Episode 11350, loss:-11.3541, fail, steps:162, total reward:0.6067, 0.4217555522918701 sec
Episode 11351, loss:-17.4186, fail, steps:160, total reward:-9.2910, 0.4369659423828125 sec
Episode 11352, loss:1.9317, succeed, steps:66, total reward:6.0428, 0.19083523750305176 sec
Episode 11353, loss:-2.4401, succeed, steps:149, total reward:-5.0841, 0.39751577377319336 sec
Episode 11354, loss:1.0509, succeed, steps:59, total reward:5.9856, 0.16899991035461426 sec
Episode 11355, loss:-24.7264, fail, steps:161, total reward:-3.8376, 0.42046356201171875 sec
Episode 11356, loss:3.6712, succeed, steps:97, total reward:6.6838, 0.2736852169036865 sec
Episode 11357, loss:-9.9653, fail, steps:161, total reward:-2.3220, 0.431121110

Episode 11436, loss:-1.7315, fail, steps:162, total reward:1.3281, 0.4333789348602295 sec
Episode 11437, loss:-1.9074, succeed, steps:113, total reward:2.8161, 0.3149418830871582 sec
Episode 11438, loss:2.5702, succeed, steps:107, total reward:5.9696, 0.29259228706359863 sec
Episode 11439, loss:1.1218, succeed, steps:98, total reward:7.5396, 0.26857519149780273 sec
Episode 11440, loss:-8.2268, fail, steps:161, total reward:-0.5384, 0.43093204498291016 sec
Episode 11441, loss:-3.3815, fail, steps:162, total reward:1.3281, 0.4438345432281494 sec
Episode 11442, loss:-0.0908, succeed, steps:78, total reward:1.0251, 0.22575139999389648 sec
Episode 11443, loss:0.2220, succeed, steps:90, total reward:2.8388, 0.24231815338134766 sec
Episode 11444, loss:-0.4248, succeed, steps:122, total reward:5.0733, 0.32376813888549805 sec
Episode 11445, loss:-0.5523, succeed, steps:126, total reward:5.6298, 0.3407909870147705 sec
Episode 11446, loss:3.8010, succeed, steps:98, total reward:4.6731, 0.27424359

Episode 11525, loss:-4.8277, fail, steps:162, total reward:2.2559, 0.42685437202453613 sec
Episode 11526, loss:0.4626, succeed, steps:103, total reward:5.8132, 0.28840208053588867 sec
Episode 11527, loss:-8.9840, fail, steps:160, total reward:-6.8574, 0.4291214942932129 sec
Episode 11528, loss:0.3386, succeed, steps:69, total reward:9.1706, 0.19745278358459473 sec
Episode 11529, loss:1.3570, succeed, steps:108, total reward:0.1219, 0.2868516445159912 sec
Episode 11530, loss:0.9692, succeed, steps:96, total reward:5.1560, 0.2634925842285156 sec
Episode 11531, loss:4.8659, succeed, steps:100, total reward:6.6010, 0.26938915252685547 sec
Episode 11532, loss:0.8641, succeed, steps:110, total reward:9.7139, 0.29578423500061035 sec
Episode 11533, loss:3.8528, succeed, steps:123, total reward:4.4142, 0.33074498176574707 sec
Episode 11534, loss:-3.6845, succeed, steps:158, total reward:-0.1769, 0.4279673099517822 sec
Episode 11535, loss:-5.4076, fail, steps:161, total reward:-0.6105, 0.4426143

Episode 11614, loss:1.6394, succeed, steps:125, total reward:7.7619, 0.3343832492828369 sec
Episode 11615, loss:0.1834, succeed, steps:59, total reward:2.9748, 0.1668384075164795 sec
Episode 11616, loss:1.9587, succeed, steps:146, total reward:5.7621, 0.39123082160949707 sec
Episode 11617, loss:1.3096, succeed, steps:57, total reward:4.4184, 0.16240358352661133 sec
Episode 11618, loss:-0.7475, succeed, steps:103, total reward:3.2189, 0.2702298164367676 sec
Episode 11619, loss:-4.5747, fail, steps:161, total reward:-2.6105, 0.4266941547393799 sec
Episode 11620, loss:-0.6250, succeed, steps:109, total reward:5.5589, 0.30363965034484863 sec
Episode 11621, loss:0.5330, succeed, steps:87, total reward:3.7316, 0.23855113983154297 sec
Episode 11622, loss:-1.0071, succeed, steps:82, total reward:2.3029, 0.220689058303833 sec
Episode 11623, loss:0.2597, succeed, steps:123, total reward:5.8569, 0.33009958267211914 sec
Episode 11624, loss:1.9390, succeed, steps:47, total reward:4.3326, 0.13296008

Episode 11703, loss:4.5956, succeed, steps:87, total reward:6.8701, 0.24189043045043945 sec
Episode 11704, loss:0.5469, succeed, steps:115, total reward:1.2447, 0.3085956573486328 sec
Episode 11705, loss:-1.9089, succeed, steps:113, total reward:2.7440, 0.30534863471984863 sec
Episode 11706, loss:7.7877, succeed, steps:103, total reward:9.4567, 0.27992749214172363 sec
Episode 11707, loss:0.0957, succeed, steps:51, total reward:4.5841, 0.14153218269348145 sec
Episode 11708, loss:1.9374, succeed, steps:82, total reward:1.9029, 0.21443462371826172 sec
Episode 11709, loss:-0.6409, succeed, steps:86, total reward:3.0758, 0.22579431533813477 sec
Episode 11710, loss:2.2476, succeed, steps:118, total reward:3.5070, 0.3101017475128174 sec
Episode 11711, loss:4.6738, succeed, steps:81, total reward:7.6129, 0.22047853469848633 sec
Episode 11712, loss:1.0701, succeed, steps:53, total reward:2.9963, 0.15449213981628418 sec
Episode 11713, loss:-0.2112, succeed, steps:72, total reward:2.2007, 0.18721

Episode 11792, loss:-16.3713, fail, steps:160, total reward:-10.3468, 0.4232175350189209 sec
Episode 11793, loss:-11.0920, fail, steps:160, total reward:-7.4868, 0.43462562561035156 sec
Episode 11794, loss:-0.9819, succeed, steps:103, total reward:5.7246, 0.296126127243042 sec
Episode 11795, loss:1.5823, succeed, steps:69, total reward:6.2321, 0.19010210037231445 sec
Episode 11796, loss:4.2354, succeed, steps:68, total reward:4.9435, 0.18120932579040527 sec
Episode 11797, loss:-1.8970, succeed, steps:123, total reward:1.7970, 0.3215944766998291 sec
Episode 11798, loss:-8.0545, fail, steps:160, total reward:-5.4868, 0.4278137683868408 sec
Episode 11799, loss:0.5922, succeed, steps:111, total reward:4.1154, 0.3096761703491211 sec
Episode 11800, loss:-2.6137, succeed, steps:93, total reward:2.7560, 0.2563941478729248 sec
Episode 11801, loss:3.6500, succeed, steps:89, total reward:5.7873, 0.24141860008239746 sec
Episode 11802, loss:-6.5986, fail, steps:161, total reward:-4.4147, 0.43044710

Episode 11881, loss:0.0608, succeed, steps:76, total reward:2.9015, 0.21158647537231445 sec
Episode 11882, loss:0.3020, succeed, steps:92, total reward:4.7274, 0.2469468116760254 sec
Episode 11883, loss:-0.7307, succeed, steps:104, total reward:3.5696, 0.27782535552978516 sec
Episode 11884, loss:-5.6714, succeed, steps:102, total reward:4.8689, 0.2757561206817627 sec
Episode 11885, loss:-3.3994, succeed, steps:118, total reward:7.8161, 0.3170156478881836 sec
Episode 11886, loss:-0.9633, succeed, steps:115, total reward:8.6203, 0.3174560070037842 sec
Episode 11887, loss:0.6471, succeed, steps:73, total reward:6.1393, 0.20212101936340332 sec
Episode 11888, loss:-16.4863, fail, steps:162, total reward:1.4002, 0.4276010990142822 sec
Episode 11889, loss:-0.2933, succeed, steps:83, total reward:3.1587, 0.2353215217590332 sec
Episode 11890, loss:1.8675, succeed, steps:88, total reward:3.9381, 0.23851609230041504 sec
Episode 11891, loss:0.4988, succeed, steps:71, total reward:4.6771, 0.1915605

Episode 11970, loss:0.4017, succeed, steps:104, total reward:3.4975, 0.28524160385131836 sec
Episode 11971, loss:0.0010, succeed, steps:113, total reward:1.8391, 0.3063771724700928 sec
Episode 11972, loss:-1.0413, succeed, steps:104, total reward:3.6418, 0.28490567207336426 sec
Episode 11973, loss:1.1145, succeed, steps:61, total reward:8.0577, 0.1686687469482422 sec
Episode 11974, loss:0.2568, succeed, steps:86, total reward:9.3137, 0.22682571411132812 sec
Episode 11975, loss:2.7145, succeed, steps:112, total reward:7.3982, 0.30849552154541016 sec
Episode 11976, loss:1.7260, succeed, steps:63, total reward:3.3870, 0.17282986640930176 sec
Episode 11977, loss:0.2725, succeed, steps:109, total reward:5.1261, 0.28603434562683105 sec
Episode 11978, loss:-1.3746, fail, steps:161, total reward:-2.8990, 0.4265108108520508 sec
Episode 11979, loss:0.9266, succeed, steps:111, total reward:3.4269, 0.30943822860717773 sec
Episode 11980, loss:1.9816, succeed, steps:109, total reward:8.7139, 0.29717

Episode 12058, loss:1.3580, succeed, steps:106, total reward:8.7968, 0.29668760299682617 sec
Episode 12059, loss:0.1780, succeed, steps:76, total reward:2.0293, 0.21466660499572754 sec
Episode 12060, loss:1.0277, succeed, steps:63, total reward:3.1706, 0.1705174446105957 sec
Episode 12061, loss:-0.7730, succeed, steps:109, total reward:1.9711, 0.28486084938049316 sec
Episode 12062, loss:0.0377, succeed, steps:46, total reward:3.2604, 0.12845349311828613 sec
Episode 12063, loss:1.9344, succeed, steps:124, total reward:10.8054, 0.32115912437438965 sec
Episode 12064, loss:0.1207, succeed, steps:67, total reward:4.0878, 0.18424463272094727 sec
Episode 12065, loss:-0.6146, succeed, steps:113, total reward:1.6555, 0.2985694408416748 sec
Episode 12066, loss:-8.9792, succeed, steps:154, total reward:3.0709, 0.4095017910003662 sec
Episode 12067, loss:0.4933, fail, steps:162, total reward:0.3895, 0.4461965560913086 sec
Episode 12068, loss:0.0919, succeed, steps:63, total reward:3.4592, 0.1834359

Episode 12147, loss:-4.5740, succeed, steps:120, total reward:6.3005, 0.3163797855377197 sec
Episode 12148, loss:1.2216, succeed, steps:66, total reward:2.3271, 0.18264102935791016 sec
Episode 12149, loss:-18.0371, fail, steps:160, total reward:-5.7655, 0.41977524757385254 sec
Episode 12150, loss:0.8622, succeed, steps:99, total reward:8.9724, 0.2765932083129883 sec
Episode 12151, loss:9.7456, succeed, steps:83, total reward:6.6022, 0.2283940315246582 sec
Episode 12152, loss:3.8812, succeed, steps:52, total reward:2.3570, 0.14139652252197266 sec
Episode 12153, loss:0.6907, succeed, steps:90, total reward:10.4472, 0.23532867431640625 sec
Episode 12154, loss:-0.8001, succeed, steps:127, total reward:6.5577, 0.39226722717285156 sec
Episode 12155, loss:4.5046, succeed, steps:132, total reward:1.3714, 0.3562626838684082 sec
Episode 12156, loss:0.5806, succeed, steps:111, total reward:7.4147, 0.3053257465362549 sec
Episode 12157, loss:-0.2255, succeed, steps:105, total reward:5.0746, 0.28748

Episode 12236, loss:-1.7604, fail, steps:161, total reward:-3.4770, 0.43709492683410645 sec
Episode 12237, loss:-3.1882, fail, steps:160, total reward:-5.4048, 0.4421360492706299 sec
Episode 12238, loss:-1.5226, succeed, steps:136, total reward:2.2164, 0.3816862106323242 sec
Episode 12239, loss:-5.5173, fail, steps:161, total reward:-1.1155, 0.4437289237976074 sec
Episode 12240, loss:0.7741, succeed, steps:156, total reward:-0.0850, 0.4353146553039551 sec
Episode 12241, loss:0.7118, succeed, steps:106, total reward:5.7139, 0.3028891086578369 sec
Episode 12242, loss:-0.4329, succeed, steps:117, total reward:0.2897, 0.32038259506225586 sec
Episode 12243, loss:1.1621, succeed, steps:88, total reward:1.1995, 0.24555015563964844 sec
Episode 12244, loss:0.4114, succeed, steps:121, total reward:3.7848, 0.32486605644226074 sec
Episode 12245, loss:-4.9917, succeed, steps:102, total reward:1.7139, 0.28115177154541016 sec
Episode 12246, loss:-3.4490, fail, steps:161, total reward:-2.1056, 0.43569

Episode 12325, loss:-14.6944, fail, steps:160, total reward:-6.1983, 0.44595813751220703 sec
Episode 12326, loss:-11.0153, fail, steps:159, total reward:-11.3632, 0.44496798515319824 sec
Episode 12327, loss:-9.8486, fail, steps:160, total reward:-5.4147, 0.5054950714111328 sec
Episode 12328, loss:-14.4421, fail, steps:159, total reward:-11.5796, 0.4454941749572754 sec
Episode 12329, loss:-41.6884, fail, steps:8, total reward:-9.9592, 0.04008364677429199 sec
Episode 12330, loss:-27.0318, fail, steps:160, total reward:-9.5796, 0.4032726287841797 sec
Episode 12331, loss:-19.3479, fail, steps:160, total reward:-11.5796, 0.426922082901001 sec
Episode 12332, loss:-15.9098, fail, steps:159, total reward:-11.2582, 0.4385993480682373 sec
Episode 12333, loss:-22.6820, fail, steps:160, total reward:-9.3468, 0.4413738250732422 sec
Episode 12334, loss:-22.0279, fail, steps:161, total reward:-1.6204, 0.44469714164733887 sec
Episode 12335, loss:-13.2532, fail, steps:159, total reward:-13.3018, 0.4414

Episode 12414, loss:-11.6025, fail, steps:161, total reward:-4.2704, 0.4217069149017334 sec
Episode 12415, loss:-14.2066, fail, steps:160, total reward:-6.3426, 0.4363434314727783 sec
Episode 12416, loss:3.7856, succeed, steps:133, total reward:2.4435, 0.3701441287994385 sec
Episode 12417, loss:2.2035, succeed, steps:90, total reward:6.7873, 0.25153350830078125 sec
Episode 12418, loss:1.7583, succeed, steps:85, total reward:1.7873, 0.2315843105316162 sec
Episode 12419, loss:0.3404, succeed, steps:75, total reward:4.9843, 0.20280909538269043 sec
Episode 12420, loss:4.6782, succeed, steps:99, total reward:8.9724, 0.2623100280761719 sec
Episode 12421, loss:3.0409, succeed, steps:77, total reward:4.1179, 0.20838189125061035 sec
Episode 12422, loss:-3.4026, succeed, steps:126, total reward:1.9076, 0.3429446220397949 sec
Episode 12423, loss:1.8158, succeed, steps:145, total reward:6.9072, 0.3901364803314209 sec
Episode 12424, loss:1.9711, succeed, steps:45, total reward:5.5433, 0.13181424140

Episode 12503, loss:-21.4067, fail, steps:159, total reward:-11.8582, 0.42557406425476074 sec
Episode 12504, loss:-3.6101, succeed, steps:127, total reward:2.4649, 0.3542289733886719 sec
Episode 12505, loss:-15.7658, fail, steps:161, total reward:-4.7032, 0.4432053565979004 sec
Episode 12506, loss:-7.8882, fail, steps:160, total reward:-8.1361, 0.44309329986572266 sec
Episode 12507, loss:-4.2249, succeed, steps:91, total reward:0.7560, 0.31336307525634766 sec
Episode 12508, loss:-1.5626, fail, steps:161, total reward:-1.5384, 0.43114352226257324 sec
Episode 12509, loss:1.4756, succeed, steps:98, total reward:4.0239, 0.2774970531463623 sec
Episode 12510, loss:-7.1646, fail, steps:161, total reward:-2.9712, 0.4398841857910156 sec
Episode 12511, loss:3.3119, succeed, steps:79, total reward:-1.3463, 0.22600030899047852 sec
Episode 12512, loss:-9.1522, fail, steps:161, total reward:-0.6826, 0.42734456062316895 sec
Episode 12513, loss:-23.6363, fail, steps:160, total reward:-11.8976, 0.43998

Episode 12592, loss:-53.9176, fail, steps:160, total reward:-9.5468, 0.44490694999694824 sec
Episode 12593, loss:-16.6684, fail, steps:160, total reward:-5.6311, 0.44771766662597656 sec
Episode 12594, loss:-7.9732, succeed, steps:96, total reward:-0.9147, 0.2761225700378418 sec
Episode 12595, loss:-2.1676, succeed, steps:67, total reward:2.7664, 0.18584680557250977 sec
Episode 12596, loss:1.5651, succeed, steps:56, total reward:-1.3465, 0.15354514122009277 sec
Episode 12597, loss:-13.8226, fail, steps:161, total reward:-4.4704, 0.42164039611816406 sec
Episode 12598, loss:-0.1687, succeed, steps:69, total reward:2.2114, 0.19572830200195312 sec
Episode 12599, loss:-29.6849, fail, steps:161, total reward:-4.1975, 0.42522263526916504 sec
Episode 12600, loss:-58.5185, fail, steps:159, total reward:-11.5075, 0.43318629264831543 sec
Episode 12601, loss:-23.9303, fail, steps:161, total reward:-5.4967, 0.44505977630615234 sec
Episode 12602, loss:-70.4462, fail, steps:159, total reward:-14.4559,

Episode 12681, loss:-20.9273, succeed, steps:153, total reward:-4.6719, 0.42810845375061035 sec
Episode 12682, loss:-13.7394, fail, steps:162, total reward:-0.1155, 0.451662540435791 sec
Episode 12683, loss:-2.6984, succeed, steps:153, total reward:2.1430, 0.4287080764770508 sec
Episode 12684, loss:0.0140, succeed, steps:88, total reward:3.4889, 0.3046860694885254 sec
Episode 12685, loss:-25.3365, fail, steps:160, total reward:-6.7754, 0.4305887222290039 sec
Episode 12686, loss:-11.3745, succeed, steps:122, total reward:-8.1065, 0.34288740158081055 sec
Episode 12687, loss:-9.0284, succeed, steps:92, total reward:-6.0918, 0.26090049743652344 sec
Episode 12688, loss:-17.8224, fail, steps:160, total reward:-5.6212, 0.4307577610015869 sec
Episode 12689, loss:0.8998, succeed, steps:61, total reward:-2.9286, 0.1777958869934082 sec
Episode 12690, loss:2.7961, succeed, steps:77, total reward:7.2729, 0.20623135566711426 sec
Episode 12691, loss:3.0746, succeed, steps:82, total reward:5.4579, 0.2

Episode 12770, loss:4.4251, succeed, steps:91, total reward:8.1480, 0.2581348419189453 sec
Episode 12771, loss:-2.3426, succeed, steps:124, total reward:0.2584, 0.3355696201324463 sec
Episode 12772, loss:2.1553, succeed, steps:80, total reward:3.7465, 0.2218306064605713 sec
Episode 12773, loss:-0.6300, succeed, steps:75, total reward:1.7736, 0.20314359664916992 sec
Episode 12774, loss:0.0332, succeed, steps:68, total reward:4.8878, 0.18213844299316406 sec
Episode 12775, loss:4.0907, succeed, steps:117, total reward:6.5276, 0.35630226135253906 sec
Episode 12776, loss:1.7260, succeed, steps:113, total reward:2.5276, 0.3059866428375244 sec
Episode 12777, loss:2.3768, succeed, steps:94, total reward:7.4880, 0.25789713859558105 sec
Episode 12778, loss:2.9697, succeed, steps:103, total reward:6.3017, 0.28238797187805176 sec
Episode 12779, loss:2.4522, succeed, steps:129, total reward:5.5470, 0.347491979598999 sec
Episode 12780, loss:2.3056, succeed, steps:68, total reward:4.2386, 0.190775394

Episode 12859, loss:-4.3012, succeed, steps:146, total reward:-1.2692, 0.3810262680053711 sec
Episode 12860, loss:1.7012, succeed, steps:128, total reward:4.6191, 0.40784335136413574 sec
Episode 12861, loss:0.0556, succeed, steps:53, total reward:3.5012, 0.15166664123535156 sec
Episode 12862, loss:2.3432, succeed, steps:90, total reward:3.4880, 0.2360544204711914 sec
Episode 12863, loss:-0.5639, succeed, steps:114, total reward:0.1562, 0.3021557331085205 sec
Episode 12864, loss:-2.7883, fail, steps:161, total reward:-1.7449, 0.4306297302246094 sec
Episode 12865, loss:3.1774, fail, steps:162, total reward:3.2658, 0.4442160129547119 sec
Episode 12866, loss:-4.1085, succeed, steps:100, total reward:-0.6697, 0.2855236530303955 sec
Episode 12867, loss:-8.1770, succeed, steps:102, total reward:2.0025, 0.2832791805267334 sec
Episode 12868, loss:-1.7700, succeed, steps:148, total reward:1.3079, 0.3957381248474121 sec
Episode 12869, loss:-23.5259, fail, steps:160, total reward:-6.7655, 0.438413

Episode 12948, loss:0.2244, succeed, steps:99, total reward:2.0132, 0.27280116081237793 sec
Episode 12949, loss:2.3714, succeed, steps:113, total reward:9.6311, 0.30639147758483887 sec
Episode 12950, loss:6.0413, succeed, steps:45, total reward:5.1269, 0.12836217880249023 sec
Episode 12951, loss:1.4870, succeed, steps:93, total reward:6.5602, 0.24131560325622559 sec
Episode 12952, loss:-0.1887, succeed, steps:90, total reward:6.6430, 0.2396531105041504 sec
Episode 12953, loss:-4.4574, fail, steps:162, total reward:3.3445, 0.42681074142456055 sec
Episode 12954, loss:0.0871, succeed, steps:94, total reward:3.5396, 0.26245999336242676 sec
Episode 12955, loss:0.2594, succeed, steps:69, total reward:2.3557, 0.19167709350585938 sec
Episode 12956, loss:1.4657, succeed, steps:124, total reward:1.9085, 0.323976993560791 sec
Episode 12957, loss:-4.1145, succeed, steps:139, total reward:1.6450, 0.37361741065979004 sec
Episode 12958, loss:-0.2532, succeed, steps:94, total reward:3.4674, 0.26233410

Episode 13037, loss:3.0512, succeed, steps:100, total reward:10.1888, 0.2601752281188965 sec
Episode 13038, loss:0.7391, succeed, steps:93, total reward:5.8388, 0.256742000579834 sec
Episode 13039, loss:0.3263, succeed, steps:87, total reward:7.0144, 0.23382234573364258 sec
Episode 13040, loss:0.4521, succeed, steps:97, total reward:7.4052, 0.25932788848876953 sec
Episode 13041, loss:0.3714, succeed, steps:87, total reward:6.9423, 0.23463082313537598 sec
Episode 13042, loss:-0.5789, succeed, steps:120, total reward:2.9291, 0.3193387985229492 sec
Episode 13043, loss:0.6839, succeed, steps:79, total reward:6.0457, 0.2172086238861084 sec
Episode 13044, loss:-1.2421, succeed, steps:49, total reward:-0.4988, 0.13405871391296387 sec
Episode 13045, loss:2.0324, succeed, steps:100, total reward:6.7452, 0.25998735427856445 sec
Episode 13046, loss:1.4157, succeed, steps:58, total reward:4.8577, 0.1567373275756836 sec
Episode 13047, loss:0.3660, succeed, steps:103, total reward:6.2296, 0.27204918

Episode 13126, loss:0.5013, succeed, steps:103, total reward:5.6132, 0.26787447929382324 sec
Episode 13127, loss:-0.4743, succeed, steps:77, total reward:-0.5093, 0.2603118419647217 sec
Episode 13128, loss:0.2731, succeed, steps:82, total reward:6.2514, 0.2255408763885498 sec
Episode 13129, loss:0.0614, succeed, steps:91, total reward:-1.9662, 0.2421553134918213 sec
Episode 13130, loss:3.1818, succeed, steps:77, total reward:6.9122, 0.20731139183044434 sec
Episode 13131, loss:0.9068, succeed, steps:68, total reward:5.4485, 0.18110370635986328 sec
Episode 13132, loss:2.2966, succeed, steps:113, total reward:6.5482, 0.296558141708374 sec
Episode 13133, loss:1.3740, succeed, steps:119, total reward:5.8775, 0.31881070137023926 sec
Episode 13134, loss:0.6050, succeed, steps:75, total reward:5.3450, 0.20633363723754883 sec
Episode 13135, loss:2.4960, succeed, steps:72, total reward:5.6443, 0.1923210620880127 sec
Episode 13136, loss:0.0187, succeed, steps:23, total reward:3.7716, 0.0660898685

Episode 13216, loss:-7.5666, fail, steps:160, total reward:-5.3327, 0.5069131851196289 sec
Episode 13217, loss:-10.5862, fail, steps:160, total reward:-6.9819, 0.4505276679992676 sec
Episode 13218, loss:-13.1083, fail, steps:159, total reward:-9.7762, 0.4470679759979248 sec
Episode 13219, loss:-12.5667, fail, steps:160, total reward:-8.2484, 0.44994497299194336 sec
Episode 13220, loss:-37.4302, fail, steps:159, total reward:-16.6591, 0.44788622856140137 sec
Episode 13221, loss:-12.5269, fail, steps:160, total reward:-5.6212, 0.4526350498199463 sec
Episode 13222, loss:-22.1249, fail, steps:160, total reward:-4.9327, 0.45075106620788574 sec
Episode 13223, loss:-13.4034, fail, steps:159, total reward:-10.2090, 0.4478268623352051 sec
Episode 13224, loss:-9.7199, fail, steps:160, total reward:-6.8606, 0.44945383071899414 sec
Episode 13225, loss:-7.0542, fail, steps:160, total reward:-6.3327, 0.4520456790924072 sec
Episode 13226, loss:-13.5768, fail, steps:160, total reward:-7.7041, 0.453248

Episode 13305, loss:0.4297, succeed, steps:95, total reward:2.1059, 0.25786280632019043 sec
Episode 13306, loss:-0.6134, succeed, steps:116, total reward:3.1661, 0.3110086917877197 sec
Episode 13307, loss:6.3311, succeed, steps:92, total reward:5.7930, 0.3065826892852783 sec
Episode 13308, loss:-1.6701, succeed, steps:161, total reward:4.0494, 0.4270052909851074 sec
Episode 13309, loss:0.4855, succeed, steps:106, total reward:2.7753, 0.29653429985046387 sec
Episode 13310, loss:-4.7448, succeed, steps:98, total reward:1.0853, 0.2683370113372803 sec
Episode 13311, loss:1.7420, succeed, steps:106, total reward:5.8582, 0.29075121879577637 sec
Episode 13312, loss:4.3053, succeed, steps:94, total reward:7.6323, 0.2556190490722656 sec
Episode 13313, loss:1.1505, succeed, steps:111, total reward:0.8883, 0.29779744148254395 sec
Episode 13314, loss:1.8298, succeed, steps:107, total reward:10.2296, 0.28990745544433594 sec
Episode 13315, loss:3.2274, succeed, steps:114, total reward:7.2597, 0.3092

Episode 13394, loss:-1.4668, succeed, steps:108, total reward:-1.8232, 0.29175353050231934 sec
Episode 13395, loss:5.8250, succeed, steps:114, total reward:7.9810, 0.30793237686157227 sec
Episode 13396, loss:-0.8676, succeed, steps:118, total reward:4.6611, 0.31955671310424805 sec
Episode 13397, loss:0.0749, succeed, steps:59, total reward:2.7584, 0.16494202613830566 sec
Episode 13398, loss:0.3564, succeed, steps:47, total reward:4.3883, 0.1269381046295166 sec
Episode 13399, loss:-0.5551, succeed, steps:97, total reward:0.6952, 0.2549605369567871 sec
Episode 13400, loss:-1.1544, succeed, steps:123, total reward:-1.3973, 0.3246753215789795 sec
Episode 13401, loss:1.3083, succeed, steps:97, total reward:7.7658, 0.26432037353515625 sec
Episode 13402, loss:0.0539, succeed, steps:94, total reward:4.0773, 0.25347280502319336 sec
Episode 13403, loss:1.0262, succeed, steps:95, total reward:5.2609, 0.25569772720336914 sec
Episode 13404, loss:-21.9630, fail, steps:161, total reward:-0.5384, 0.42

Episode 13483, loss:3.0544, succeed, steps:104, total reward:3.8582, 0.28919291496276855 sec
Episode 13484, loss:-6.9466, fail, steps:161, total reward:-2.6204, 0.4335203170776367 sec
Episode 13485, loss:-1.1898, succeed, steps:107, total reward:5.9204, 0.3021371364593506 sec
Episode 13486, loss:-1.8372, succeed, steps:109, total reward:2.1875, 0.29839420318603516 sec
Episode 13487, loss:-1.2821, succeed, steps:110, total reward:-0.6167, 0.29973435401916504 sec
Episode 13488, loss:-0.7276, succeed, steps:91, total reward:4.1995, 0.2502129077911377 sec
Episode 13489, loss:0.2739, succeed, steps:70, total reward:3.7164, 0.19018268585205078 sec
Episode 13490, loss:3.9263, succeed, steps:90, total reward:6.1709, 0.23855996131896973 sec
Episode 13491, loss:0.7645, succeed, steps:96, total reward:2.7452, 0.25883960723876953 sec
Episode 13492, loss:-17.5821, fail, steps:160, total reward:-7.0212, 0.42246031761169434 sec
Episode 13493, loss:-8.2251, fail, steps:162, total reward:3.8330, 0.4414

Episode 13572, loss:-0.1882, succeed, steps:104, total reward:4.0746, 0.2970728874206543 sec
Episode 13573, loss:-8.7519, fail, steps:161, total reward:-0.4662, 0.4335758686065674 sec
Episode 13574, loss:-7.9498, fail, steps:162, total reward:2.7609, 0.4459702968597412 sec
Episode 13575, loss:-1.8577, succeed, steps:77, total reward:3.6851, 0.22431635856628418 sec
Episode 13576, loss:-0.4206, succeed, steps:107, total reward:6.9467, 0.28664541244506836 sec
Episode 13577, loss:1.5610, succeed, steps:111, total reward:10.9303, 0.2985420227050781 sec
Episode 13578, loss:0.7970, succeed, steps:97, total reward:7.0609, 0.2648491859436035 sec
Episode 13579, loss:0.2507, succeed, steps:64, total reward:3.8985, 0.23749327659606934 sec
Episode 13580, loss:0.6227, succeed, steps:100, total reward:10.4773, 0.26442503929138184 sec
Episode 13581, loss:-0.7828, succeed, steps:73, total reward:3.5614, 0.1967313289642334 sec
Episode 13582, loss:-29.3116, fail, steps:161, total reward:-2.8990, 0.422674

Episode 13661, loss:2.9332, succeed, steps:97, total reward:7.2609, 0.2613511085510254 sec
Episode 13662, loss:-2.9046, succeed, steps:140, total reward:3.2057, 0.3726181983947754 sec
Episode 13663, loss:1.0036, succeed, steps:128, total reward:8.0626, 0.35469484329223633 sec
Episode 13664, loss:3.9002, succeed, steps:35, total reward:2.2861, 0.10424375534057617 sec
Episode 13665, loss:5.6160, succeed, steps:91, total reward:8.1480, 0.23539328575134277 sec
Episode 13666, loss:1.1531, succeed, steps:81, total reward:4.8186, 0.21466660499572754 sec
Episode 13667, loss:1.2406, succeed, steps:68, total reward:5.3763, 0.18158674240112305 sec
Episode 13668, loss:2.4393, succeed, steps:45, total reward:5.7597, 0.12087368965148926 sec
Episode 13669, loss:2.9538, succeed, steps:132, total reward:9.2683, 0.3386876583099365 sec
Episode 13670, loss:0.1998, succeed, steps:99, total reward:9.1888, 0.26819658279418945 sec
Episode 13671, loss:-4.2920, fail, steps:161, total reward:-1.2498, 0.429962396

Episode 13750, loss:-6.4820, fail, steps:161, total reward:-2.5384, 0.4150218963623047 sec
Episode 13751, loss:-21.9817, fail, steps:160, total reward:-6.6311, 0.4321138858795166 sec
Episode 13752, loss:4.9985, succeed, steps:112, total reward:8.0540, 0.31465935707092285 sec
Episode 13753, loss:0.2154, succeed, steps:82, total reward:9.2621, 0.23206472396850586 sec
Episode 13754, loss:-0.0410, succeed, steps:101, total reward:1.2189, 0.26947808265686035 sec
Episode 13755, loss:0.5494, succeed, steps:95, total reward:8.7044, 0.2581205368041992 sec
Episode 13756, loss:-1.2585, succeed, steps:79, total reward:9.5614, 0.21493792533874512 sec
Episode 13757, loss:-0.3514, succeed, steps:82, total reward:1.9423, 0.21987485885620117 sec
Episode 13758, loss:-1.3780, fail, steps:163, total reward:5.1937, 0.4272933006286621 sec
Episode 13759, loss:-1.0902, succeed, steps:127, total reward:3.2584, 0.3514435291290283 sec
Episode 13760, loss:0.0585, succeed, steps:84, total reward:7.5465, 0.23944878

Episode 13840, loss:0.8964, succeed, steps:87, total reward:4.2201, 0.22589755058288574 sec
Episode 13841, loss:-0.4496, succeed, steps:115, total reward:5.0325, 0.3022480010986328 sec
Episode 13842, loss:0.6834, succeed, steps:69, total reward:6.3042, 0.18786954879760742 sec
Episode 13843, loss:-7.9843, succeed, steps:151, total reward:3.8521, 0.3992278575897217 sec
Episode 13844, loss:0.6375, succeed, steps:68, total reward:1.2114, 0.19109725952148438 sec
Episode 13845, loss:-1.0722, succeed, steps:111, total reward:4.2597, 0.2928433418273926 sec
Episode 13846, loss:0.8378, succeed, steps:103, total reward:3.2189, 0.2801327705383301 sec
Episode 13847, loss:0.2000, succeed, steps:75, total reward:5.3614, 0.21289920806884766 sec
Episode 13848, loss:0.2484, succeed, steps:41, total reward:5.1311, 0.11193656921386719 sec
Episode 13849, loss:0.1358, succeed, steps:94, total reward:4.7658, 0.24219918251037598 sec
Episode 13850, loss:0.3762, succeed, steps:102, total reward:5.6624, 0.269124

Episode 13929, loss:2.7692, succeed, steps:139, total reward:8.6599, 0.3769185543060303 sec
Episode 13930, loss:1.1386, succeed, steps:93, total reward:-0.6548, 0.2599008083343506 sec
Episode 13931, loss:-1.6645, succeed, steps:107, total reward:0.1875, 0.28978395462036133 sec
Episode 13932, loss:0.4054, succeed, steps:63, total reward:6.8306, 0.17489385604858398 sec
Episode 13933, loss:-3.9208, succeed, steps:148, total reward:7.6179, 0.38852763175964355 sec
Episode 13934, loss:-0.4154, succeed, steps:120, total reward:3.0012, 0.33332228660583496 sec
Episode 13935, loss:4.0628, succeed, steps:107, total reward:10.5181, 0.2944638729095459 sec
Episode 13936, loss:-0.8499, succeed, steps:46, total reward:-0.1831, 0.13100886344909668 sec
Episode 13937, loss:0.1950, succeed, steps:94, total reward:4.1888, 0.24513459205627441 sec
Episode 13938, loss:1.0172, succeed, steps:59, total reward:6.0020, 0.1600816249847412 sec
Episode 13939, loss:1.4212, succeed, steps:102, total reward:12.2773, 0.

Episode 14017, loss:0.0641, succeed, steps:124, total reward:3.9905, 0.3199341297149658 sec
Episode 14018, loss:1.2290, succeed, steps:106, total reward:6.5074, 0.28662800788879395 sec
Episode 14019, loss:0.7625, succeed, steps:122, total reward:2.1348, 0.3286318778991699 sec
Episode 14020, loss:-0.1016, succeed, steps:110, total reward:3.2597, 0.3005204200744629 sec
Episode 14021, loss:2.8507, succeed, steps:69, total reward:5.9435, 0.19463467597961426 sec
Episode 14022, loss:1.0502, succeed, steps:132, total reward:-4.8665, 0.34647345542907715 sec
Episode 14023, loss:2.2325, succeed, steps:56, total reward:6.2848, 0.1587541103363037 sec
Episode 14024, loss:0.3513, succeed, steps:118, total reward:1.2176, 0.3093132972717285 sec
Episode 14025, loss:1.3796, succeed, steps:147, total reward:4.2563, 0.3940577507019043 sec
Episode 14026, loss:-5.7375, succeed, steps:156, total reward:-0.7342, 0.4758727550506592 sec
Episode 14027, loss:-2.0223, succeed, steps:101, total reward:4.5902, 0.287

Episode 14106, loss:-0.1409, succeed, steps:92, total reward:5.7044, 0.24770736694335938 sec
Episode 14107, loss:0.2760, succeed, steps:45, total reward:5.0547, 0.1250746250152588 sec
Episode 14108, loss:-7.9410, fail, steps:161, total reward:-3.9819, 0.4186282157897949 sec
Episode 14109, loss:0.1364, succeed, steps:106, total reward:-0.5961, 0.2946794033050537 sec
Episode 14110, loss:-5.9488, fail, steps:161, total reward:-3.9819, 0.4332094192504883 sec
Episode 14111, loss:-6.4866, fail, steps:160, total reward:-4.5491, 0.4444906711578369 sec
Episode 14112, loss:-8.9143, fail, steps:160, total reward:-7.4147, 0.44329333305358887 sec
Episode 14113, loss:0.9367, succeed, steps:116, total reward:5.9604, 0.32673215866088867 sec
Episode 14114, loss:-0.0333, succeed, steps:128, total reward:1.2477, 0.3509993553161621 sec
Episode 14115, loss:-1.2632, succeed, steps:90, total reward:-0.8998, 0.2515754699707031 sec
Episode 14116, loss:-4.8186, fail, steps:161, total reward:-0.6826, 0.488262891

Episode 14195, loss:-3.8580, fail, steps:161, total reward:-1.9384, 0.42096972465515137 sec
Episode 14196, loss:3.8875, succeed, steps:90, total reward:6.0659, 0.253187894821167 sec
Episode 14197, loss:-8.4303, fail, steps:161, total reward:-3.8376, 0.43247222900390625 sec
Episode 14198, loss:0.8501, succeed, steps:103, total reward:2.5696, 0.2904183864593506 sec
Episode 14199, loss:-2.8927, fail, steps:162, total reward:0.3174, 0.4381124973297119 sec
Episode 14200, loss:0.3842, succeed, steps:95, total reward:1.6731, 0.2684178352355957 sec
Episode 14201, loss:-11.7522, fail, steps:161, total reward:-3.1155, 0.4324343204498291 sec
Episode 14202, loss:-2.6762, fail, steps:161, total reward:-4.8475, 0.4489414691925049 sec
Episode 14203, loss:-4.3741, fail, steps:161, total reward:-1.8498, 0.501060962677002 sec
Episode 14204, loss:-16.8380, fail, steps:161, total reward:-3.8204, 0.448927640914917 sec
Episode 14205, loss:4.5325, succeed, steps:110, total reward:2.6104, 0.3121800422668457 s

Episode 14284, loss:-6.8040, succeed, steps:128, total reward:4.4027, 0.34704041481018066 sec
Episode 14285, loss:-3.4446, succeed, steps:129, total reward:-1.5565, 0.35244250297546387 sec
Episode 14286, loss:0.0071, succeed, steps:80, total reward:7.1179, 0.22503447532653809 sec
Episode 14287, loss:3.3365, succeed, steps:76, total reward:6.0565, 0.2064344882965088 sec
Episode 14288, loss:0.2499, succeed, steps:122, total reward:3.8504, 0.3207988739013672 sec
Episode 14289, loss:1.5888, succeed, steps:124, total reward:7.5062, 0.3336958885192871 sec
Episode 14290, loss:-0.6757, succeed, steps:109, total reward:-0.6789, 0.29856443405151367 sec
Episode 14291, loss:0.0034, succeed, steps:79, total reward:5.4851, 0.21643376350402832 sec
Episode 14292, loss:3.7626, succeed, steps:113, total reward:6.5482, 0.3003876209259033 sec
Episode 14293, loss:-1.1818, succeed, steps:88, total reward:4.5709, 0.2399909496307373 sec
Episode 14294, loss:0.2203, succeed, steps:53, total reward:3.3570, 0.149

Episode 14373, loss:0.2419, succeed, steps:104, total reward:7.3017, 0.2818107604980469 sec
Episode 14374, loss:-0.0006, succeed, steps:40, total reward:3.8425, 0.1124720573425293 sec
Episode 14375, loss:0.4647, succeed, steps:48, total reward:4.9719, 0.12512898445129395 sec
Episode 14376, loss:1.2195, succeed, steps:97, total reward:7.1888, 0.24810051918029785 sec
Episode 14377, loss:-0.9236, succeed, steps:98, total reward:7.9002, 0.26337194442749023 sec
Episode 14378, loss:0.1910, succeed, steps:37, total reward:4.1418, 0.10275578498840332 sec
Episode 14379, loss:0.4584, succeed, steps:94, total reward:7.1274, 0.24059581756591797 sec
Episode 14380, loss:0.5459, succeed, steps:105, total reward:7.8689, 0.27611804008483887 sec
Episode 14381, loss:3.9002, succeed, steps:88, total reward:11.5301, 0.23674368858337402 sec
Episode 14382, loss:0.0011, succeed, steps:53, total reward:6.5841, 0.14346528053283691 sec
Episode 14383, loss:0.4196, succeed, steps:87, total reward:7.0865, 0.2243826

Episode 14463, loss:4.5901, succeed, steps:91, total reward:4.3438, 0.24058985710144043 sec
Episode 14464, loss:-17.4150, fail, steps:162, total reward:0.6059, 0.4244880676269531 sec
Episode 14465, loss:3.0249, succeed, steps:44, total reward:4.0383, 0.1290903091430664 sec
Episode 14466, loss:2.2772, succeed, steps:83, total reward:2.7259, 0.21521568298339844 sec
Episode 14467, loss:-1.6589, succeed, steps:72, total reward:-2.3641, 0.19041800498962402 sec
Episode 14468, loss:0.7882, succeed, steps:133, total reward:8.6813, 0.34342050552368164 sec
Episode 14469, loss:2.0237, succeed, steps:101, total reward:3.9410, 0.27493977546691895 sec
Episode 14470, loss:-18.8732, fail, steps:160, total reward:-5.8376, 0.4315800666809082 sec
Episode 14471, loss:1.3914, succeed, steps:86, total reward:6.0144, 0.2416369915008545 sec
Episode 14472, loss:0.2643, succeed, steps:100, total reward:6.8174, 0.2692070007324219 sec
Episode 14473, loss:2.8603, succeed, steps:104, total reward:3.7139, 0.27980542

Episode 14552, loss:0.9135, succeed, steps:94, total reward:0.5288, 0.2658827304840088 sec
Episode 14553, loss:0.0955, succeed, steps:56, total reward:2.7692, 0.15634608268737793 sec
Episode 14554, loss:-18.0755, fail, steps:160, total reward:-5.8376, 0.4163661003112793 sec
Episode 14555, loss:0.3721, succeed, steps:77, total reward:7.0565, 0.21888303756713867 sec
Episode 14556, loss:-1.2937, succeed, steps:126, total reward:-1.6572, 0.3397388458251953 sec
Episode 14557, loss:-6.1862, succeed, steps:137, total reward:2.5278, 0.37156105041503906 sec
Episode 14558, loss:0.1287, succeed, steps:62, total reward:5.1978, 0.17656707763671875 sec
Episode 14559, loss:2.1565, succeed, steps:105, total reward:4.5696, 0.27736687660217285 sec
Episode 14560, loss:-0.0094, succeed, steps:133, total reward:2.4435, 0.3599262237548828 sec
Episode 14561, loss:1.1985, succeed, steps:95, total reward:4.6117, 0.2640814781188965 sec
Episode 14562, loss:0.3434, succeed, steps:134, total reward:3.5156, 0.35967

Episode 14641, loss:-0.4069, succeed, steps:103, total reward:-1.3525, 0.2754371166229248 sec
Episode 14642, loss:0.9343, succeed, steps:129, total reward:4.2027, 0.3436293601989746 sec
Episode 14643, loss:-0.8638, fail, steps:161, total reward:-2.4662, 0.43310022354125977 sec
Episode 14644, loss:1.1803, succeed, steps:130, total reward:-0.4844, 0.3599843978881836 sec
Episode 14645, loss:0.2183, succeed, steps:87, total reward:7.2308, 0.2418811321258545 sec
Episode 14646, loss:0.7287, succeed, steps:82, total reward:5.0579, 0.22227954864501953 sec
Episode 14647, loss:0.7652, succeed, steps:102, total reward:8.3846, 0.270521879196167 sec
Episode 14648, loss:0.3417, succeed, steps:83, total reward:6.1694, 0.23116707801818848 sec
Episode 14649, loss:-33.5486, fail, steps:160, total reward:-7.8376, 0.4221813678741455 sec
Episode 14650, loss:0.0539, succeed, steps:96, total reward:5.6838, 0.2695753574371338 sec
Episode 14651, loss:-1.7521, succeed, steps:104, total reward:6.3246, 0.28125262

Episode 14730, loss:-0.1728, succeed, steps:83, total reward:2.4537, 0.23472023010253906 sec
Episode 14731, loss:0.0444, succeed, steps:71, total reward:1.0007, 0.1929314136505127 sec
Episode 14732, loss:-0.0686, succeed, steps:101, total reward:7.6731, 0.2659108638763428 sec
Episode 14733, loss:-6.5288, fail, steps:162, total reward:-2.0498, 0.42844581604003906 sec
Episode 14734, loss:0.3796, succeed, steps:87, total reward:7.2308, 0.2441098690032959 sec
Episode 14735, loss:-0.0856, succeed, steps:93, total reward:3.3330, 0.2514607906341553 sec
Episode 14736, loss:0.1759, succeed, steps:19, total reward:3.0709, 0.05819058418273926 sec
Episode 14737, loss:-0.5013, fail, steps:160, total reward:-5.4770, 0.4105062484741211 sec
Episode 14738, loss:-0.0154, succeed, steps:81, total reward:4.6022, 0.2279949188232422 sec
Episode 14739, loss:-3.5795, fail, steps:162, total reward:3.1216, 0.42621827125549316 sec
Episode 14740, loss:-0.5277, succeed, steps:67, total reward:3.8714, 0.18947339057

Episode 14819, loss:0.1561, succeed, steps:94, total reward:4.1888, 0.25568199157714844 sec
Episode 14820, loss:-1.1716, succeed, steps:89, total reward:5.4266, 0.2397449016571045 sec
Episode 14821, loss:0.5176, succeed, steps:86, total reward:5.6537, 0.22921228408813477 sec
Episode 14822, loss:0.4379, succeed, steps:90, total reward:3.1274, 0.2945425510406494 sec
Episode 14823, loss:0.0009, succeed, steps:40, total reward:3.8425, 0.10973191261291504 sec
Episode 14824, loss:0.1819, succeed, steps:79, total reward:2.4579, 0.2076570987701416 sec
Episode 14825, loss:-2.3046, succeed, steps:102, total reward:5.5181, 0.26517391204833984 sec
Episode 14826, loss:0.0852, succeed, steps:62, total reward:-1.1844, 0.16685795783996582 sec
Episode 14827, loss:-13.6661, fail, steps:160, total reward:-6.0311, 0.4145088195800781 sec
Episode 14828, loss:-0.3400, succeed, steps:126, total reward:1.5306, 0.34560680389404297 sec
Episode 14829, loss:-0.1429, succeed, steps:86, total reward:2.7873, 0.237346

Episode 14909, loss:-6.4729, succeed, steps:142, total reward:1.7621, 0.3609294891357422 sec
Episode 14910, loss:0.3691, succeed, steps:114, total reward:7.4039, 0.3598484992980957 sec
Episode 14911, loss:0.0011, succeed, steps:52, total reward:6.0169, 0.1489243507385254 sec
Episode 14912, loss:-0.8859, fail, steps:160, total reward:-4.4770, 0.4131762981414795 sec
Episode 14913, loss:0.3120, succeed, steps:88, total reward:8.1587, 0.24508380889892578 sec
Episode 14914, loss:-0.0760, succeed, steps:76, total reward:-0.9978, 0.2058556079864502 sec
Episode 14915, loss:0.0270, succeed, steps:95, total reward:5.1888, 0.250917911529541 sec
Episode 14916, loss:-1.2290, succeed, steps:125, total reward:-1.7523, 0.3305671215057373 sec
Episode 14917, loss:0.0756, succeed, steps:96, total reward:9.4159, 0.26158952713012695 sec
Episode 14918, loss:0.2112, succeed, steps:51, total reward:4.8726, 0.1398923397064209 sec
Episode 14919, loss:0.3714, succeed, steps:139, total reward:2.2778, 0.3633141517

Episode 14998, loss:2.4679, succeed, steps:147, total reward:3.0301, 0.3900449275970459 sec
Episode 14999, loss:-0.1650, succeed, steps:135, total reward:8.1756, 0.3736288547515869 sec
Checkpoint saved at episode 15000 to datasets/rl_sort_transformer_easy/list8_transformer4_192_gamma09_step210_v2/ckpt_15000_0.8320_105.09.pth
Episode 15000, loss:-1.1295, succeed, steps:64, total reward:3.4985, 0.21074700355529785 sec
Episode 15001, loss:0.0632, succeed, steps:78, total reward:1.6743, 0.20885682106018066 sec
Episode 15002, loss:-0.0167, succeed, steps:63, total reward:3.5313, 0.16896963119506836 sec
Episode 15003, loss:0.0719, succeed, steps:95, total reward:1.3124, 0.2479710578918457 sec
Episode 15004, loss:0.9667, succeed, steps:82, total reward:5.4579, 0.22013020515441895 sec
Episode 15005, loss:-11.4061, fail, steps:161, total reward:-2.3941, 0.4218330383300781 sec
Episode 15006, loss:-12.7005, fail, steps:161, total reward:-3.1884, 0.4433422088623047 sec
Episode 15007, loss:0.0877, 

Episode 15086, loss:-6.3971, fail, steps:162, total reward:0.2330, 0.42712855339050293 sec
Episode 15087, loss:-14.5210, fail, steps:161, total reward:-0.2498, 0.4401686191558838 sec
Episode 15088, loss:-0.0038, succeed, steps:74, total reward:4.0893, 0.21778297424316406 sec
Episode 15089, loss:0.0198, succeed, steps:86, total reward:6.4472, 0.23158836364746094 sec
Episode 15090, loss:0.4660, succeed, steps:98, total reward:8.3330, 0.2615475654602051 sec
Episode 15091, loss:-10.0593, fail, steps:160, total reward:-7.1262, 0.42266273498535156 sec
Episode 15092, loss:6.2571, succeed, steps:53, total reward:3.2127, 0.15532302856445312 sec
Episode 15093, loss:-11.3911, fail, steps:161, total reward:-2.0334, 0.4230160713195801 sec
Episode 15094, loss:-12.3637, fail, steps:160, total reward:-7.2048, 0.4364769458770752 sec
Episode 15095, loss:0.3019, succeed, steps:71, total reward:8.1599, 0.21439123153686523 sec
Episode 15096, loss:0.4145, succeed, steps:98, total reward:5.0338, 0.2616713047

Episode 15176, loss:-1.3681, succeed, steps:130, total reward:-1.2057, 0.34055018424987793 sec
Episode 15177, loss:1.9061, succeed, steps:121, total reward:0.5577, 0.33194446563720703 sec
Episode 15178, loss:0.1230, succeed, steps:31, total reward:1.4410, 0.09110569953918457 sec
Episode 15179, loss:-2.8106, succeed, steps:101, total reward:3.5082, 0.25803422927856445 sec
Episode 15180, loss:-0.5726, succeed, steps:132, total reward:-7.0928, 0.3477051258087158 sec
Episode 15181, loss:-14.6737, fail, steps:159, total reward:-11.0910, 0.42699718475341797 sec
Episode 15182, loss:-5.8363, fail, steps:162, total reward:-0.1155, 0.44280290603637695 sec
Episode 15183, loss:0.1328, succeed, steps:71, total reward:4.6443, 0.20686554908752441 sec
Episode 15184, loss:-0.0303, succeed, steps:93, total reward:2.6838, 0.2528243064880371 sec
Episode 15185, loss:-16.2424, fail, steps:160, total reward:-7.7590, 0.42350006103515625 sec
Episode 15186, loss:-6.7393, fail, steps:160, total reward:-5.9918, 0

Episode 15265, loss:-0.4623, succeed, steps:80, total reward:3.0579, 0.23111462593078613 sec
Episode 15266, loss:1.5900, succeed, steps:149, total reward:-1.5685, 0.4015223979949951 sec
Episode 15267, loss:-5.7474, succeed, steps:152, total reward:-5.2391, 0.4197721481323242 sec
Episode 15268, loss:-17.0113, fail, steps:160, total reward:-8.5697, 0.4422032833099365 sec
Episode 15269, loss:0.0490, succeed, steps:102, total reward:5.5902, 0.2906057834625244 sec
Episode 15270, loss:-6.5976, fail, steps:163, total reward:6.6372, 0.44220995903015137 sec
Episode 15271, loss:-0.7396, succeed, steps:62, total reward:2.2034, 0.18272042274475098 sec
Episode 15272, loss:2.4063, succeed, steps:123, total reward:6.7226, 0.32546210289001465 sec
Episode 15273, loss:1.7443, succeed, steps:116, total reward:-0.9267, 0.3155062198638916 sec
Episode 15274, loss:0.3934, succeed, steps:93, total reward:10.1644, 0.2563960552215576 sec
Episode 15275, loss:-7.9152, fail, steps:161, total reward:-0.1777, 0.4295

Episode 15354, loss:0.7230, succeed, steps:100, total reward:3.5902, 0.271470308303833 sec
Episode 15355, loss:-3.2186, succeed, steps:114, total reward:3.5997, 0.3090484142303467 sec
Episode 15356, loss:-7.1863, fail, steps:160, total reward:-11.0976, 0.42859554290771484 sec
Episode 15357, loss:-6.3513, fail, steps:160, total reward:-5.4147, 0.44074177742004395 sec
Episode 15358, loss:1.9085, succeed, steps:107, total reward:7.2910, 0.3026747703552246 sec
Episode 15359, loss:0.5778, succeed, steps:85, total reward:5.3029, 0.23509454727172852 sec
Episode 15360, loss:0.0991, succeed, steps:78, total reward:5.2621, 0.21219301223754883 sec
Episode 15361, loss:-0.1067, succeed, steps:57, total reward:1.0470, 0.15526890754699707 sec
Episode 15362, loss:-7.0013, fail, steps:159, total reward:-12.9411, 0.41809844970703125 sec
Episode 15363, loss:0.0146, succeed, steps:58, total reward:5.4905, 0.1672348976135254 sec
Episode 15364, loss:-3.3093, fail, steps:160, total reward:-7.7098, 0.42120790

Episode 15443, loss:-10.0306, fail, steps:161, total reward:-2.1155, 0.4339253902435303 sec
Episode 15444, loss:-7.2085, fail, steps:160, total reward:-6.6212, 0.4404876232147217 sec
Episode 15445, loss:0.5631, succeed, steps:75, total reward:5.1286, 0.2163095474243164 sec
Episode 15446, loss:1.8664, succeed, steps:120, total reward:9.5276, 0.3202323913574219 sec
Episode 15447, loss:-0.9293, succeed, steps:50, total reward:0.5012, 0.14178109169006348 sec
Episode 15448, loss:0.9556, succeed, steps:98, total reward:5.0338, 0.2676866054534912 sec
Episode 15449, loss:0.0826, succeed, steps:61, total reward:4.9748, 0.16551661491394043 sec
Episode 15450, loss:0.0748, succeed, steps:48, total reward:1.8005, 0.12932419776916504 sec
Episode 15451, loss:-11.4596, fail, steps:160, total reward:-5.5590, 0.41211605072021484 sec
Episode 15452, loss:0.2104, succeed, steps:88, total reward:4.8037, 0.24474096298217773 sec
Episode 15453, loss:-14.6148, fail, steps:160, total reward:-7.4868, 0.4249155521

Episode 15532, loss:0.1498, succeed, steps:36, total reward:3.3582, 0.09763526916503906 sec
Episode 15533, loss:0.4335, succeed, steps:64, total reward:4.6756, 0.16738295555114746 sec
Episode 15534, loss:0.0415, succeed, steps:87, total reward:0.4880, 0.22357964515686035 sec
Episode 15535, loss:1.2335, succeed, steps:97, total reward:10.5602, 0.25438880920410156 sec
Episode 15536, loss:-4.0064, succeed, steps:101, total reward:1.1467, 0.26769566535949707 sec
Episode 15537, loss:0.3551, succeed, steps:102, total reward:4.9410, 0.27312588691711426 sec
Episode 15538, loss:0.8668, succeed, steps:98, total reward:11.4880, 0.2634601593017578 sec
Episode 15539, loss:0.1416, succeed, steps:89, total reward:8.9423, 0.23977017402648926 sec
Episode 15540, loss:-13.8077, fail, steps:161, total reward:-4.9197, 0.4247581958770752 sec
Episode 15541, loss:0.0005, succeed, steps:49, total reward:2.8005, 0.14838647842407227 sec
Episode 15542, loss:0.8892, succeed, steps:66, total reward:6.3149, 0.173159

Episode 15621, loss:1.2157, succeed, steps:91, total reward:4.7766, 0.256911039352417 sec
Episode 15622, loss:-0.2118, succeed, steps:104, total reward:0.8475, 0.28200650215148926 sec
Episode 15623, loss:0.3090, succeed, steps:82, total reward:5.7793, 0.22281122207641602 sec
Episode 15624, loss:0.5046, succeed, steps:64, total reward:4.4592, 0.17334342002868652 sec
Episode 15625, loss:1.9889, succeed, steps:80, total reward:7.1179, 0.20976591110229492 sec
Episode 15626, loss:1.6960, succeed, steps:94, total reward:4.1166, 0.25290584564208984 sec
Episode 15627, loss:0.1255, succeed, steps:48, total reward:1.8726, 0.13140058517456055 sec
Episode 15628, loss:0.0798, succeed, steps:70, total reward:3.5721, 0.1828291416168213 sec
Episode 15629, loss:0.1606, succeed, steps:64, total reward:4.8199, 0.16768240928649902 sec
Episode 15630, loss:0.7727, succeed, steps:140, total reward:-0.6707, 0.36175107955932617 sec
Episode 15631, loss:-0.9427, succeed, steps:75, total reward:-1.1814, 0.2080183

Episode 15710, loss:3.6312, succeed, steps:106, total reward:9.4460, 0.2927587032318115 sec
Episode 15711, loss:0.2201, succeed, steps:52, total reward:2.7176, 0.14496874809265137 sec
Episode 15712, loss:3.5875, succeed, steps:85, total reward:7.8087, 0.22098374366760254 sec
Episode 15713, loss:2.3941, succeed, steps:123, total reward:13.0325, 0.32232141494750977 sec
Episode 15714, loss:0.1243, succeed, steps:72, total reward:5.7164, 0.20218825340270996 sec
Episode 15715, loss:0.0303, succeed, steps:60, total reward:4.0470, 0.16077852249145508 sec
Episode 15716, loss:0.8420, succeed, steps:107, total reward:10.7345, 0.27733850479125977 sec
Episode 15717, loss:-13.3935, fail, steps:161, total reward:-1.1155, 0.4241597652435303 sec
Episode 15718, loss:0.3452, succeed, steps:143, total reward:2.4015, 0.391599178314209 sec
Episode 15719, loss:-3.8549, fail, steps:161, total reward:-2.7712, 0.44056129455566406 sec
Episode 15720, loss:0.8187, succeed, steps:79, total reward:9.2729, 0.2829391

Episode 15799, loss:1.9419, succeed, steps:101, total reward:4.0853, 0.26955509185791016 sec
Episode 15800, loss:0.3831, succeed, steps:82, total reward:6.1072, 0.2214810848236084 sec
Episode 15801, loss:0.3129, succeed, steps:101, total reward:7.8895, 0.2687399387359619 sec
Episode 15802, loss:4.0277, succeed, steps:90, total reward:7.3644, 0.2429966926574707 sec
Episode 15803, loss:1.0307, succeed, steps:77, total reward:4.4064, 0.20873022079467773 sec
Episode 15804, loss:3.4001, succeed, steps:147, total reward:5.9850, 0.3861238956451416 sec
Episode 15805, loss:-6.8400, succeed, steps:113, total reward:-1.0110, 0.3150336742401123 sec
Episode 15806, loss:-0.0716, succeed, steps:105, total reward:2.0639, 0.28609514236450195 sec
Episode 15807, loss:0.2638, succeed, steps:110, total reward:3.4039, 0.29811692237854004 sec
Episode 15808, loss:-1.0811, succeed, steps:73, total reward:6.4443, 0.19979572296142578 sec
Episode 15809, loss:-0.0259, succeed, steps:102, total reward:8.5288, 0.269

Episode 15889, loss:0.0610, succeed, steps:87, total reward:7.1029, 0.2296907901763916 sec
Episode 15890, loss:0.8161, succeed, steps:106, total reward:5.7860, 0.2809789180755615 sec
Episode 15891, loss:-0.0932, succeed, steps:96, total reward:6.0445, 0.25911378860473633 sec
Episode 15892, loss:0.0044, succeed, steps:85, total reward:11.8457, 0.22997236251831055 sec
Episode 15893, loss:0.0314, succeed, steps:68, total reward:8.3149, 0.1870131492614746 sec
Episode 15894, loss:1.3666, succeed, steps:106, total reward:12.9616, 0.2782440185546875 sec
Episode 15895, loss:-4.4550, succeed, steps:106, total reward:5.7139, 0.2847554683685303 sec
Episode 15896, loss:0.0921, succeed, steps:105, total reward:11.6010, 0.2828395366668701 sec
Episode 15897, loss:0.2420, succeed, steps:105, total reward:5.0025, 0.2837541103363037 sec
Episode 15898, loss:-0.3689, succeed, steps:95, total reward:1.0960, 0.2568221092224121 sec
Episode 15899, loss:0.2252, succeed, steps:81, total reward:4.3137, 0.2203717

Episode 15978, loss:-11.7806, fail, steps:160, total reward:-5.1655, 0.44994425773620605 sec
Episode 15979, loss:-15.2069, fail, steps:160, total reward:-9.2812, 0.45214056968688965 sec
Episode 15980, loss:-14.5848, fail, steps:159, total reward:-9.2812, 0.4546959400177002 sec
Episode 15981, loss:1.0416, succeed, steps:40, total reward:3.9147, 0.12702107429504395 sec
Episode 15982, loss:-21.1266, fail, steps:159, total reward:-10.5369, 0.41367506980895996 sec
Episode 15983, loss:-2.7224, succeed, steps:152, total reward:-1.3628, 0.41313672065734863 sec
Episode 15984, loss:-10.6816, fail, steps:159, total reward:-10.6319, 0.4364299774169922 sec
Episode 15985, loss:-14.2330, fail, steps:160, total reward:-5.1163, 0.44808220863342285 sec
Episode 15986, loss:-5.0130, succeed, steps:149, total reward:2.4521, 0.4163241386413574 sec
Episode 15987, loss:-6.0625, fail, steps:161, total reward:-2.4498, 0.44736623764038086 sec
Episode 15988, loss:-2.2354, succeed, steps:150, total reward:-3.0742,

Episode 16066, loss:-0.7419, succeed, steps:79, total reward:2.6022, 0.20864582061767578 sec
Episode 16067, loss:1.2151, succeed, steps:48, total reward:5.3161, 0.13065671920776367 sec
Episode 16068, loss:0.3859, succeed, steps:94, total reward:7.4159, 0.2427060604095459 sec
Episode 16069, loss:-8.5418, fail, steps:160, total reward:-5.4770, 0.4192490577697754 sec
Episode 16070, loss:4.3151, succeed, steps:123, total reward:13.1768, 0.3383173942565918 sec
Episode 16071, loss:0.2611, succeed, steps:92, total reward:8.9316, 0.2549917697906494 sec
Episode 16072, loss:-0.0766, succeed, steps:103, total reward:3.1467, 0.2778329849243164 sec
Episode 16073, loss:0.0720, succeed, steps:78, total reward:5.0457, 0.21261143684387207 sec
Episode 16074, loss:-6.5473, fail, steps:161, total reward:0.4715, 0.4270346164703369 sec
Episode 16075, loss:-11.8530, fail, steps:161, total reward:0.1108, 0.43772125244140625 sec
Episode 16076, loss:0.1450, succeed, steps:95, total reward:5.2609, 0.271025419235

Episode 16155, loss:-8.8165, fail, steps:159, total reward:-14.9346, 0.44140195846557617 sec
Episode 16156, loss:-0.0393, succeed, steps:56, total reward:-0.7465, 0.16669344902038574 sec
Episode 16157, loss:-9.4793, fail, steps:160, total reward:-6.7754, 0.42087841033935547 sec
Episode 16158, loss:-9.8374, fail, steps:160, total reward:-6.0639, 0.43809962272644043 sec
Episode 16159, loss:0.6108, succeed, steps:82, total reward:-1.4848, 0.23450946807861328 sec
Episode 16160, loss:-8.9818, succeed, steps:84, total reward:-7.0375, 0.22800683975219727 sec
Episode 16161, loss:-7.9675, fail, steps:160, total reward:-7.7032, 0.4268665313720703 sec
Episode 16162, loss:-8.7691, fail, steps:160, total reward:-9.4345, 0.4390757083892822 sec
Episode 16163, loss:-4.1407, fail, steps:161, total reward:-5.3525, 0.44574475288391113 sec
Episode 16164, loss:-11.3663, fail, steps:160, total reward:-7.9197, 0.4472792148590088 sec
Episode 16165, loss:-1.1780, fail, steps:160, total reward:-5.7032, 0.447873

Episode 16244, loss:-1.9580, succeed, steps:121, total reward:-2.8858, 0.3411996364593506 sec
Episode 16245, loss:1.6711, succeed, steps:125, total reward:5.0626, 0.3435370922088623 sec
Episode 16246, loss:0.1299, succeed, steps:57, total reward:3.9856, 0.16346192359924316 sec
Episode 16247, loss:-2.6957, succeed, steps:98, total reward:-0.2360, 0.261944055557251 sec
Episode 16248, loss:0.0916, succeed, steps:95, total reward:-3.7311, 0.2565319538116455 sec
Episode 16249, loss:-0.4860, fail, steps:160, total reward:-6.2704, 0.42317676544189453 sec
Episode 16250, loss:0.6460, succeed, steps:140, total reward:-0.7428, 0.3847212791442871 sec
Episode 16251, loss:-0.3552, succeed, steps:128, total reward:4.8355, 0.3537778854370117 sec
Episode 16252, loss:-0.1938, succeed, steps:118, total reward:1.0012, 0.3259165287017822 sec
Episode 16253, loss:0.0837, succeed, steps:45, total reward:2.1719, 0.14977645874023438 sec
Episode 16254, loss:-1.4715, succeed, steps:103, total reward:2.2418, 0.266

Episode 16333, loss:0.2268, succeed, steps:98, total reward:11.7766, 0.25847482681274414 sec
Episode 16334, loss:0.1089, succeed, steps:77, total reward:7.7057, 0.20814251899719238 sec
Episode 16335, loss:-1.4189, fail, steps:161, total reward:-0.4564, 0.421907901763916 sec
Episode 16336, loss:0.2608, succeed, steps:53, total reward:7.0169, 0.15351486206054688 sec
Episode 16337, loss:2.3284, succeed, steps:64, total reward:7.8306, 0.16904926300048828 sec
Episode 16338, loss:0.1018, succeed, steps:56, total reward:6.7176, 0.1465308666229248 sec
Episode 16339, loss:1.9578, succeed, steps:96, total reward:9.5044, 0.24798369407653809 sec
Episode 16340, loss:0.0592, succeed, steps:54, total reward:4.7898, 0.1486060619354248 sec
Episode 16341, loss:-1.4831, succeed, steps:97, total reward:4.2502, 0.2520270347595215 sec
Episode 16342, loss:0.1619, succeed, steps:115, total reward:5.5375, 0.30357980728149414 sec
Episode 16343, loss:-0.7242, fail, steps:161, total reward:0.0387, 0.4297101497650

Episode 16422, loss:-3.5970, fail, steps:161, total reward:-3.6934, 0.42626333236694336 sec
Episode 16423, loss:-2.0819, succeed, steps:133, total reward:3.1648, 0.36793017387390137 sec
Episode 16424, loss:0.0295, succeed, steps:71, total reward:7.9435, 0.20140695571899414 sec
Episode 16425, loss:3.5437, succeed, steps:54, total reward:4.7176, 0.14777779579162598 sec
Episode 16426, loss:0.3102, succeed, steps:65, total reward:2.4485, 0.1753244400024414 sec
Episode 16427, loss:0.0569, succeed, steps:61, total reward:1.7477, 0.21297430992126465 sec
Episode 16428, loss:0.5979, succeed, steps:147, total reward:3.7514, 0.3813350200653076 sec
Episode 16429, loss:0.2929, succeed, steps:44, total reward:4.5433, 0.1278705596923828 sec
Episode 16430, loss:0.0033, succeed, steps:60, total reward:4.0470, 0.15735077857971191 sec
Episode 16431, loss:-0.0198, succeed, steps:75, total reward:2.1900, 0.19481515884399414 sec
Episode 16432, loss:1.7035, succeed, steps:113, total reward:9.7032, 0.29448080

Episode 16511, loss:0.8630, succeed, steps:94, total reward:4.4773, 0.257490873336792 sec
Episode 16512, loss:0.0064, succeed, steps:54, total reward:4.6455, 0.14922189712524414 sec
Episode 16513, loss:-0.1351, succeed, steps:59, total reward:-0.4687, 0.1597895622253418 sec
Episode 16514, loss:-0.8043, fail, steps:161, total reward:-0.1449, 0.43178606033325195 sec
Episode 16515, loss:0.5588, fail, steps:161, total reward:-1.9613, 0.43598508834838867 sec
Episode 16516, loss:-0.0688, succeed, steps:77, total reward:0.9629, 0.22264599800109863 sec
Episode 16517, loss:0.2717, succeed, steps:73, total reward:7.0049, 0.19698715209960938 sec
Episode 16518, loss:-2.7933, fail, steps:161, total reward:0.3272, 0.41768860816955566 sec
Episode 16519, loss:-0.0593, succeed, steps:52, total reward:2.1734, 0.15387463569641113 sec
Episode 16520, loss:1.1660, succeed, steps:72, total reward:5.8607, 0.19116830825805664 sec
Episode 16521, loss:-1.7116, fail, steps:159, total reward:-8.3270, 0.41741800308

Episode 16600, loss:0.1123, succeed, steps:62, total reward:5.6306, 0.16556549072265625 sec
Episode 16601, loss:0.1655, succeed, steps:65, total reward:2.3763, 0.1704714298248291 sec
Episode 16602, loss:0.0254, succeed, steps:83, total reward:3.3029, 0.21649765968322754 sec
Episode 16603, loss:0.0421, succeed, steps:99, total reward:5.2731, 0.26174139976501465 sec
Episode 16604, loss:1.2488, succeed, steps:133, total reward:2.7320, 0.35228967666625977 sec
Episode 16605, loss:-0.0134, succeed, steps:73, total reward:2.5286, 0.2023015022277832 sec
Episode 16606, loss:0.1264, succeed, steps:74, total reward:4.6335, 0.19771456718444824 sec
Episode 16607, loss:-0.1620, succeed, steps:99, total reward:4.8567, 0.26113057136535645 sec
Episode 16608, loss:1.8067, succeed, steps:60, total reward:7.2741, 0.1641371250152588 sec
Episode 16609, loss:0.0408, succeed, steps:94, total reward:7.6323, 0.24765968322753906 sec
Episode 16610, loss:-2.5975, fail, steps:162, total reward:2.0494, 0.42423391342

Episode 16690, loss:-0.7571, fail, steps:162, total reward:3.4101, 0.42380642890930176 sec
Episode 16691, loss:-0.4869, succeed, steps:161, total reward:1.3272, 0.4379556179046631 sec
Episode 16692, loss:-0.0027, succeed, steps:112, total reward:5.5482, 0.31671929359436035 sec
Episode 16693, loss:-0.2895, succeed, steps:84, total reward:7.9629, 0.2341010570526123 sec
Episode 16694, loss:-0.0408, succeed, steps:105, total reward:5.2910, 0.2820291519165039 sec
Episode 16695, loss:0.1395, succeed, steps:65, total reward:2.4485, 0.1826643943786621 sec
Episode 16696, loss:-0.3784, succeed, steps:103, total reward:3.4353, 0.27159714698791504 sec
Episode 16697, loss:-0.5028, fail, steps:162, total reward:3.4101, 0.48319458961486816 sec
Episode 16698, loss:-0.2236, succeed, steps:103, total reward:3.4353, 0.28891563415527344 sec
Episode 16699, loss:0.5361, succeed, steps:61, total reward:4.6863, 0.17014861106872559 sec
Episode 16700, loss:-2.0947, succeed, steps:130, total reward:1.8641, 0.340

Episode 16779, loss:-1.9237, fail, steps:161, total reward:-3.0826, 0.42601561546325684 sec
Episode 16780, loss:0.4410, succeed, steps:75, total reward:9.0049, 0.21675586700439453 sec
Episode 16781, loss:-0.0481, succeed, steps:107, total reward:3.7753, 0.28597593307495117 sec
Episode 16782, loss:0.2524, succeed, steps:81, total reward:7.9736, 0.22167277336120605 sec
Episode 16783, loss:3.6254, succeed, steps:107, total reward:13.6731, 0.28446364402770996 sec
Episode 16784, loss:0.2065, succeed, steps:77, total reward:3.8293, 0.21006321907043457 sec
Episode 16785, loss:0.0168, succeed, steps:42, total reward:2.4711, 0.11516499519348145 sec
Episode 16786, loss:0.2919, succeed, steps:109, total reward:8.9303, 0.28057241439819336 sec
Episode 16787, loss:-0.0593, succeed, steps:107, total reward:7.0025, 0.28496718406677246 sec
Episode 16788, loss:0.9451, succeed, steps:84, total reward:10.4851, 0.2275409698486328 sec
Episode 16789, loss:0.0022, succeed, steps:82, total reward:5.3137, 0.223

Episode 16868, loss:0.5733, succeed, steps:107, total reward:10.2296, 0.2821998596191406 sec
Episode 16869, loss:1.0669, succeed, steps:111, total reward:7.3425, 0.2972440719604492 sec
Episode 16870, loss:-3.8485, succeed, steps:131, total reward:-2.6394, 0.3507959842681885 sec
Episode 16871, loss:0.1004, succeed, steps:98, total reward:4.4567, 0.26975440979003906 sec
Episode 16872, loss:0.0164, succeed, steps:76, total reward:2.7736, 0.21120810508728027 sec
Episode 16873, loss:-0.1699, succeed, steps:127, total reward:2.9699, 0.3366968631744385 sec
Episode 16874, loss:0.1997, succeed, steps:79, total reward:2.6743, 0.2176222801208496 sec
Episode 16875, loss:0.3485, succeed, steps:68, total reward:4.6550, 0.1832115650177002 sec
Episode 16876, loss:-1.6292, fail, steps:161, total reward:-1.6105, 0.4195210933685303 sec
Episode 16877, loss:4.4080, succeed, steps:77, total reward:7.3450, 0.21683955192565918 sec
Episode 16878, loss:0.1965, succeed, steps:75, total reward:1.9015, 0.201244115

Episode 16958, loss:6.6663, succeed, steps:98, total reward:5.3223, 0.2595806121826172 sec
Episode 16959, loss:-0.0623, succeed, steps:101, total reward:1.3631, 0.2711760997772217 sec
Episode 16960, loss:0.0346, succeed, steps:49, total reward:2.6562, 0.13756465911865234 sec
Episode 16961, loss:0.5071, succeed, steps:108, total reward:1.1875, 0.28034138679504395 sec
Episode 16962, loss:-5.0290, succeed, steps:154, total reward:-9.0662, 0.4130980968475342 sec
Episode 16963, loss:-5.9052, fail, steps:162, total reward:-0.8925, 0.4410388469696045 sec
Episode 16964, loss:0.4233, succeed, steps:116, total reward:2.5890, 0.3257722854614258 sec
Episode 16965, loss:-5.7633, fail, steps:161, total reward:-3.3319, 0.43595027923583984 sec
Episode 16966, loss:-0.5236, succeed, steps:83, total reward:3.1587, 0.23632431030273438 sec
Episode 16967, loss:-0.5204, succeed, steps:136, total reward:2.5049, 0.36519503593444824 sec
Episode 16968, loss:-3.4494, succeed, steps:125, total reward:5.1348, 0.344

Episode 17046, loss:-1.5961, fail, steps:161, total reward:-4.2704, 0.445758581161499 sec
Episode 17047, loss:-2.5790, fail, steps:162, total reward:0.1731, 0.4522228240966797 sec
Episode 17048, loss:1.6047, succeed, steps:110, total reward:-0.4003, 0.31477880477905273 sec
Episode 17049, loss:-3.9950, fail, steps:161, total reward:-2.7548, 0.442798376083374 sec
Episode 17050, loss:-1.1068, succeed, steps:159, total reward:-7.8484, 0.4418070316314697 sec
Episode 17051, loss:-1.1305, succeed, steps:152, total reward:7.9743, 0.4233131408691406 sec
Episode 17052, loss:-8.5503, succeed, steps:144, total reward:-6.4963, 0.40325236320495605 sec
Episode 17053, loss:-1.1760, fail, steps:162, total reward:1.8559, 0.448986291885376 sec
Episode 17054, loss:-2.0558, succeed, steps:134, total reward:-0.0722, 0.3836946487426758 sec
Episode 17055, loss:-4.7952, fail, steps:162, total reward:-1.3319, 0.4492313861846924 sec
Episode 17056, loss:-0.2280, succeed, steps:86, total reward:2.0987, 0.247962236

Episode 17136, loss:-1.9221, succeed, steps:139, total reward:-1.3821, 0.38590025901794434 sec
Episode 17137, loss:0.2389, succeed, steps:106, total reward:9.3738, 0.2956867218017578 sec
Episode 17138, loss:1.7758, succeed, steps:53, total reward:3.4291, 0.14990592002868652 sec
Episode 17139, loss:0.0030, succeed, steps:49, total reward:6.3883, 0.13258814811706543 sec
Episode 17140, loss:-1.7353, succeed, steps:116, total reward:-0.6381, 0.297914981842041 sec
Episode 17141, loss:-1.0022, succeed, steps:119, total reward:5.1169, 0.3171544075012207 sec
Episode 17142, loss:0.2575, succeed, steps:97, total reward:7.0445, 0.2641470432281494 sec
Episode 17143, loss:-0.9107, succeed, steps:119, total reward:4.8447, 0.3340117931365967 sec
Episode 17144, loss:-0.2642, succeed, steps:89, total reward:2.1274, 0.2447960376739502 sec
Episode 17145, loss:-0.3328, succeed, steps:145, total reward:4.5064, 0.3865993022918701 sec
Episode 17146, loss:4.5071, succeed, steps:103, total reward:6.7345, 0.285

Episode 17225, loss:-1.9570, fail, steps:161, total reward:-1.7548, 0.43211936950683594 sec
Episode 17226, loss:0.0240, succeed, steps:56, total reward:3.3462, 0.16397666931152344 sec
Episode 17227, loss:-0.0568, succeed, steps:149, total reward:-6.4513, 0.38975071907043457 sec
Episode 17228, loss:-4.2930, succeed, steps:79, total reward:2.4579, 0.22274422645568848 sec
Episode 17229, loss:-0.1350, succeed, steps:78, total reward:5.0457, 0.21491217613220215 sec
Episode 17230, loss:-0.2532, fail, steps:160, total reward:-5.2704, 0.43929362297058105 sec
Episode 17231, loss:0.0014, succeed, steps:56, total reward:6.5012, 0.20496487617492676 sec
Episode 17232, loss:-0.1220, fail, steps:160, total reward:-8.9304, 0.4193146228790283 sec
Episode 17233, loss:-2.5129, succeed, steps:47, total reward:0.2562, 0.13721466064453125 sec
Episode 17234, loss:0.5345, succeed, steps:75, total reward:5.1286, 0.196150541305542 sec
Episode 17235, loss:-1.5669, succeed, steps:91, total reward:4.4880, 0.239157

Episode 17314, loss:0.0033, succeed, steps:143, total reward:2.4015, 0.39493823051452637 sec
Episode 17315, loss:3.2773, succeed, steps:115, total reward:3.8784, 0.3225133419036865 sec
Episode 17316, loss:0.3353, succeed, steps:105, total reward:4.4254, 0.2905104160308838 sec
Episode 17317, loss:-0.4592, fail, steps:161, total reward:-0.9712, 0.4365048408508301 sec
Episode 17318, loss:0.1365, succeed, steps:59, total reward:2.8306, 0.17388582229614258 sec
Episode 17319, loss:1.4083, succeed, steps:104, total reward:3.7860, 0.27588391304016113 sec
Episode 17320, loss:-1.7340, succeed, steps:136, total reward:1.9278, 0.41507554054260254 sec
Episode 17321, loss:0.0044, succeed, steps:91, total reward:7.7152, 0.2534823417663574 sec
Episode 17322, loss:-0.0198, succeed, steps:94, total reward:6.5667, 0.25603270530700684 sec
Episode 17323, loss:0.6205, succeed, steps:62, total reward:5.3256, 0.17004823684692383 sec
Episode 17324, loss:2.4939, succeed, steps:158, total reward:6.6381, 0.418958

Episode 17404, loss:-3.4581, succeed, steps:129, total reward:1.9592, 0.3454864025115967 sec
Episode 17405, loss:1.0934, fail, steps:160, total reward:-8.1369, 0.4910094738006592 sec
Episode 17406, loss:0.0276, succeed, steps:116, total reward:2.1562, 0.3274545669555664 sec
Episode 17407, loss:0.1830, succeed, steps:116, total reward:6.3932, 0.3195371627807617 sec
Episode 17408, loss:0.1108, succeed, steps:55, total reward:2.4905, 0.15563273429870605 sec
Episode 17409, loss:3.2964, succeed, steps:67, total reward:7.3313, 0.1777653694152832 sec
Episode 17410, loss:0.1730, succeed, steps:69, total reward:6.2321, 0.18080806732177734 sec
Episode 17411, loss:0.9327, succeed, steps:60, total reward:7.2741, 0.15929293632507324 sec
Episode 17412, loss:-0.0012, succeed, steps:79, total reward:1.8350, 0.20632004737854004 sec
Episode 17413, loss:0.1340, succeed, steps:100, total reward:6.7452, 0.26526618003845215 sec
Episode 17414, loss:-0.0749, succeed, steps:125, total reward:4.4855, 0.33737421

Episode 17493, loss:0.0892, succeed, steps:92, total reward:2.1166, 0.2436065673828125 sec
Episode 17494, loss:0.2875, succeed, steps:98, total reward:1.5181, 0.26223063468933105 sec
Episode 17495, loss:0.1869, succeed, steps:75, total reward:2.5507, 0.2039811611175537 sec
Episode 17496, loss:0.3097, succeed, steps:88, total reward:2.1372, 0.2399921417236328 sec
Episode 17497, loss:0.0640, succeed, steps:60, total reward:7.3462, 0.1622753143310547 sec
Episode 17498, loss:-4.3651, fail, steps:160, total reward:-4.8376, 0.4188964366912842 sec
Episode 17499, loss:-1.2019, fail, steps:161, total reward:-1.6826, 0.4912278652191162 sec
Episode 17500, loss:-3.1945, fail, steps:159, total reward:-11.3205, 0.43709373474121094 sec
Episode 17501, loss:-1.1153, fail, steps:161, total reward:-2.8990, 0.45290565490722656 sec
Episode 17502, loss:-0.9851, fail, steps:160, total reward:-7.4540, 0.4472038745880127 sec
Episode 17503, loss:-0.1523, fail, steps:161, total reward:-3.0441, 0.4511060714721679

Episode 17583, loss:3.5584, succeed, steps:82, total reward:2.0144, 0.2151939868927002 sec
Episode 17584, loss:-0.1931, succeed, steps:91, total reward:7.6594, 0.24067401885986328 sec
Episode 17585, loss:-3.6806, succeed, steps:143, total reward:1.1293, 0.3780980110168457 sec
Episode 17586, loss:-1.7733, succeed, steps:125, total reward:-2.1130, 0.3460197448730469 sec
Episode 17587, loss:2.9604, succeed, steps:131, total reward:10.9184, 0.35702967643737793 sec
Episode 17588, loss:-9.4898, fail, steps:160, total reward:-5.6311, 0.4347951412200928 sec
Episode 17589, loss:-0.9463, succeed, steps:144, total reward:-0.0421, 0.40050506591796875 sec
Episode 17590, loss:-0.6729, succeed, steps:125, total reward:4.6298, 0.3492124080657959 sec
Episode 17591, loss:0.0658, succeed, steps:66, total reward:3.1599, 0.18794727325439453 sec
Episode 17592, loss:-6.9227, fail, steps:160, total reward:-6.2704, 0.4812915325164795 sec
Episode 17593, loss:-5.3350, succeed, steps:101, total reward:4.4460, 0.2

Episode 17672, loss:-3.8667, fail, steps:160, total reward:-4.3327, 0.44830322265625 sec
Episode 17673, loss:-4.0243, fail, steps:161, total reward:-4.7032, 0.44870495796203613 sec
Episode 17674, loss:-5.4879, fail, steps:160, total reward:-5.9098, 0.44710707664489746 sec
Episode 17675, loss:-0.4048, fail, steps:161, total reward:-2.4662, 0.44924163818359375 sec
Episode 17676, loss:-4.7178, fail, steps:161, total reward:-1.8990, 0.45435047149658203 sec
Episode 17677, loss:0.2553, succeed, steps:83, total reward:2.9423, 0.2448732852935791 sec
Episode 17678, loss:-3.5587, fail, steps:159, total reward:-10.9926, 0.480487585067749 sec
Episode 17679, loss:-0.7613, succeed, steps:125, total reward:-2.4015, 0.34942007064819336 sec
Episode 17680, loss:-9.5755, fail, steps:161, total reward:-1.8269, 0.43938684463500977 sec
Episode 17681, loss:-2.1799, fail, steps:161, total reward:-2.6826, 0.4465618133544922 sec
Episode 17682, loss:-8.8348, fail, steps:159, total reward:-11.0648, 0.446509122848

Episode 17761, loss:2.1039, succeed, steps:113, total reward:9.3425, 0.30793023109436035 sec
Episode 17762, loss:5.5260, succeed, steps:102, total reward:4.7246, 0.2794222831726074 sec
Episode 17763, loss:-6.2834, fail, steps:161, total reward:-1.5220, 0.43119335174560547 sec
Episode 17764, loss:0.6638, succeed, steps:102, total reward:2.4353, 0.28651905059814453 sec
Episode 17765, loss:0.1276, fail, steps:161, total reward:-2.1384, 0.43505024909973145 sec
Episode 17766, loss:0.2833, succeed, steps:121, total reward:3.8569, 0.34391331672668457 sec
Episode 17767, loss:-1.8367, fail, steps:161, total reward:-6.6376, 0.43851351737976074 sec
Episode 17768, loss:-3.2074, fail, steps:160, total reward:-6.5491, 0.44365525245666504 sec
Episode 17769, loss:0.4773, succeed, steps:134, total reward:-0.0722, 0.37621545791625977 sec
Episode 17770, loss:-0.9624, fail, steps:160, total reward:-5.3426, 0.44138264656066895 sec
Episode 17771, loss:-2.5423, succeed, steps:95, total reward:5.0445, 0.27751

Episode 17851, loss:-2.7137, succeed, steps:119, total reward:-4.0924, 0.31253767013549805 sec
Episode 17852, loss:-1.1565, succeed, steps:102, total reward:5.3017, 0.27826929092407227 sec
Episode 17853, loss:0.5454, succeed, steps:81, total reward:4.8186, 0.22257280349731445 sec
Episode 17854, loss:0.2076, succeed, steps:101, total reward:0.8582, 0.2706155776977539 sec
Episode 17855, loss:-0.0004, succeed, steps:83, total reward:2.7751, 0.22662734985351562 sec
Episode 17856, loss:0.0001, succeed, steps:91, total reward:8.0758, 0.24381351470947266 sec
Episode 17857, loss:-1.0496, fail, steps:161, total reward:-1.8498, 0.4307830333709717 sec
Episode 17858, loss:-0.0114, succeed, steps:113, total reward:2.7604, 0.3149440288543701 sec
Episode 17859, loss:0.4958, succeed, steps:144, total reward:3.5457, 0.392026424407959 sec
Episode 17860, loss:-0.6426, fail, steps:162, total reward:1.0387, 0.4445323944091797 sec
Episode 17861, loss:3.4693, succeed, steps:79, total reward:2.5301, 0.2273683

Episode 17940, loss:-3.2065, fail, steps:161, total reward:-0.8170, 0.42556262016296387 sec
Episode 17941, loss:5.3528, succeed, steps:86, total reward:5.8701, 0.2431807518005371 sec
Episode 17942, loss:0.0565, succeed, steps:92, total reward:5.0487, 0.24925827980041504 sec
Episode 17943, loss:-7.0330, fail, steps:161, total reward:-2.1056, 0.4282875061035156 sec
Episode 17944, loss:-6.3769, succeed, steps:116, total reward:1.9333, 0.323331356048584 sec
Episode 17945, loss:-12.6100, fail, steps:160, total reward:-4.8277, 0.4397435188293457 sec
Episode 17946, loss:2.0416, succeed, steps:144, total reward:6.9171, 0.399979829788208 sec
Episode 17947, loss:0.1874, succeed, steps:37, total reward:1.0589, 0.11313581466674805 sec
Episode 17948, loss:0.0334, succeed, steps:37, total reward:4.2861, 0.09844660758972168 sec
Episode 17949, loss:0.0166, succeed, steps:73, total reward:3.4171, 0.1853783130645752 sec
Episode 17950, loss:3.0281, succeed, steps:82, total reward:1.3587, 0.21330690383911

Episode 18028, loss:-1.5018, fail, steps:161, total reward:0.0387, 0.4539906978607178 sec
Episode 18029, loss:0.1159, fail, steps:160, total reward:-6.3720, 0.5007286071777344 sec
Episode 18030, loss:-2.7858, fail, steps:160, total reward:-7.8484, 0.4508819580078125 sec
Episode 18031, loss:-1.7524, succeed, steps:101, total reward:1.0025, 0.29167747497558594 sec
Episode 18032, loss:-4.2479, fail, steps:159, total reward:-10.3533, 0.4319303035736084 sec
Episode 18033, loss:-7.6888, fail, steps:159, total reward:-12.8690, 0.44112181663513184 sec
Episode 18034, loss:-1.8783, fail, steps:160, total reward:-5.1163, 0.4472694396972656 sec
Episode 18035, loss:0.4715, fail, steps:160, total reward:-4.1163, 0.4481484889984131 sec
Episode 18036, loss:-0.5712, fail, steps:160, total reward:-4.6212, 0.4486362934112549 sec
Episode 18037, loss:-1.3991, fail, steps:159, total reward:-12.5598, 0.451124906539917 sec
Episode 18038, loss:-0.9977, fail, steps:160, total reward:-7.0212, 0.4503359794616699 

Episode 18118, loss:-13.2179, fail, steps:159, total reward:-10.9935, 0.44714784622192383 sec
Episode 18119, loss:-6.7026, fail, steps:159, total reward:-11.2099, 0.454312801361084 sec
Episode 18120, loss:-1.1691, fail, steps:159, total reward:-9.3434, 0.44608116149902344 sec
Episode 18121, loss:-1.7067, fail, steps:160, total reward:-5.3228, 0.44907426834106445 sec
Episode 18122, loss:-3.8050, fail, steps:159, total reward:-7.9106, 0.4486873149871826 sec
Episode 18123, loss:-3.7447, fail, steps:158, total reward:-11.9213, 0.4453577995300293 sec
Episode 18124, loss:-9.0767, fail, steps:160, total reward:-5.7556, 0.5036132335662842 sec
Episode 18125, loss:-3.9849, fail, steps:159, total reward:-7.4778, 0.4492487907409668 sec
Episode 18126, loss:-9.2868, fail, steps:158, total reward:-12.2099, 0.44374632835388184 sec
Episode 18127, loss:-9.2871, fail, steps:159, total reward:-10.9935, 0.44665074348449707 sec
Episode 18128, loss:-2.1434, fail, steps:159, total reward:-8.5499, 0.4526522159

Episode 18207, loss:-17.2204, fail, steps:158, total reward:-14.7377, 0.44925951957702637 sec
Episode 18208, loss:-21.1897, fail, steps:158, total reward:-12.7771, 0.44412970542907715 sec
Episode 18209, loss:-6.3077, fail, steps:158, total reward:-12.6328, 0.44395899772644043 sec
Episode 18210, loss:-3.7081, fail, steps:157, total reward:-15.4271, 0.4400510787963867 sec
Episode 18211, loss:-5.5597, fail, steps:158, total reward:-12.2721, 0.4434499740600586 sec
Episode 18212, loss:-1.2371, fail, steps:158, total reward:-13.2721, 0.45358943939208984 sec
Episode 18213, loss:-4.0030, fail, steps:158, total reward:-14.6435, 0.444011926651001 sec
Episode 18214, loss:-3.9939, fail, steps:158, total reward:-13.4164, 0.4446992874145508 sec
Episode 18215, loss:-5.4831, fail, steps:158, total reward:-14.7156, 0.44261908531188965 sec
Episode 18216, loss:-12.7972, fail, steps:158, total reward:-13.9213, 0.44535017013549805 sec
Episode 18217, loss:-1.5596, fail, steps:158, total reward:-14.2107, 0.4

Episode 18296, loss:-46.5840, fail, steps:160, total reward:-15.9007, 0.4489917755126953 sec
Episode 18297, loss:-32.8421, fail, steps:158, total reward:-15.4164, 0.4442460536956787 sec
Episode 18298, loss:-17.4493, fail, steps:158, total reward:-14.8393, 0.4441344738006592 sec
Episode 18299, loss:-12.3950, fail, steps:158, total reward:-14.2107, 0.44327592849731445 sec
Episode 18300, loss:-24.8433, fail, steps:158, total reward:-14.7156, 0.4472212791442871 sec
Episode 18301, loss:-57.1231, fail, steps:158, total reward:-15.4984, 0.4436016082763672 sec
Episode 18302, loss:-30.9489, fail, steps:157, total reward:-15.5714, 0.44112324714660645 sec
Episode 18303, loss:-36.3864, fail, steps:158, total reward:-12.9935, 0.4446909427642822 sec
Episode 18304, loss:-58.8183, fail, steps:157, total reward:-17.5091, 0.44684624671936035 sec
Episode 18305, loss:-60.7545, fail, steps:158, total reward:-12.9312, 0.4543943405151367 sec
Episode 18306, loss:-41.1684, fail, steps:158, total reward:-13.849

Episode 18385, loss:-17.1924, fail, steps:159, total reward:-12.5796, 0.4413728713989258 sec
Episode 18386, loss:-0.3394, succeed, steps:100, total reward:-1.6468, 0.2858846187591553 sec
Episode 18387, loss:-11.4437, fail, steps:160, total reward:-9.0746, 0.43238306045532227 sec
Episode 18388, loss:-12.1992, succeed, steps:137, total reward:-8.3962, 0.38156986236572266 sec
Episode 18389, loss:-25.0933, fail, steps:159, total reward:-13.1575, 0.44244956970214844 sec
Episode 18390, loss:-11.0266, fail, steps:161, total reward:-3.3319, 0.44947147369384766 sec
Episode 18391, loss:-17.5467, fail, steps:159, total reward:-9.6418, 0.4428575038909912 sec
Episode 18392, loss:-4.7105, succeed, steps:150, total reward:-4.5891, 0.42107152938842773 sec
Episode 18393, loss:-3.4878, fail, steps:162, total reward:-0.6597, 0.4502882957458496 sec
Episode 18394, loss:3.5663, succeed, steps:128, total reward:0.2378, 0.36884522438049316 sec
Episode 18395, loss:-1.7288, succeed, steps:111, total reward:-2.8

Episode 18474, loss:-36.1392, fail, steps:159, total reward:-11.4025, 0.44455695152282715 sec
Episode 18475, loss:-7.3215, fail, steps:160, total reward:-6.4082, 0.512260913848877 sec
Episode 18476, loss:-12.8354, fail, steps:161, total reward:-2.4860, 0.45104265213012695 sec
Episode 18477, loss:-20.4076, fail, steps:159, total reward:-10.9304, 0.44755029678344727 sec
Episode 18478, loss:-27.0051, fail, steps:159, total reward:-11.7239, 0.44545507431030273 sec
Episode 18479, loss:-12.9943, fail, steps:159, total reward:-12.9083, 0.45165228843688965 sec
Episode 18480, loss:-26.1836, fail, steps:159, total reward:-10.4353, 0.4457364082336426 sec
Episode 18481, loss:-26.2657, fail, steps:159, total reward:-10.8976, 0.44663548469543457 sec
Episode 18482, loss:-25.8153, fail, steps:158, total reward:-15.3838, 0.4447181224822998 sec
Episode 18483, loss:-0.7014, succeed, steps:81, total reward:0.7259, 0.23640012741088867 sec
Episode 18484, loss:-19.1126, fail, steps:160, total reward:-7.5525,

Episode 18563, loss:-7.0275, fail, steps:161, total reward:-3.6925, 0.45194387435913086 sec
Episode 18564, loss:-0.9184, succeed, steps:102, total reward:-2.4510, 0.2940206527709961 sec
Episode 18565, loss:-10.2412, fail, steps:159, total reward:-10.1468, 0.43233728408813477 sec
Episode 18566, loss:-8.6484, fail, steps:160, total reward:-4.6934, 0.4451615810394287 sec
Episode 18567, loss:-15.6874, fail, steps:160, total reward:-9.0025, 0.4493839740753174 sec
Episode 18568, loss:3.9743, succeed, steps:52, total reward:1.7241, 0.15791583061218262 sec
Episode 18569, loss:-5.5051, succeed, steps:84, total reward:-3.1612, 0.22201228141784668 sec
Episode 18570, loss:-15.6222, fail, steps:160, total reward:-9.1468, 0.420351505279541 sec
Episode 18571, loss:-6.9420, fail, steps:159, total reward:-11.6418, 0.43436646461486816 sec
Episode 18572, loss:-1.1440, fail, steps:161, total reward:-1.3319, 0.4447822570800781 sec
Episode 18573, loss:-2.6131, succeed, steps:109, total reward:-9.6316, 0.319

Episode 18652, loss:-19.3447, fail, steps:160, total reward:-6.8574, 0.4395558834075928 sec
Episode 18653, loss:-15.0983, fail, steps:161, total reward:-4.7745, 0.44569945335388184 sec
Episode 18654, loss:1.6908, succeed, steps:116, total reward:-0.3029, 0.3323514461517334 sec
Episode 18655, loss:-1.7272, succeed, steps:92, total reward:1.3953, 0.25646448135375977 sec
Episode 18656, loss:7.3776, succeed, steps:92, total reward:4.3339, 0.24985432624816895 sec
Episode 18657, loss:-4.0073, fail, steps:161, total reward:-4.7754, 0.4280726909637451 sec
Episode 18658, loss:-10.2726, fail, steps:160, total reward:-7.7853, 0.43804192543029785 sec
Episode 18659, loss:-0.2982, succeed, steps:114, total reward:2.3734, 0.32021474838256836 sec
Episode 18660, loss:-15.1209, fail, steps:159, total reward:-11.0025, 0.43909287452697754 sec
Episode 18661, loss:0.2781, succeed, steps:89, total reward:0.9732, 0.25628662109375 sec
Episode 18662, loss:2.3216, succeed, steps:117, total reward:5.4620, 0.36804

Episode 18741, loss:-2.5419, fail, steps:161, total reward:-2.2597, 0.42708754539489746 sec
Episode 18742, loss:2.2223, succeed, steps:63, total reward:2.8821, 0.1840522289276123 sec
Episode 18743, loss:0.7089, succeed, steps:103, total reward:-1.6346, 0.27266526222229004 sec
Episode 18744, loss:-5.0679, fail, steps:160, total reward:-6.9918, 0.4239528179168701 sec
Episode 18745, loss:-3.0481, fail, steps:162, total reward:-1.1647, 0.44368958473205566 sec
Episode 18746, loss:-3.0742, succeed, steps:119, total reward:-6.7351, 0.3332676887512207 sec
Episode 18747, loss:-8.8092, fail, steps:160, total reward:-7.5689, 0.43473243713378906 sec
Episode 18748, loss:-10.0217, fail, steps:159, total reward:-11.5075, 0.4456338882446289 sec
Episode 18749, loss:2.1191, succeed, steps:117, total reward:2.2906, 0.3294684886932373 sec
Episode 18750, loss:-0.9991, succeed, steps:141, total reward:-4.9897, 0.38576769828796387 sec
Episode 18751, loss:0.2459, succeed, steps:86, total reward:1.9217, 0.2426

Episode 18830, loss:-3.7175, fail, steps:160, total reward:-5.4868, 0.42742300033569336 sec
Episode 18831, loss:-12.8008, fail, steps:158, total reward:-12.8591, 0.4362316131591797 sec
Episode 18832, loss:-1.3668, fail, steps:160, total reward:-8.2319, 0.49828553199768066 sec
Episode 18833, loss:-13.8567, fail, steps:160, total reward:-6.6212, 0.44612860679626465 sec
Episode 18834, loss:-4.6967, fail, steps:160, total reward:-9.0025, 0.4533052444458008 sec
Episode 18835, loss:-10.3711, fail, steps:161, total reward:-4.4147, 0.4516277313232422 sec
Episode 18836, loss:1.1125, succeed, steps:72, total reward:1.5515, 0.2135181427001953 sec
Episode 18837, loss:-13.3573, fail, steps:159, total reward:-11.2910, 0.42331743240356445 sec
Episode 18838, loss:-1.4522, fail, steps:160, total reward:-7.2713, 0.4398319721221924 sec
Episode 18839, loss:-4.4037, fail, steps:159, total reward:-10.5697, 0.4444904327392578 sec
Episode 18840, loss:-1.4815, fail, steps:160, total reward:-6.1983, 0.447031736

Episode 18919, loss:-5.5462, fail, steps:160, total reward:-6.4147, 0.44420361518859863 sec
Episode 18920, loss:-0.0066, succeed, steps:101, total reward:3.3968, 0.28841090202331543 sec
Episode 18921, loss:-7.2115, fail, steps:162, total reward:-0.6597, 0.44054484367370605 sec
Episode 18922, loss:-2.4469, succeed, steps:130, total reward:-1.6385, 0.3685023784637451 sec
Episode 18923, loss:-6.1622, fail, steps:159, total reward:-11.2189, 0.4340476989746094 sec
Episode 18924, loss:-4.3155, fail, steps:159, total reward:-10.3697, 0.44146084785461426 sec
Episode 18925, loss:-12.2085, fail, steps:159, total reward:-11.7960, 0.4431769847869873 sec
Episode 18926, loss:-9.4247, fail, steps:159, total reward:-11.0124, 0.44780659675598145 sec
Episode 18927, loss:-7.6589, fail, steps:160, total reward:-6.5689, 0.4468696117401123 sec
Episode 18928, loss:1.1981, fail, steps:161, total reward:-2.6105, 0.44903111457824707 sec
Episode 18929, loss:-5.2525, fail, steps:161, total reward:-4.5590, 0.44995

Episode 19007, loss:-11.2804, fail, steps:160, total reward:-9.7239, 0.4470975399017334 sec
Episode 19008, loss:-3.0398, succeed, steps:115, total reward:-0.8865, 0.3273582458496094 sec
Episode 19009, loss:-11.9952, fail, steps:159, total reward:-10.6517, 0.4396657943725586 sec
Episode 19010, loss:-5.0421, fail, steps:160, total reward:-8.7131, 0.4452660083770752 sec
Episode 19011, loss:-3.6147, fail, steps:159, total reward:-10.4353, 0.4446287155151367 sec
Episode 19012, loss:-3.6097, fail, steps:160, total reward:-7.6475, 0.448559045791626 sec
Episode 19013, loss:-5.8263, fail, steps:161, total reward:-2.1975, 0.5066592693328857 sec
Episode 19014, loss:-101.6532, fail, steps:65, total reward:-10.4893, 0.1917896270751953 sec
Episode 19015, loss:-0.7825, succeed, steps:78, total reward:3.4588, 0.21013450622558594 sec
Episode 19016, loss:-0.0175, succeed, steps:64, total reward:-0.0664, 0.1713409423828125 sec
Episode 19017, loss:2.2609, succeed, steps:66, total reward:5.0329, 0.17366290

Episode 19096, loss:1.1669, succeed, steps:96, total reward:1.1583, 0.2760941982269287 sec
Episode 19097, loss:-12.5177, fail, steps:159, total reward:-12.3632, 0.4280738830566406 sec
Episode 19098, loss:-18.9601, fail, steps:160, total reward:-6.8475, 0.44455909729003906 sec
Episode 19099, loss:-8.2682, succeed, steps:82, total reward:-2.3669, 0.23523521423339844 sec
Episode 19100, loss:-13.8115, fail, steps:161, total reward:-7.2017, 0.4306755065917969 sec
Episode 19101, loss:6.6597, succeed, steps:56, total reward:2.4085, 0.1653273105621338 sec
Episode 19102, loss:-19.4342, fail, steps:160, total reward:-8.4246, 0.4208376407623291 sec
Episode 19103, loss:-11.8284, fail, steps:160, total reward:-7.3525, 0.4381134510040283 sec
Episode 19104, loss:11.0053, succeed, steps:134, total reward:9.3928, 0.3738245964050293 sec
Episode 19105, loss:-32.4598, fail, steps:159, total reward:-12.0124, 0.43607234954833984 sec
Episode 19106, loss:-24.5778, fail, steps:160, total reward:-7.3623, 0.4478

Episode 19185, loss:-3.7600, succeed, steps:134, total reward:2.5943, 0.3744547367095947 sec
Episode 19186, loss:-0.6523, succeed, steps:101, total reward:-0.5845, 0.28061580657958984 sec
Episode 19187, loss:-18.0604, fail, steps:159, total reward:-10.1468, 0.427170991897583 sec
Episode 19188, loss:-2.5161, succeed, steps:88, total reward:0.0224, 0.24891185760498047 sec
Episode 19189, loss:-7.0779, fail, steps:163, total reward:3.4625, 0.4359700679779053 sec
Episode 19190, loss:-9.2740, fail, steps:161, total reward:-2.2696, 0.44162702560424805 sec
Episode 19191, loss:5.4813, succeed, steps:93, total reward:1.9625, 0.2719078063964844 sec
Episode 19192, loss:-2.0819, succeed, steps:146, total reward:-0.7832, 0.3958864212036133 sec
Episode 19193, loss:-19.4967, fail, steps:160, total reward:-7.1459, 0.4411797523498535 sec
Episode 19194, loss:1.1681, succeed, steps:38, total reward:0.9933, 0.1177983283996582 sec
Episode 19195, loss:-2.7353, fail, steps:162, total reward:1.7510, 0.41736936

Episode 19274, loss:3.3289, succeed, steps:78, total reward:3.8194, 0.21809625625610352 sec
Episode 19275, loss:-0.4737, succeed, steps:64, total reward:3.8821, 0.17263293266296387 sec
Episode 19276, loss:-3.1866, succeed, steps:128, total reward:3.1764, 0.3320941925048828 sec
Episode 19277, loss:6.2980, succeed, steps:106, total reward:8.5804, 0.29424333572387695 sec
Episode 19278, loss:-2.4882, succeed, steps:118, total reward:-3.3079, 0.3177924156188965 sec
Episode 19279, loss:-0.4250, succeed, steps:82, total reward:1.2931, 0.2254018783569336 sec
Episode 19280, loss:-1.8123, fail, steps:161, total reward:-3.1253, 0.42627382278442383 sec
Episode 19281, loss:0.7001, succeed, steps:49, total reward:2.0791, 0.1428995132446289 sec
Episode 19282, loss:-0.5573, succeed, steps:67, total reward:-0.4378, 0.1756424903869629 sec
Episode 19283, loss:-7.9448, fail, steps:161, total reward:-3.8761, 0.4177875518798828 sec
Episode 19284, loss:-11.5999, fail, steps:161, total reward:-1.6925, 0.46732

Episode 19363, loss:-14.5121, fail, steps:161, total reward:-6.0017, 0.4202442169189453 sec
Episode 19364, loss:-15.2886, fail, steps:161, total reward:-5.2803, 0.43634653091430664 sec
Episode 19365, loss:-6.6238, succeed, steps:97, total reward:-4.2960, 0.27544236183166504 sec
Episode 19366, loss:-18.4676, fail, steps:159, total reward:-12.7239, 0.4262564182281494 sec
Episode 19367, loss:-4.0973, succeed, steps:108, total reward:3.4933, 0.3042469024658203 sec
Episode 19368, loss:0.6813, succeed, steps:34, total reward:4.0082, 0.1027071475982666 sec
Episode 19369, loss:-0.9655, succeed, steps:114, total reward:2.0849, 0.29517674446105957 sec
Episode 19370, loss:-3.3446, succeed, steps:88, total reward:-3.4703, 0.23954153060913086 sec
Episode 19371, loss:-6.8874, fail, steps:162, total reward:-1.1975, 0.4259519577026367 sec
Episode 19372, loss:0.6069, succeed, steps:108, total reward:-0.2552, 0.3015899658203125 sec
Episode 19373, loss:1.1030, succeed, steps:106, total reward:4.8483, 0.2

Episode 19453, loss:-4.2349, fail, steps:159, total reward:-11.8746, 0.42429542541503906 sec
Episode 19454, loss:2.0702, fail, steps:163, total reward:5.4109, 0.4449641704559326 sec
Episode 19455, loss:-4.0371, succeed, steps:78, total reward:-2.7069, 0.22691106796264648 sec
Episode 19456, loss:-2.2959, fail, steps:161, total reward:-4.1262, 0.48272132873535156 sec
Episode 19457, loss:1.0287, succeed, steps:81, total reward:3.5923, 0.23179292678833008 sec
Episode 19458, loss:-6.4880, fail, steps:160, total reward:-10.3632, 0.45101332664489746 sec
Episode 19459, loss:-1.4635, fail, steps:161, total reward:-4.9918, 0.4397563934326172 sec
Episode 19460, loss:-8.2262, fail, steps:159, total reward:-11.2910, 0.44072532653808594 sec
Episode 19461, loss:-6.0084, fail, steps:160, total reward:-10.2353, 0.4437987804412842 sec
Episode 19462, loss:-0.0592, fail, steps:161, total reward:-0.8269, 0.44729113578796387 sec
Episode 19463, loss:1.3597, succeed, steps:89, total reward:2.1995, 0.261684417

Episode 19542, loss:-8.1661, fail, steps:159, total reward:-11.6616, 0.4372878074645996 sec
Episode 19543, loss:0.4317, succeed, steps:106, total reward:1.9097, 0.3035552501678467 sec
Episode 19544, loss:0.9623, succeed, steps:49, total reward:1.8070, 0.13893508911132812 sec
Episode 19545, loss:5.5081, succeed, steps:98, total reward:6.8903, 0.255321741104126 sec
Episode 19546, loss:-9.9398, fail, steps:161, total reward:-5.4246, 0.48016810417175293 sec
Episode 19547, loss:0.5689, fail, steps:161, total reward:-4.6311, 0.4375951290130615 sec
Episode 19548, loss:0.8210, succeed, steps:102, total reward:4.1476, 0.28929972648620605 sec
Episode 19549, loss:-9.7333, fail, steps:161, total reward:-2.6826, 0.43776965141296387 sec
Episode 19550, loss:-17.0065, fail, steps:159, total reward:-16.0624, 0.43877172470092773 sec
Episode 19551, loss:-4.3721, succeed, steps:141, total reward:-4.8585, 0.3923325538635254 sec
Episode 19552, loss:-2.9765, fail, steps:161, total reward:-5.2082, 0.445494413

In [None]:
# Testing code

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def compute_entropy(N, alpha=1):
    """Return the integers 0..2**N - 1 together with their surprisal in bits.

    Each integer ``k`` is assigned probability ``p_k = exp(-alpha * k) / Z``
    where ``Z`` normalises over all ``2**N`` values.  Despite the function
    name, the second return value is the per-value surprisal ``-log2(p_k)``;
    it is neither the probabilities nor a single entropy figure.

    The surprisal is evaluated in log space as ``(ln Z + alpha * k) / ln 2``.
    Forming ``p_k`` first and then taking the logarithm underflows to
    probability 0 (and therefore ``inf``) once ``alpha * k`` is large.

    Args:
        N: Number of bits; the support is ``range(2**N)``.
        alpha: Decay rate of the exponential weights.

    Returns:
        Tuple ``(values, surprisal_bits)`` of two arrays of length ``2**N``.
    """
    K = 2**N
    values = np.arange(K)
    log_weights = -alpha * values
    # Log-sum-exp: subtract the largest exponent before exponentiating so the
    # dominant term is exp(0) = 1 and the sum can never underflow to zero.
    shift = log_weights.max()
    log_Z = shift + np.log(np.exp(log_weights - shift).sum())
    # -log2(p_k) = -(log_weights - log_Z) / ln 2
    return values, (log_Z - log_weights) / np.log(2)

# Parameters
N = 4  # Number of bits
alpha = 1  # Decay rate

# compute_entropy returns the per-value surprisal -log2(p), not the
# probabilities themselves, so the data is named and labelled as surprisal.
values, surprisal_bits = compute_entropy(N, alpha)

# Plotting
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(values, surprisal_bits, width=0.8, alpha=0.7, edgecolor="black")
ax.set_title(f"Surprisal under Exponential Decay (N={N}, alpha={alpha})")
ax.set_xlabel("Integer Value")
ax.set_ylabel("Surprisal -log2(p) [bits]")
ax.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

In [None]:
def compute_min_delta(comparisons):
    """Pick, for every (x, y) pair, the candidate tuple of smallest magnitude.

    Candidates for the pair at position ``idx`` are generated in this order:

    1. simple            -- ``(x, min(y, y - x), 0)``
    2. first delta       -- difference to the previous pair (``idx >= 1``)
    3. second delta      -- difference of consecutive first deltas (``idx >= 2``)
    4. arbitrary position -- difference to each earlier pair ``pos``, with
       ``min(pos, idx - pos)`` as the third component (``idx >= 1``)

    The candidate with the smallest sum of absolute components wins; on a tie
    the one generated first is kept.

    Args:
        comparisons: Indexable sequence of ``(x, y)`` pairs.

    Returns:
        List with one chosen 3-tuple per input pair (empty for empty input).
    """
    def magnitude(candidate):
        return sum([abs(component) for component in candidate])

    chosen = []
    for idx, (x_cur, y_cur) in enumerate(comparisons):
        # The simple encoding is always available and always listed first.
        candidates = [(x_cur, min(y_cur, y_cur - x_cur), 0)]

        if idx >= 1:
            x_p1, y_p1 = comparisons[idx - 1]
            step_x = x_cur - x_p1
            step_y = y_cur - y_p1
            candidates.append((step_x, step_y, 0))

            if idx >= 2:
                x_p2, y_p2 = comparisons[idx - 2]
                candidates.append(
                    (step_x - (x_p1 - x_p2), step_y - (y_p1 - y_p2), 0)
                )

            # Reference every earlier pair; the third slot carries the cost
            # of pointing at that position.
            for pos in range(idx):
                x_ref, y_ref = comparisons[pos]
                candidates.append(
                    (x_cur - x_ref, y_cur - y_ref, min(pos, idx - pos))
                )

        chosen.append(min(candidates, key=magnitude))

    return chosen


# Example Usage
# Demo input of four (x, y) pairs. `comparisons` stays in the notebook
# namespace and is reused by the compute_min_delta_entropy call at the end of
# this cell.
comparisons = [(1, 5), (2, 7), (4, 10), (8, 15)]
result = compute_min_delta(comparisons)

# Output the results
# One line per input pair, showing the tuple chosen for that pair.
for i, delta in enumerate(result):
    print(f"minDelta for comparison {i}: {delta}")

def compute_min_delta_entropy(comparisons):
    """Return the lowest cost of encoding the most recent comparison.

    Only the final pair of ``comparisons`` is scored; all earlier pairs act
    as references.  Candidates are generated in this order:

    1. simple            -- ``(x, min(y, y - x), 0)``
    2. first delta       -- difference to the previous pair (two or more pairs)
    3. second delta      -- difference of consecutive first deltas (three or
       more pairs)
    4. arbitrary position -- difference to each earlier pair ``pos``, with
       ``min(pos, last - pos)`` as the third component

    A candidate's cost is the sum of ``get_entropy_of_integer`` over its
    three components.  The cheapest candidate is selected (the earliest one
    on a tie) and its cost is returned.

    Args:
        comparisons: Non-empty indexable sequence of ``(x, y)`` pairs; the
            last element is looked up by index, so an empty sequence fails.

    Returns:
        The cost of the cheapest candidate for the last pair.
    """
    def cost(candidate):
        return sum([get_entropy_of_integer(part) for part in candidate])

    last = len(comparisons) - 1
    x_cur, y_cur = comparisons[last]

    # The simple encoding is always available and always listed first.
    candidates = [(x_cur, min(y_cur, y_cur - x_cur), 0)]

    if last >= 1:
        x_p1, y_p1 = comparisons[last - 1]
        step_x = x_cur - x_p1
        step_y = y_cur - y_p1
        candidates.append((step_x, step_y, 0))

        if last >= 2:
            x_p2, y_p2 = comparisons[last - 2]
            candidates.append(
                (step_x - (x_p1 - x_p2), step_y - (y_p1 - y_p2), 0)
            )

        # Reference every earlier pair; the third slot carries the cost of
        # pointing at that position.
        for pos in range(last):
            x_ref, y_ref = comparisons[pos]
            candidates.append(
                (x_cur - x_ref, y_cur - y_ref, min(pos, last - pos))
            )

    return cost(min(candidates, key=cost))

# Score the last pair of the demo `comparisons`; as the final expression of
# the cell, the returned cost is shown as the cell output.
compute_min_delta_entropy(comparisons)

In [None]:

def test_case_1():
    """A lone pair has no history, so only the simple encoding applies."""
    single_pair = [(3, 8)]
    # y is stored as min(y, y - x) = min(8, 5) = 5.
    assert compute_min_delta(single_pair) == [(3, 5, 0)]

def test_case_2():
    """Growing pairs: first delta wins at index 1, second delta afterwards."""
    comparisons = [(1, 5), (2, 7), (4, 10), (8, 15)]
    expected_result = [
        (1, 4, 0),  # Simple: (1, min(5, 5 - 1), 0)
        (1, 2, 0),  # First delta against (1, 5)
        (1, 1, 0),  # Second delta (2 - 1, 3 - 2, 0); first delta is (2, 3, 0)
        (2, 2, 0),  # Second delta (4 - 2, 5 - 3, 0); first delta is (4, 5, 0)
    ]
    actual = compute_min_delta(comparisons)
    assert actual == expected_result, f"expected {expected_result}, got {actual}"

def test_case_3():
    """The last pair is cheapest when encoded against a non-adjacent pair."""
    comparisons = [(1, 10), (3, 12), (5, 14), (1, 11)]
    expected_result = [
        (1, 9, 0),  # Simple: (1, min(10, 10 - 1), 0)
        (2, 2, 0),  # First delta against (1, 10)
        (0, 0, 0),  # Second delta (2 - 2, 2 - 2, 0); first delta is (2, 2, 0)
        (0, 1, 0),  # Arbitrary position: against index 0, position cost min(0, 3) = 0
    ]
    actual = compute_min_delta(comparisons)
    assert actual == expected_result, f"expected {expected_result}, got {actual}"

# Run every check in order; a failing assertion stops the cell before the
# summary line is printed.
for run_case in (test_case_1, test_case_2, test_case_3):
    run_case()
print("All cases succeeded.")