In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.optim.lr_scheduler as lr_scheduler
import math
import random

MIN_LIST_LEN = 16
MAX_LIST_LEN = 16
MAX_STEPS = 640

SUCCESS_REWARD = 0.5
STEP_REWARD = -0.3
COMPARISON_ENTROPY_MULTIPLIER = -0.00
SWAP_REWARD = 1.0
INVALID_ACTION_REWARD = -10.0
LONGTERM_GAMMA = 0.99
SHORTTERM_GAMMA = 0.7

EPS_START = 0.5
EPS_END = 0.05
EPS_DECAY = 1000
LR_SCHEDULER_GAMMA = 0.96
NUM_EPISODES = 200000
EPISODES_SAVE = 1000
OUTPUT_DIR = '/home/mcwave/code/autocode/datasets/rl_sort_transformer_easy/list16_transformer4_192_gamma07_step640_v3'

# Define the vocabulary
vocab = {
    'Comparison': 0,
    'Swap': 1,
    'less': 2,
    'equal': 3,
    'more': 4,
    '0': 5,
    '1': 6,
    '2': 7,
    '3': 8,
    '4': 9,
    '5': 10,
    '6': 11,
    '7': 12,
    '8': 13,
    '9': 14,
    '10': 15,
    '11': 16,
    '12': 17,
    '13': 18,
    '14': 19,
    '15': 20,
    'len1': 21,
    'len2': 22,
    'len3': 23,
    'len4': 24,
    'len5': 25,
    'len6': 26,
    'len7': 27,
    'len8': 28,
    'len9': 29,
    'len10': 30,
    'len11': 31,
    'len12': 32,
    'len13': 33,
    'len14': 34,
    'len15': 35,
    'len16': 36,
}
inv_vocab = {v: k for k, v in vocab.items()}

def compute_entropy(N, alpha=1):
    K = 2**N
    values = np.arange(K)
    unnormalized_probs = np.exp(-alpha * values)
    Z = unnormalized_probs.sum()
    probs = unnormalized_probs / Z
    return values, -np.log2(probs)

_, int_entropy = compute_entropy(4)

def get_entropy_of_integer(x):
    x = min(15, abs(x))
    return int_entropy[x]

def compute_min_delta_entropy(comparisons):
    # Initialize the result list to store minDelta values
    min_delta = None

    # Iterate through each pair in the comparisons list
    i = len(comparisons) - 1
    xi, yi = comparisons[i]
    if i == 0:
        # For i = 0, use the first case directly
        min_delta = (xi, min(yi, yi - xi), 0)
    else:
        # For i > 0, compute all possible options and select the minimal one
        options = []

        # Simple Entropy
        simple_entropy = (xi, min(yi, yi - xi), 0)
        options.append(simple_entropy)

        # First Delta Entropy
        xi_prev, yi_prev = comparisons[i - 1]
        first_delta_entropy = (xi - xi_prev, yi - yi_prev, 0)
        options.append(first_delta_entropy)

        # Second Delta Entropy (only valid for i > 1)
        if i > 1:
            xi_prev2, yi_prev2 = comparisons[i - 2]
            second_delta_entropy = (
                (xi - xi_prev) - (xi_prev - xi_prev2),
                (yi - yi_prev) - (yi_prev - yi_prev2),
                0,
            )
            options.append(second_delta_entropy)

        # Arbitrary Position Entropy (only valid for i > 1)
        for j in range(i):
            xj, yj = comparisons[j]
            arbitrary_position_entropy = (
                xi - xj,
                yi - yj,
                min(j, i - j),
            )
            options.append(arbitrary_position_entropy)

        # Find the option with the minimal sum
        min_delta = min(options, key=lambda t: sum([get_entropy_of_integer(x) for x in t]))

    entropy = sum([get_entropy_of_integer(x) for x in min_delta])
    if len(comparisons) == 1:
        return 3 * entropy
    else:
        return entropy

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define the environment
class SortingEnv:
    def __init__(self):
        self.max_steps = MAX_STEPS

    def reset(self):
        self.length = random.randint(MIN_LIST_LEN, MAX_LIST_LEN)
        self.list = [random.randint(1, 100) for _ in range(self.length)]
        while self.list == sorted(self.list):
            self.list = [random.randint(1, 100) for _ in range(self.length)]
        self.indices = None
        self.current_step = 0
        self.done = False
        initial_token = 'len{}'.format(self.length)
        return vocab[initial_token], self.list.copy()
    
    def get_list(self):
        return self.list
    
    def get_list_len(self):
        return len(self.list)

    def step(self, action_tokens):
        action = action_tokens[0]
        reward = -0.01  # default penalty
        response_token = None

        if action == vocab['Comparison']:
            if len(action_tokens) != 3:
                reward = INVALID_ACTION_REWARD
                self.done = True
                return response_token, reward, self.done, self.list.copy()
            index1 = action_tokens[1] - vocab['0']
            index2 = action_tokens[2] - vocab['0']
            if index1 >= self.length or index2 >= self.length or index1 < 0 or index2 < 0:
                reward = INVALID_ACTION_REWARD
                self.done = True
                return response_token, reward, self.done, self.list.copy()
            self.indices = (index1, index2)
            if self.list[index1] < self.list[index2]:
                response_token = vocab['less']
                reward = STEP_REWARD
            elif self.list[index1] == self.list[index2]:
                response_token = vocab['equal']
                reward = STEP_REWARD * 2
            else:
                response_token = vocab['more']
                reward = STEP_REWARD
        elif action == vocab['Swap']:
            if self.indices is None:
                reward = INVALID_ACTION_REWARD
                self.done = True
                return response_token, reward, self.done, self.list.copy()
            index1, index2 = self.indices
            prev_list = self.list.copy()
            self.list[index1], self.list[index2] = self.list[index2], self.list[index1]
            if self.list == sorted(self.list):
                reward = SUCCESS_REWARD
                self.done = True
            #elif prev_list[index1] > prev_list[index2] and self.list[index1] <= self.list[index2]:
            #    reward = 0.1
            elif (index1 < index2 and prev_list[index1] > prev_list[index2] and self.list[index1] <= self.list[index2]) or \
                (index1 > index2 and prev_list[index1] < prev_list[index2] and self.list[index1] >= self.list[index2]):
                reward = SWAP_REWARD
            elif (index1 < index2 and prev_list[index1] < prev_list[index2] and self.list[index1] >= self.list[index2]) or \
                (index1 > index2 and prev_list[index1] > prev_list[index2] and self.list[index1] <= self.list[index2]):
                reward = -SWAP_REWARD
            else:
                reward = STEP_REWARD
            self.indices = None
        else:
            reward = INVALID_ACTION_REWARD
            self.done = True

        self.current_step += 1
        if self.current_step >= self.max_steps:
            self.done = True
        return response_token, reward, self.done, self.list.copy()


Using device: cuda


In [2]:
# Positional Encoding for Transformer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=640):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=0.1)

        pe = torch.zeros(max_len, d_model)  # (max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)  # (max_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                             (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)  # Even indices
        pe[:, 1::2] = torch.cos(position * div_term)  # Odd indices
        pe = pe.unsqueeze(1)  # (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

# Transformer Model
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, d_model=192, nhead=8, num_layers=4):
        super(TransformerModel, self).__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model

        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, vocab_size)

        self.init_weights()

    def init_weights(self):
        initrange = 0.1
        nn.init.uniform_(self.embedding.weight, -initrange, initrange)
        nn.init.zeros_(self.decoder.bias)
        nn.init.uniform_(self.decoder.weight, -initrange, initrange)

    def forward(self, src):
        src = self.embedding(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = self.decoder(output)
        return output

def decode(input_tokens, inv_vocab):
    return ' '.join([inv_vocab[x] for x in input_tokens])


def save_checkpoint(model, optimizer, episode, folder, filename):
    """
    Save the model and optimizer state to the designated filepath.

    Args:
        model (nn.Module): The model to save.
        optimizer (torch.optim.Optimizer): The optimizer whose state to save.
        episode (int): The current episode number.
        filepath (str): The path where to save the checkpoint.
    """
    filepath = os.path.join(folder, filename)
    # Ensure the directory exists
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    # Save the checkpoint
    torch.save({
        'episode': episode,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, filepath)
    print(f"Checkpoint saved at episode {episode} to {filepath}")

def load_checkpoint(filepath, model, optimizer):
    """
    Load the model and optimizer state from the designated filepath.

    Args:
        filepath (str): The path from where to load the checkpoint.
        model (nn.Module): The model into which to load the state_dict.
        optimizer (torch.optim.Optimizer): The optimizer into which to load the state.

    Returns:
        int: The episode number to resume from.
    """
    if os.path.isfile(filepath):
        checkpoint = torch.load(filepath, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        episode = checkpoint['episode']
        print(f"Checkpoint loaded from {filepath}, resuming from episode {episode}")
        return episode
    else:
        print(f"No checkpoint found at {filepath}, starting from scratch.")
        return 0

In [3]:
def compute_bellman_returns(raw_rewards, gamma):
    bellman_returns = []
    R = 0
    for r in raw_rewards[::-1]:
        R = r + gamma * R
        bellman_returns.insert(0, R)
    return bellman_returns

# Training Loop
def train(verbose=False):
    # Removed torch.autograd.set_detect_anomaly(True)
    vocab_size = len(vocab)
    model = TransformerModel(vocab_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=2.3e-5)  # Reduced learning rate
    scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=LR_SCHEDULER_GAMMA)
    
    # Optionally, load a checkpoint
    checkpoint_path = os.path.join(OUTPUT_DIR, "ckpt_36000_0.6820_426.26.pth")
    load_checkpoint(checkpoint_path, model, optimizer)

    episode_cnt = 0
    total_reward = 0.0
    num_successes = 0
    success_step_cnts = []
    
    for episode in range(NUM_EPISODES):
        t1 = time.time()
        model.train()  # Set model to training mode
        env = SortingEnv()
        initial_token_id, current_list = env.reset()
        input_tokens = [initial_token_id]
        log_probs = []
        rewards = []
        comparisons = []
        steps = 0
        
        state = 'expect_action'
        done = False
        success = False

        while not done and len(input_tokens) < env.max_steps:
            if verbose:
                print(decode(input_tokens, inv_vocab))
                print(env.get_list())
                print(comparisons)
            # Prepare input tensor
            input_seq = torch.tensor(input_tokens, dtype=torch.long, device=device).unsqueeze(1)  # (seq_len, batch_size)
            # Get model output
            with torch.autocast(device_type=device.type, dtype=torch.bfloat16):
                output = model(input_seq)  # (seq_len, batch_size, vocab_size)
                # Get logits for the last token
                logits = output[-1, 0, :]  # (vocab_size)

                # Check for NaNs in logits
                if torch.isnan(logits).any():
                    print(f"Episode {episode}, NaNs in logits before masking.")
                    break

                # Get valid tokens based on state
                def get_valid_tokens(state):
                    action_tokens = [vocab['Comparison'], vocab['Swap']]
                    index_tokens = [vocab[str(i)] for i in range(env.length)]
                    if state == 'expect_action':
                        return action_tokens
                    elif state == 'expect_index1':
                        return index_tokens[:-1]
                    elif state == 'expect_index2':
                        return [x for x in index_tokens if x > input_tokens[-1]]
                    else:
                        # Handle unexpected states by defaulting to expect_action
                        return action_tokens

                valid_token_ids = get_valid_tokens(state)

                # Ensure valid_token_ids are within the vocab range
                if any(idx >= vocab_size or idx < 0 for idx in valid_token_ids):
                    print(f"Episode {episode}, invalid indices in valid_token_ids: {valid_token_ids}")
                    break

                # Mask invalid tokens
                mask_value = -1e9  # Use a large negative value instead of -inf
                mask = torch.full_like(logits, mask_value).to(device)
                mask[valid_token_ids] = 0
                masked_logits = logits + mask

                # Sample action. Have some chance to randomly pick a valid action.
                eps_threshold = EPS_END + (EPS_START - EPS_END) * np.exp(-1.0 * episode / EPS_DECAY)
                if random.random() < eps_threshold:
                    masked_logits = masked_logits / 4

                # Check for NaNs in masked_logits
                if torch.isnan(masked_logits).any():
                    print(f"Episode {episode}, NaNs in masked_logits after masking.")
                    break

                # Compute probabilities
                probs = F.softmax(masked_logits, dim=0)

                # Check for NaNs in probs
                if torch.isnan(probs).any():
                    print(f"Episode {episode}, NaNs in probs after softmax.")
                    break

                try:
                    m = torch.distributions.Categorical(probs)
                    action_token = m.sample()
                    log_prob = m.log_prob(action_token)
                except ValueError as e:
                    print(f"Episode {episode}, error in sampling action: {e}")
                    break

            log_probs.append(log_prob)
            input_tokens.append(action_token.item())

            action = action_token.item()
            reward = 0.0
            if state == 'expect_action':
                if action == vocab['Comparison']:
                    state = 'expect_index1'
                elif action == vocab['Swap']:
                    if env.indices is None:
                        reward = INVALID_ACTION_REWARD
                        rewards.append(reward)
                        done = True
                        continue
                    action_tokens = [vocab['Swap']]
                    response_token, reward, done, current_list = env.step(action_tokens)
                    steps += 1
                    if done and reward == SUCCESS_REWARD:
                        success = True
                        if episode % 100 == 0:
                            print(decode(input_tokens, inv_vocab))
                    if verbose:
                        print("Reward:", reward)
                    state = 'expect_action'
                else:
                    reward = INVALID_ACTION_REWARD
                    done = True
            elif state == 'expect_index1':
                index1_token = action_token
                state = 'expect_index2'
            elif state == 'expect_index2':
                index2_token = action_token
                action_tokens = [vocab['Comparison'], index1_token.item(), index2_token.item()]
                comparisons.append((int(inv_vocab[index1_token.item()]), 
                                    int(inv_vocab[index2_token.item()])))
                response_token, reward, done, current_list = env.step(action_tokens)
                steps += 1
                if done and reward == SUCCESS_REWARD:
                    success = True
                    if episode % 100 == 0:
                        print(1, decode(input_tokens, inv_vocab))
                else:
                    reward += COMPARISON_ENTROPY_MULTIPLIER * compute_min_delta_entropy(comparisons)
                if verbose:
                    print("Reward:", reward)
                if response_token is not None:
                    input_tokens.append(response_token)
                state = 'expect_action'
            else:
                reward = INVALID_ACTION_REWARD
                done = True

            rewards.append(reward)
        #
        # Post processing of each episode
        success_rewards = [0.0] * len(rewards)
        if success: 
            num_successes += 1
            success_rewards[-1] = SUCCESS_REWARD
            success_step_cnts.append(steps)
        #
        assert len(log_probs) == len(rewards), "log_probs and returns have different sizes!"

        if len(log_probs) == 0:
            continue  # Skip if no actions were taken

        # Compute returns and loss within autocast
        with torch.autocast(device_type=device.type, dtype=torch.bfloat16):
            # Compute returns
            returns1 = compute_bellman_returns(rewards, SHORTTERM_GAMMA)
            returns2 = compute_bellman_returns(success_rewards, LONGTERM_GAMMA)
            returns = torch.tensor(np.array(returns1) + np.array(returns2)).to(device)

            # Check for NaNs in returns
            if torch.isnan(returns).any():
                print(f"Episode {episode}, NaNs in returns.")
                continue

            # Compute loss
            loss = 0
            for log_prob, R in zip(log_probs, returns):
                loss -= log_prob * R

            # Check for NaNs in loss
            if torch.isnan(loss):
                print(f"Episode {episode}, NaN in loss.")
                continue

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()

        episode_cnt += 1
        total_reward += sum(rewards)
        t2 = time.time()
        if episode % 1 == 0:
            print(f"Episode {episode}, loss:{loss.item():.4f}, {'succeed' if success else 'fail'}, steps:{steps}, total reward:{sum(rewards):.4f}, {t2-t1} sec")
            
        # Save checkpoint
        if episode > 0 and episode % EPISODES_SAVE == 0:
            avg_reward = total_reward / episode_cnt
            #assert len(success_step_cnts) == num_successes
            success_rate = num_successes / episode_cnt
            avg_steps = sum(success_step_cnts) / len(success_step_cnts)
            episode_cnt = 0
            total_reward = 0.0
            num_successes = 0
            total_steps = 0
            save_checkpoint(model, optimizer, episode, OUTPUT_DIR, f"ckpt_{episode}_{success_rate:.4f}_{avg_steps:.2f}.pth")
            #
            # Reduce the lr
            scheduler.step()
            # Optionally, log the learning rate
            current_lr = scheduler.get_last_lr()[0]
            print(f"Learning rate = {current_lr:.6f}")

if __name__ == "__main__":
    train(verbose=False)




Checkpoint loaded from /home/mcwave/code/autocode/datasets/rl_sort_transformer_easy/list16_transformer4_192_gamma07_step640_v3/ckpt_36000_0.6820_426.26.pth, resuming from episode 36000
Episode 0, loss:-12.1249, fail, steps:205, total reward:14.7000, 2.1134471893310547 sec
Episode 1, loss:-18.4284, fail, steps:205, total reward:16.5000, 1.5575354099273682 sec
Episode 2, loss:-12.8559, fail, steps:201, total reward:11.2000, 1.6232504844665527 sec
Episode 3, loss:-3.7204, fail, steps:205, total reward:17.2000, 1.660752534866333 sec
Episode 4, loss:-37.2764, fail, steps:199, total reward:2.5000, 1.7681288719177246 sec
Episode 5, loss:2.4569, succeed, steps:176, total reward:5.5000, 1.5079686641693115 sec
Episode 6, loss:-13.2953, succeed, steps:175, total reward:-7.5000, 1.566814661026001 sec
Episode 7, loss:7.1159, succeed, steps:199, total reward:9.1000, 1.7064220905303955 sec
Episode 8, loss:-33.6386, fail, steps:192, total reward:-1.7000, 1.6696302890777588 sec
Episode 9, loss:-39.7963

Episode 90, loss:-0.8651, succeed, steps:146, total reward:-1.9000, 1.3780906200408936 sec
Episode 91, loss:-20.0591, fail, steps:205, total reward:14.4000, 1.8467352390289307 sec
Episode 92, loss:12.1040, succeed, steps:127, total reward:7.6000, 1.1219196319580078 sec
Episode 93, loss:4.9097, succeed, steps:107, total reward:2.5000, 0.8583855628967285 sec
Episode 94, loss:20.5262, succeed, steps:195, total reward:31.4000, 1.4312350749969482 sec
Episode 95, loss:-9.6437, fail, steps:207, total reward:19.5000, 1.7678391933441162 sec
Episode 96, loss:-32.9448, fail, steps:198, total reward:4.5000, 1.6864008903503418 sec
Episode 97, loss:-21.0945, fail, steps:202, total reward:10.7000, 1.7238752841949463 sec
Episode 98, loss:1.7919, succeed, steps:170, total reward:9.6000, 1.4617316722869873 sec
Episode 99, loss:-18.0128, fail, steps:205, total reward:16.0000, 1.8613512516021729 sec
Episode 100, loss:-18.5110, fail, steps:205, total reward:16.0000, 1.6974968910217285 sec
Episode 101, loss

Episode 181, loss:2.5503, succeed, steps:192, total reward:6.9000, 1.7308392524719238 sec
Episode 182, loss:-22.7044, fail, steps:200, total reward:9.0000, 1.7265114784240723 sec
Episode 183, loss:-3.3740, succeed, steps:149, total reward:0.3000, 1.3452038764953613 sec
Episode 184, loss:-30.1162, fail, steps:195, total reward:-2.5000, 1.825821876525879 sec
Episode 185, loss:25.6338, succeed, steps:190, total reward:29.3000, 1.5035817623138428 sec
Episode 186, loss:19.5945, succeed, steps:193, total reward:23.5000, 1.6486856937408447 sec
Episode 187, loss:11.9805, succeed, steps:192, total reward:19.4000, 1.6038494110107422 sec
Episode 188, loss:16.2482, succeed, steps:203, total reward:20.9000, 1.704829216003418 sec
Episode 189, loss:10.3301, succeed, steps:186, total reward:13.6000, 1.5543601512908936 sec
Episode 190, loss:10.0821, succeed, steps:187, total reward:16.1000, 1.5101289749145508 sec
Episode 191, loss:-14.6805, fail, steps:203, total reward:14.5000, 1.8178822994232178 sec


Episode 243, loss:-22.6979, fail, steps:197, total reward:1.7000, 1.9271876811981201 sec
Episode 244, loss:-38.8428, fail, steps:196, total reward:0.7000, 1.7080776691436768 sec
Episode 245, loss:13.6587, succeed, steps:145, total reward:13.2000, 1.2164998054504395 sec
Episode 246, loss:10.4879, succeed, steps:192, total reward:11.9000, 1.6333484649658203 sec
Episode 247, loss:1.2403, succeed, steps:168, total reward:14.8000, 1.2651076316833496 sec
Episode 248, loss:-13.1353, fail, steps:204, total reward:15.5000, 1.7758331298828125 sec
Episode 249, loss:-2.4513, succeed, steps:187, total reward:12.3000, 1.5496606826782227 sec
Episode 250, loss:3.2827, succeed, steps:199, total reward:13.9000, 1.7679364681243896 sec
Episode 251, loss:-32.3780, fail, steps:200, total reward:9.3000, 1.7252285480499268 sec
Episode 252, loss:6.2229, succeed, steps:157, total reward:18.7000, 1.2982289791107178 sec
Episode 253, loss:1.1644, succeed, steps:167, total reward:7.9000, 1.3394825458526611 sec
Epis

Episode 308, loss:-22.4868, fail, steps:196, total reward:4.6000, 1.7531476020812988 sec
Episode 309, loss:-13.0904, fail, steps:210, total reward:23.8000, 1.7100038528442383 sec
Episode 310, loss:-23.6692, fail, steps:202, total reward:12.0000, 1.7096936702728271 sec
Episode 311, loss:-15.8731, fail, steps:202, total reward:10.4000, 1.742966651916504 sec
Episode 312, loss:1.5396, succeed, steps:173, total reward:17.8000, 1.4991023540496826 sec
Episode 313, loss:13.4320, succeed, steps:163, total reward:23.4000, 1.2272820472717285 sec
Episode 314, loss:8.5851, succeed, steps:124, total reward:15.6000, 0.9209718704223633 sec
Episode 315, loss:3.2167, succeed, steps:194, total reward:9.3000, 1.6159512996673584 sec
Episode 316, loss:14.7515, succeed, steps:197, total reward:33.2000, 1.4990198612213135 sec
Episode 317, loss:-15.3696, fail, steps:201, total reward:10.6000, 1.8313302993774414 sec
Episode 318, loss:-1.0052, fail, steps:209, total reward:21.9000, 1.7225990295410156 sec
Episode

Episode 399, loss:12.4863, succeed, steps:161, total reward:27.0000, 1.123887300491333 sec
Episode 400, loss:-16.5828, fail, steps:209, total reward:23.1000, 1.802739143371582 sec
Episode 401, loss:9.0699, succeed, steps:157, total reward:23.4000, 1.2697856426239014 sec
Episode 402, loss:8.7646, succeed, steps:147, total reward:20.1000, 1.112781047821045 sec
Episode 403, loss:6.0563, succeed, steps:191, total reward:4.5000, 1.8112120628356934 sec
Episode 404, loss:-13.8226, fail, steps:209, total reward:23.1000, 1.7396607398986816 sec
Episode 405, loss:-7.6931, fail, steps:211, total reward:26.4000, 1.725060224533081 sec
Episode 406, loss:-10.6008, fail, steps:205, total reward:14.7000, 1.7393646240234375 sec
Episode 407, loss:10.6486, succeed, steps:155, total reward:14.0000, 1.2726647853851318 sec
Episode 408, loss:8.0729, succeed, steps:162, total reward:19.3000, 1.304382562637329 sec
Episode 409, loss:-15.4056, fail, steps:202, total reward:12.2000, 1.8057301044464111 sec
Episode 4

Episode 490, loss:12.8354, succeed, steps:185, total reward:20.5000, 1.5601916313171387 sec
Episode 491, loss:4.5144, succeed, steps:167, total reward:12.5000, 1.3805444240570068 sec
Episode 492, loss:9.8456, succeed, steps:154, total reward:22.2000, 1.1131277084350586 sec
Episode 493, loss:2.0672, succeed, steps:203, total reward:15.3000, 1.792551040649414 sec
Episode 494, loss:-18.1283, fail, steps:205, total reward:17.2000, 1.7298851013183594 sec
Episode 495, loss:-25.8234, fail, steps:195, total reward:-1.0000, 1.7568600177764893 sec
Episode 496, loss:-3.4025, fail, steps:209, total reward:19.8000, 1.7702040672302246 sec
Episode 497, loss:0.0032, fail, steps:215, total reward:31.7000, 1.768958568572998 sec
Episode 498, loss:7.5232, succeed, steps:191, total reward:20.2000, 1.6276311874389648 sec
Episode 499, loss:-17.8282, fail, steps:204, total reward:9.5000, 1.7830791473388672 sec
len16 Comparison 9 10 less Comparison 8 9 more Swap Comparison 7 8 more Swap Comparison 6 7 equal Co

Episode 558, loss:10.3686, succeed, steps:166, total reward:22.5000, 1.463252067565918 sec
Episode 559, loss:8.7117, succeed, steps:157, total reward:9.5000, 1.2382359504699707 sec
Episode 560, loss:-4.8924, succeed, steps:173, total reward:-7.3000, 1.522261619567871 sec
Episode 561, loss:6.2403, succeed, steps:151, total reward:9.9000, 1.1761481761932373 sec
Episode 562, loss:-14.3378, fail, steps:200, total reward:8.1000, 1.854560375213623 sec
Episode 563, loss:13.2096, succeed, steps:175, total reward:23.1000, 1.5264971256256104 sec
Episode 564, loss:1.7985, succeed, steps:159, total reward:7.6000, 1.2683300971984863 sec
Episode 565, loss:10.4801, succeed, steps:116, total reward:20.3000, 0.8404257297515869 sec
Episode 566, loss:0.8637, succeed, steps:147, total reward:10.0000, 1.0591111183166504 sec
Episode 567, loss:9.6858, succeed, steps:107, total reward:9.0000, 0.7834446430206299 sec
Episode 568, loss:-7.2318, succeed, steps:198, total reward:3.6000, 1.7120866775512695 sec
Epis

Episode 649, loss:-12.6441, fail, steps:204, total reward:14.3000, 1.7583272457122803 sec
Episode 650, loss:5.9585, succeed, steps:208, total reward:21.6000, 1.7658991813659668 sec
Episode 651, loss:-11.8607, fail, steps:202, total reward:13.5000, 1.7416293621063232 sec
Episode 652, loss:8.0974, fail, steps:212, total reward:27.4000, 1.8150606155395508 sec
Episode 653, loss:2.7312, succeed, steps:158, total reward:11.0000, 1.3987071514129639 sec
Episode 654, loss:-8.3910, fail, steps:205, total reward:16.5000, 1.9171173572540283 sec
Episode 655, loss:6.0357, succeed, steps:137, total reward:1.3000, 1.2744522094726562 sec
Episode 656, loss:-20.7373, fail, steps:191, total reward:-4.8000, 2.0337493419647217 sec
Episode 657, loss:-14.0592, fail, steps:201, total reward:11.2000, 1.7880091667175293 sec
Episode 658, loss:-17.8985, fail, steps:204, total reward:13.4000, 1.7061409950256348 sec
Episode 659, loss:7.2968, succeed, steps:204, total reward:18.4000, 1.7539167404174805 sec
Episode 66

Episode 740, loss:8.2927, succeed, steps:147, total reward:9.4000, 1.35154128074646 sec
Episode 741, loss:4.7753, succeed, steps:199, total reward:19.3000, 1.7317397594451904 sec
Episode 742, loss:-14.7918, fail, steps:203, total reward:11.8000, 1.7973756790161133 sec
Episode 743, loss:-0.5352, succeed, steps:167, total reward:13.8000, 1.4849910736083984 sec
Episode 744, loss:9.0403, succeed, steps:116, total reward:21.3000, 0.8873875141143799 sec
Episode 745, loss:-15.8692, fail, steps:199, total reward:6.7000, 1.966076135635376 sec
Episode 746, loss:-6.1327, fail, steps:212, total reward:27.4000, 1.7333874702453613 sec
Episode 747, loss:-22.0966, fail, steps:204, total reward:15.5000, 1.7728819847106934 sec
Episode 748, loss:-18.8007, fail, steps:190, total reward:-3.7000, 1.7964398860931396 sec
Episode 749, loss:3.7261, succeed, steps:150, total reward:10.9000, 1.3885629177093506 sec
Episode 750, loss:7.6613, succeed, steps:158, total reward:15.3000, 1.2449936866760254 sec
Episode 7

Episode 802, loss:-0.2910, succeed, steps:184, total reward:6.6000, 1.617086410522461 sec
Episode 803, loss:6.3647, succeed, steps:138, total reward:14.7000, 1.1614058017730713 sec
Episode 804, loss:-4.1259, fail, steps:214, total reward:27.3000, 1.8915519714355469 sec
Episode 805, loss:15.9038, succeed, steps:197, total reward:25.0000, 1.6337313652038574 sec
Episode 806, loss:5.7870, succeed, steps:180, total reward:35.2000, 1.3217432498931885 sec
Episode 807, loss:1.1513, succeed, steps:188, total reward:23.7000, 1.473095417022705 sec
Episode 808, loss:-0.6647, succeed, steps:157, total reward:17.2000, 1.1880621910095215 sec
Episode 809, loss:-18.5971, fail, steps:189, total reward:-6.0000, 1.824310064315796 sec
Episode 810, loss:4.0719, succeed, steps:206, total reward:20.0000, 1.7825450897216797 sec
Episode 811, loss:-2.7319, succeed, steps:186, total reward:8.7000, 1.686687707901001 sec
Episode 812, loss:-2.6468, succeed, steps:198, total reward:16.8000, 1.8075807094573975 sec
Epi

Episode 893, loss:4.0856, succeed, steps:145, total reward:17.1000, 1.1370840072631836 sec
Episode 894, loss:6.9025, succeed, steps:152, total reward:21.2000, 1.079718828201294 sec
Episode 895, loss:10.4159, succeed, steps:142, total reward:23.3000, 0.9683685302734375 sec
Episode 896, loss:-41.3124, fail, steps:190, total reward:-10.6000, 1.8395562171936035 sec
Episode 897, loss:4.0336, succeed, steps:199, total reward:17.6000, 1.7522051334381104 sec
Episode 898, loss:4.1461, succeed, steps:151, total reward:3.7000, 1.3558058738708496 sec
Episode 899, loss:4.0731, succeed, steps:166, total reward:13.2000, 1.30938720703125 sec
len16 Comparison 9 10 less Comparison 7 8 less Comparison 6 7 less Comparison 5 6 more Swap Comparison 4 5 less Comparison 3 4 more Swap Comparison 2 3 less Comparison 1 2 more Swap Comparison 0 1 less Comparison 10 11 less Comparison 11 12 more Swap Comparison 12 13 more Swap Comparison 13 14 more Swap Comparison 14 15 more Swap Comparison 10 11 less Comparison 8

Episode 957, loss:-9.0466, fail, steps:211, total reward:26.1000, 1.8058357238769531 sec
Episode 958, loss:6.4508, succeed, steps:168, total reward:28.5000, 1.4410464763641357 sec
Episode 959, loss:-12.6194, fail, steps:200, total reward:10.2000, 1.958918809890747 sec
Episode 960, loss:-8.4412, fail, steps:207, total reward:19.2000, 1.7541499137878418 sec
Episode 961, loss:5.7351, succeed, steps:118, total reward:3.3000, 1.0653889179229736 sec
Episode 962, loss:-10.6063, fail, steps:213, total reward:28.4000, 2.067797899246216 sec
Episode 963, loss:-1.7104, fail, steps:205, total reward:16.5000, 1.8301315307617188 sec
Episode 964, loss:10.7354, succeed, steps:158, total reward:26.9000, 1.3299856185913086 sec
Episode 965, loss:-14.8262, succeed, steps:191, total reward:-6.1000, 1.9429938793182373 sec
Episode 966, loss:4.1689, succeed, steps:184, total reward:17.1000, 1.6141722202301025 sec
Episode 967, loss:0.4065, succeed, steps:200, total reward:15.3000, 1.7679407596588135 sec
Episode

Episode 1010, loss:-18.3223, fail, steps:197, total reward:4.4000, 1.814558982849121 sec
Episode 1011, loss:1.6770, succeed, steps:190, total reward:6.7000, 1.6899945735931396 sec
Episode 1012, loss:3.1737, fail, steps:224, total reward:42.8000, 1.8552625179290771 sec
Episode 1013, loss:-9.8914, fail, steps:208, total reward:22.1000, 1.75007963180542 sec
Episode 1014, loss:6.7693, succeed, steps:181, total reward:12.8000, 1.6526474952697754 sec
Episode 1015, loss:3.6134, succeed, steps:145, total reward:21.0000, 1.1306142807006836 sec
Episode 1016, loss:3.9598, succeed, steps:187, total reward:11.0000, 1.5335445404052734 sec
Episode 1017, loss:-13.2092, fail, steps:199, total reward:8.6000, 1.774608850479126 sec
Episode 1018, loss:-1.0171, fail, steps:213, total reward:28.4000, 1.887220859527588 sec
Episode 1019, loss:6.1946, succeed, steps:139, total reward:14.7000, 1.1746339797973633 sec
Episode 1020, loss:-9.0692, succeed, steps:180, total reward:-2.7000, 1.5633163452148438 sec
Epis

Episode 1101, loss:1.9913, succeed, steps:207, total reward:21.9000, 1.7936654090881348 sec
Episode 1102, loss:8.2458, succeed, steps:206, total reward:28.7000, 1.7818210124969482 sec
Episode 1103, loss:7.6132, succeed, steps:188, total reward:23.1000, 1.6286194324493408 sec
Episode 1104, loss:-11.2908, succeed, steps:182, total reward:-7.0000, 1.7322325706481934 sec
Episode 1105, loss:-10.5436, fail, steps:207, total reward:19.8000, 1.880502700805664 sec
Episode 1106, loss:-14.0058, fail, steps:205, total reward:14.1000, 1.813037633895874 sec
Episode 1107, loss:-0.8216, succeed, steps:190, total reward:8.0000, 1.6975572109222412 sec
Episode 1108, loss:-12.2866, fail, steps:204, total reward:13.4000, 1.8706564903259277 sec
Episode 1109, loss:8.0202, succeed, steps:135, total reward:17.9000, 1.2289867401123047 sec
Episode 1110, loss:6.6653, succeed, steps:194, total reward:28.4000, 1.5567011833190918 sec
Episode 1111, loss:-6.2732, fail, steps:212, total reward:25.9000, 1.78521966934204

Episode 1191, loss:6.4288, succeed, steps:176, total reward:20.1000, 1.315105676651001 sec
Episode 1192, loss:7.0590, succeed, steps:179, total reward:21.9000, 1.3891792297363281 sec
Episode 1193, loss:-13.2262, fail, steps:197, total reward:3.5000, 1.807643175125122 sec
Episode 1194, loss:-7.7863, fail, steps:208, total reward:19.6000, 1.8661460876464844 sec
Episode 1195, loss:7.0972, succeed, steps:107, total reward:16.8000, 0.869053840637207 sec
Episode 1196, loss:3.4881, succeed, steps:164, total reward:11.4000, 1.2859349250793457 sec
Episode 1197, loss:-13.8722, fail, steps:203, total reward:10.9000, 1.8248329162597656 sec
Episode 1198, loss:4.7947, succeed, steps:164, total reward:20.4000, 1.400399923324585 sec
Episode 1199, loss:8.5580, succeed, steps:151, total reward:23.5000, 1.1339836120605469 sec
len16 Comparison 9 10 less Comparison 8 9 more Swap Comparison 7 8 more Swap Comparison 6 7 more Swap Comparison 5 6 more Swap Comparison 4 5 less Comparison 3 4 more Swap Compariso

Episode 1251, loss:-1.0345, fail, steps:214, total reward:29.8000, 1.8466429710388184 sec
Episode 1252, loss:11.4206, succeed, steps:153, total reward:22.5000, 1.2936670780181885 sec
Episode 1253, loss:2.8561, succeed, steps:181, total reward:18.9000, 1.4629757404327393 sec
Episode 1254, loss:-25.7356, fail, steps:193, total reward:-6.1000, 1.8437597751617432 sec
Episode 1255, loss:5.1985, succeed, steps:168, total reward:25.8000, 1.4917845726013184 sec
Episode 1256, loss:-10.0581, fail, steps:205, total reward:16.5000, 1.9045569896697998 sec
Episode 1257, loss:7.7602, succeed, steps:147, total reward:16.2000, 1.2737596035003662 sec
Episode 1258, loss:0.2888, succeed, steps:148, total reward:7.1000, 1.1927175521850586 sec
Episode 1259, loss:-12.4459, fail, steps:207, total reward:18.3000, 1.9338722229003906 sec
Episode 1260, loss:-19.5362, fail, steps:200, total reward:7.8000, 1.8443667888641357 sec
Episode 1261, loss:11.6897, succeed, steps:173, total reward:13.9000, 1.563925504684448

Episode 1315, loss:8.1344, succeed, steps:191, total reward:33.6000, 1.4317655563354492 sec
Episode 1316, loss:-14.7585, fail, steps:200, total reward:8.7000, 1.8711764812469482 sec
Episode 1317, loss:13.0500, succeed, steps:179, total reward:29.0000, 1.4873309135437012 sec
Episode 1318, loss:-2.4951, succeed, steps:178, total reward:11.1000, 1.4915940761566162 sec
Episode 1319, loss:12.2407, succeed, steps:141, total reward:26.2000, 0.9990043640136719 sec
Episode 1320, loss:6.0706, succeed, steps:207, total reward:24.9000, 1.8067762851715088 sec
Episode 1321, loss:-6.9282, fail, steps:205, total reward:16.5000, 1.7976276874542236 sec
Episode 1322, loss:-11.9166, fail, steps:202, total reward:10.7000, 1.791480302810669 sec
Episode 1323, loss:-6.4259, fail, steps:203, total reward:9.7000, 1.78092360496521 sec
Episode 1324, loss:-13.0253, fail, steps:197, total reward:2.0000, 1.80879545211792 sec
Episode 1325, loss:1.7298, fail, steps:211, total reward:26.1000, 1.9115543365478516 sec
Epi

Episode 1400, loss:2.4883, succeed, steps:168, total reward:24.7000, 1.2010114192962646 sec
Episode 1401, loss:-12.2225, fail, steps:198, total reward:6.9000, 1.8530147075653076 sec
Episode 1402, loss:3.7948, succeed, steps:118, total reward:23.9000, 0.9633471965789795 sec
Episode 1403, loss:-13.3612, fail, steps:190, total reward:-4.9000, 2.1605660915374756 sec
Episode 1404, loss:-24.7639, fail, steps:197, total reward:5.9000, 1.7745702266693115 sec
Episode 1405, loss:-0.1302, succeed, steps:181, total reward:2.2000, 1.6314053535461426 sec
Episode 1406, loss:6.2353, succeed, steps:191, total reward:27.2000, 1.5924088954925537 sec
Episode 1407, loss:7.5624, succeed, steps:135, total reward:19.5000, 1.0811798572540283 sec
Episode 1408, loss:-6.9490, succeed, steps:187, total reward:-6.0000, 1.7399213314056396 sec
Episode 1409, loss:4.9894, succeed, steps:188, total reward:10.7000, 1.650087833404541 sec
Episode 1410, loss:-6.7142, succeed, steps:191, total reward:9.4000, 1.70408296585083

Episode 1490, loss:4.3639, succeed, steps:190, total reward:25.2000, 1.4889583587646484 sec
Episode 1491, loss:4.1743, succeed, steps:189, total reward:16.0000, 1.6095833778381348 sec
Episode 1492, loss:4.3780, succeed, steps:141, total reward:17.4000, 1.1095309257507324 sec
Episode 1493, loss:6.7907, succeed, steps:186, total reward:39.0000, 1.340644359588623 sec
Episode 1494, loss:2.7629, succeed, steps:139, total reward:16.7000, 1.061121940612793 sec
Episode 1495, loss:-5.8498, fail, steps:201, total reward:10.3000, 1.8913698196411133 sec
Episode 1496, loss:-10.0321, fail, steps:203, total reward:13.6000, 1.7851698398590088 sec
Episode 1497, loss:-10.7556, fail, steps:196, total reward:-0.9000, 1.8016161918640137 sec
Episode 1498, loss:-3.0273, succeed, steps:173, total reward:10.0000, 1.595555067062378 sec
Episode 1499, loss:4.2833, succeed, steps:162, total reward:5.2000, 1.4118382930755615 sec
len16 Comparison 9 10 more Swap Comparison 8 9 more Swap Comparison 7 8 more Swap Compa

Episode 1561, loss:2.1804, succeed, steps:195, total reward:19.3000, 1.6278960704803467 sec
Episode 1562, loss:6.5856, succeed, steps:194, total reward:17.7000, 1.6674180030822754 sec
Episode 1563, loss:-3.1855, fail, steps:211, total reward:26.4000, 1.9213964939117432 sec
Episode 1564, loss:-14.7794, fail, steps:192, total reward:-5.9000, 1.7876336574554443 sec
Episode 1565, loss:0.5430, fail, steps:219, total reward:37.0000, 1.840625286102295 sec
Episode 1566, loss:-14.2684, fail, steps:204, total reward:9.8000, 1.7825322151184082 sec
Episode 1567, loss:4.0372, succeed, steps:133, total reward:15.5000, 1.226766586303711 sec
Episode 1568, loss:8.8633, succeed, steps:180, total reward:31.0000, 1.4108030796051025 sec
Episode 1569, loss:11.0534, succeed, steps:165, total reward:20.9000, 1.2092442512512207 sec
Episode 1570, loss:-5.6655, fail, steps:211, total reward:24.5000, 1.867506980895996 sec
Episode 1571, loss:-4.2914, succeed, steps:165, total reward:5.9000, 1.4925577640533447 sec


Episode 1617, loss:9.4016, succeed, steps:170, total reward:31.7000, 1.386779546737671 sec
Episode 1618, loss:-20.1704, fail, steps:193, total reward:-1.9000, 1.979889154434204 sec
Episode 1619, loss:14.6105, succeed, steps:195, total reward:26.8000, 1.7233526706695557 sec
Episode 1620, loss:-12.7256, fail, steps:202, total reward:12.2000, 1.8909049034118652 sec
Episode 1621, loss:4.4197, succeed, steps:159, total reward:22.0000, 1.3597323894500732 sec
Episode 1622, loss:2.1066, succeed, steps:171, total reward:17.8000, 1.367163896560669 sec
Episode 1623, loss:3.6953, succeed, steps:140, total reward:16.2000, 1.032527208328247 sec
Episode 1624, loss:4.8701, succeed, steps:139, total reward:13.2000, 0.9959678649902344 sec
Episode 1625, loss:6.7121, succeed, steps:142, total reward:14.9000, 1.0461947917938232 sec
Episode 1626, loss:11.1587, succeed, steps:190, total reward:19.2000, 1.4700770378112793 sec
Episode 1627, loss:5.4823, succeed, steps:160, total reward:25.6000, 1.1674914360046

Episode 1707, loss:8.4623, succeed, steps:165, total reward:25.4000, 1.3899815082550049 sec
Episode 1708, loss:5.5825, succeed, steps:165, total reward:26.1000, 1.3275048732757568 sec
Episode 1709, loss:-9.1531, fail, steps:205, total reward:15.0000, 1.8916559219360352 sec
Episode 1710, loss:10.5223, succeed, steps:198, total reward:32.9000, 1.6049914360046387 sec
Episode 1711, loss:0.3983, fail, steps:222, total reward:41.3000, 1.881906509399414 sec
Episode 1712, loss:-2.0233, fail, steps:208, total reward:20.9000, 1.7515277862548828 sec
Episode 1713, loss:-7.2873, succeed, steps:191, total reward:5.0000, 1.7932493686676025 sec
Episode 1714, loss:-8.7888, fail, steps:211, total reward:24.9000, 1.852792739868164 sec
Episode 1715, loss:-1.2401, succeed, steps:187, total reward:11.1000, 1.680875539779663 sec
Episode 1716, loss:0.7860, succeed, steps:136, total reward:9.4000, 1.120621919631958 sec
Episode 1717, loss:5.9082, succeed, steps:173, total reward:18.8000, 1.3510417938232422 sec


Episode 1797, loss:-8.8545, fail, steps:206, total reward:18.8000, 2.0535032749176025 sec
Episode 1798, loss:5.0586, succeed, steps:174, total reward:21.4000, 1.5969314575195312 sec
Episode 1799, loss:5.7336, succeed, steps:139, total reward:18.1000, 1.074951410293579 sec
len16 Comparison 9 10 more Swap Comparison 8 9 less Comparison 7 8 less Comparison 6 7 more Swap Comparison 5 6 more Swap Comparison 4 5 more Swap Comparison 14 15 more Swap Comparison 3 4 more Swap Comparison 2 3 more Swap Comparison 12 13 more Swap Comparison 10 11 more Swap Comparison 1 2 more Swap Comparison 0 1 more Swap Comparison 9 10 more Swap Comparison 3 4 more Swap Comparison 2 3 less Comparison 8 9 more Swap Comparison 7 8 more Swap Comparison 6 7 more Swap Comparison 10 11 less Comparison 6 7 less Comparison 5 6 less Comparison 11 12 more Swap Comparison 12 13 more Swap Comparison 13 14 more Swap Comparison 4 5 more Swap Comparison 3 4 more Swap Comparison 2 3 more Swap Comparison 10 11 more Swap Comparis

Episode 1863, loss:7.9467, succeed, steps:179, total reward:19.1000, 1.531571865081787 sec
Episode 1864, loss:1.8864, succeed, steps:200, total reward:14.9000, 1.9075040817260742 sec
Episode 1865, loss:-10.2116, fail, steps:207, total reward:17.7000, 1.7968525886535645 sec
Episode 1866, loss:2.3767, succeed, steps:156, total reward:28.1000, 1.257178783416748 sec
Episode 1867, loss:5.4074, succeed, steps:152, total reward:27.4000, 1.1406996250152588 sec
Episode 1868, loss:-2.3559, fail, steps:210, total reward:24.1000, 2.013796091079712 sec
Episode 1869, loss:8.3776, succeed, steps:161, total reward:24.0000, 1.3412935733795166 sec
Episode 1870, loss:-2.5745, succeed, steps:158, total reward:-3.4000, 1.3798186779022217 sec
Episode 1871, loss:-12.0874, fail, steps:204, total reward:11.3000, 1.8777687549591064 sec
Episode 1872, loss:8.7079, succeed, steps:178, total reward:24.9000, 1.4621386528015137 sec
Episode 1873, loss:4.7782, succeed, steps:167, total reward:25.2000, 1.370199203491211

Episode 1954, loss:11.0809, succeed, steps:196, total reward:17.2000, 1.7954084873199463 sec
Episode 1955, loss:-4.4740, fail, steps:211, total reward:26.4000, 1.8849523067474365 sec
Episode 1956, loss:-10.7344, fail, steps:205, total reward:16.9000, 1.875060796737671 sec
Episode 1957, loss:-0.8718, fail, steps:217, total reward:33.1000, 1.8597688674926758 sec
Episode 1958, loss:5.1759, succeed, steps:203, total reward:23.6000, 1.7440130710601807 sec
Episode 1959, loss:4.8182, succeed, steps:163, total reward:29.7000, 1.2548933029174805 sec
Episode 1960, loss:-6.0658, fail, steps:196, total reward:0.9000, 1.9292964935302734 sec
Episode 1961, loss:1.6523, succeed, steps:188, total reward:27.6000, 1.6325440406799316 sec
Episode 1962, loss:2.1528, succeed, steps:167, total reward:20.7000, 1.3036508560180664 sec
Episode 1963, loss:2.4964, succeed, steps:197, total reward:17.5000, 1.6801419258117676 sec
Episode 1964, loss:5.3131, succeed, steps:131, total reward:21.3000, 1.0340445041656494 

Episode 2042, loss:4.1284, succeed, steps:201, total reward:13.3000, 1.812211513519287 sec
Episode 2043, loss:0.1199, fail, steps:213, total reward:28.4000, 1.838801622390747 sec
Episode 2044, loss:-0.1786, succeed, steps:197, total reward:23.1000, 1.692251205444336 sec
Episode 2045, loss:6.0770, succeed, steps:177, total reward:24.4000, 1.5216078758239746 sec
Episode 2046, loss:-8.9099, fail, steps:210, total reward:24.1000, 1.936614990234375 sec
Episode 2047, loss:-4.2920, fail, steps:212, total reward:25.9000, 1.8485925197601318 sec
Episode 2048, loss:-9.4756, fail, steps:209, total reward:22.5000, 1.870330572128296 sec
Episode 2049, loss:7.5036, succeed, steps:157, total reward:20.0000, 1.3901731967926025 sec
Episode 2050, loss:7.7518, succeed, steps:103, total reward:14.5000, 0.809180498123169 sec
Episode 2051, loss:12.0624, succeed, steps:196, total reward:35.4000, 1.4056951999664307 sec
Episode 2052, loss:-6.8602, fail, steps:214, total reward:29.4000, 1.7636606693267822 sec
Epi

Episode 2102, loss:2.4058, succeed, steps:195, total reward:30.4000, 1.477351427078247 sec
Episode 2103, loss:4.8217, succeed, steps:161, total reward:24.7000, 1.1818225383758545 sec
Episode 2104, loss:-8.5386, fail, steps:206, total reward:18.8000, 1.8666784763336182 sec
Episode 2105, loss:0.1656, succeed, steps:164, total reward:21.8000, 1.469907283782959 sec
Episode 2106, loss:0.4669, succeed, steps:186, total reward:19.7000, 1.509969711303711 sec
Episode 2107, loss:8.6473, succeed, steps:201, total reward:26.3000, 1.7111492156982422 sec
Episode 2108, loss:3.6222, succeed, steps:125, total reward:17.9000, 0.9840652942657471 sec
Episode 2109, loss:10.6686, succeed, steps:166, total reward:29.7000, 1.2038803100585938 sec
Episode 2110, loss:5.6975, succeed, steps:173, total reward:23.0000, 1.3264591693878174 sec
Episode 2111, loss:-9.5568, fail, steps:198, total reward:6.9000, 1.8635504245758057 sec
Episode 2112, loss:-9.6324, fail, steps:194, total reward:1.6000, 1.8541195392608643 se

Episode 2192, loss:4.3652, succeed, steps:173, total reward:7.6000, 1.3876440525054932 sec
Episode 2193, loss:1.9844, succeed, steps:203, total reward:22.9000, 1.7463006973266602 sec
Episode 2194, loss:5.3123, succeed, steps:152, total reward:22.2000, 1.29595947265625 sec
Episode 2195, loss:-5.7581, fail, steps:216, total reward:31.5000, 2.107426404953003 sec
Episode 2196, loss:11.5799, succeed, steps:174, total reward:6.0000, 1.5831241607666016 sec
Episode 2197, loss:1.7619, succeed, steps:171, total reward:22.3000, 1.3477823734283447 sec
Episode 2198, loss:-2.0520, fail, steps:210, total reward:24.1000, 1.9006922245025635 sec
Episode 2199, loss:7.0565, succeed, steps:199, total reward:32.5000, 1.6395676136016846 sec
len16 Comparison 9 10 more Swap Comparison 8 9 less Comparison 7 8 more Swap Comparison 6 7 less Comparison 5 6 less Comparison 4 5 more Swap Comparison 3 4 more Swap Comparison 2 3 more Swap Comparison 1 2 more Swap Comparison 0 1 more Swap Comparison 10 11 more Swap Com

Episode 2258, loss:5.2596, succeed, steps:175, total reward:5.9000, 1.3705775737762451 sec
Episode 2259, loss:5.1461, succeed, steps:192, total reward:16.1000, 1.6594343185424805 sec
Episode 2260, loss:11.5540, succeed, steps:204, total reward:35.8000, 1.764174222946167 sec
Episode 2261, loss:2.9431, succeed, steps:168, total reward:20.5000, 1.4394135475158691 sec
Episode 2262, loss:6.3126, succeed, steps:205, total reward:23.7000, 1.7808291912078857 sec
Episode 2263, loss:0.3233, succeed, steps:189, total reward:10.9000, 1.6604456901550293 sec
Episode 2264, loss:3.8383, succeed, steps:167, total reward:19.1000, 1.4528813362121582 sec
Episode 2265, loss:-1.3289, succeed, steps:211, total reward:27.0000, 2.0224459171295166 sec
Episode 2266, loss:-7.4285, fail, steps:202, total reward:10.2000, 1.8379113674163818 sec
Episode 2267, loss:6.0288, succeed, steps:155, total reward:16.2000, 1.3781323432922363 sec
Episode 2268, loss:-0.2135, succeed, steps:173, total reward:4.8000, 1.50523591041

Episode 2324, loss:1.2828, succeed, steps:154, total reward:16.1000, 1.3579392433166504 sec
Episode 2325, loss:3.3061, succeed, steps:200, total reward:22.6000, 1.6719303131103516 sec
Episode 2326, loss:2.0152, succeed, steps:176, total reward:19.5000, 1.458104133605957 sec
Episode 2327, loss:-6.4503, fail, steps:205, total reward:13.6000, 2.0191752910614014 sec
Episode 2328, loss:2.7756, fail, steps:208, total reward:20.8000, 1.8323986530303955 sec
Episode 2329, loss:7.6511, succeed, steps:183, total reward:32.8000, 1.5038981437683105 sec
Episode 2330, loss:2.3682, fail, steps:216, total reward:32.7000, 1.9406638145446777 sec
Episode 2331, loss:5.6601, succeed, steps:123, total reward:19.2000, 1.0924038887023926 sec
Episode 2332, loss:-13.2959, fail, steps:201, total reward:7.3000, 2.226555109024048 sec
Episode 2333, loss:2.9634, succeed, steps:204, total reward:26.7000, 1.7250254154205322 sec
Episode 2334, loss:4.3708, succeed, steps:174, total reward:2.8000, 1.5790131092071533 sec
E

Episode 2400, loss:6.3649, succeed, steps:202, total reward:22.1000, 1.7963268756866455 sec
Episode 2401, loss:1.3165, succeed, steps:157, total reward:16.1000, 1.4698607921600342 sec
Episode 2402, loss:4.3696, succeed, steps:160, total reward:10.1000, 1.3378229141235352 sec
Episode 2403, loss:-2.3368, fail, steps:210, total reward:22.0000, 1.9049642086029053 sec
Episode 2404, loss:3.9949, succeed, steps:127, total reward:9.5000, 1.1628155708312988 sec
Episode 2405, loss:-12.8855, fail, steps:203, total reward:9.4000, 2.119633674621582 sec
Episode 2406, loss:4.6897, succeed, steps:196, total reward:18.7000, 1.8135409355163574 sec
Episode 2407, loss:3.5148, succeed, steps:160, total reward:3.1000, 1.4399299621582031 sec
Episode 2408, loss:6.1573, succeed, steps:153, total reward:30.3000, 1.158672571182251 sec
Episode 2409, loss:5.0862, succeed, steps:171, total reward:28.8000, 1.2109014987945557 sec
Episode 2410, loss:6.9350, succeed, steps:170, total reward:30.5000, 1.1635875701904297 

Episode 2490, loss:3.8291, succeed, steps:176, total reward:21.6000, 1.5494139194488525 sec
Episode 2491, loss:4.9340, succeed, steps:123, total reward:13.3000, 1.0047070980072021 sec
Episode 2492, loss:8.7813, succeed, steps:152, total reward:24.8000, 1.0836825370788574 sec
Episode 2493, loss:2.7612, succeed, steps:196, total reward:17.4000, 1.825221300125122 sec
Episode 2494, loss:7.7523, succeed, steps:182, total reward:28.1000, 1.524810791015625 sec
Episode 2495, loss:-4.7933, fail, steps:206, total reward:18.8000, 1.9852557182312012 sec
Episode 2496, loss:5.5427, succeed, steps:180, total reward:13.1000, 1.6200659275054932 sec
Episode 2497, loss:-3.1304, succeed, steps:204, total reward:12.1000, 1.9185781478881836 sec
Episode 2498, loss:5.7602, succeed, steps:162, total reward:27.6000, 1.354578971862793 sec
Episode 2499, loss:2.8930, succeed, steps:141, total reward:10.5000, 1.1567802429199219 sec
Episode 2500, loss:-5.0837, fail, steps:208, total reward:19.4000, 2.027443885803222

Episode 2580, loss:2.7767, succeed, steps:169, total reward:19.0000, 1.4759430885314941 sec
Episode 2581, loss:-4.4318, fail, steps:205, total reward:16.5000, 2.0562963485717773 sec
Episode 2582, loss:5.2115, succeed, steps:166, total reward:24.5000, 1.4429738521575928 sec
Episode 2583, loss:3.5438, succeed, steps:171, total reward:30.5000, 1.2939486503601074 sec
Episode 2584, loss:2.8859, succeed, steps:201, total reward:21.9000, 1.6945123672485352 sec
Episode 2585, loss:0.7712, succeed, steps:187, total reward:15.3000, 1.6260817050933838 sec
Episode 2586, loss:-2.3004, succeed, steps:155, total reward:11.0000, 1.3286824226379395 sec
Episode 2587, loss:7.7803, succeed, steps:220, total reward:40.1000, 1.9155840873718262 sec
Episode 2588, loss:3.6217, succeed, steps:148, total reward:25.3000, 1.2588467597961426 sec
Episode 2589, loss:-1.6322, fail, steps:211, total reward:26.4000, 2.0890309810638428 sec
Episode 2590, loss:-0.6963, fail, steps:214, total reward:29.4000, 1.85060834884643

Episode 2670, loss:1.5570, succeed, steps:126, total reward:12.4000, 1.0358483791351318 sec
Episode 2671, loss:5.8311, succeed, steps:174, total reward:34.1000, 1.1996853351593018 sec
Episode 2672, loss:2.2355, succeed, steps:123, total reward:16.6000, 0.9262053966522217 sec
Episode 2673, loss:4.3517, succeed, steps:167, total reward:13.1000, 1.2126643657684326 sec
Episode 2674, loss:8.8037, succeed, steps:176, total reward:38.4000, 1.1649224758148193 sec
Episode 2675, loss:0.9155, succeed, steps:154, total reward:12.9000, 1.1192984580993652 sec
Episode 2676, loss:0.6375, succeed, steps:185, total reward:24.0000, 1.3243522644042969 sec
Episode 2677, loss:2.1608, succeed, steps:162, total reward:11.2000, 1.237654209136963 sec
Episode 2678, loss:0.8974, succeed, steps:143, total reward:27.5000, 0.9801485538482666 sec
Episode 2679, loss:-6.2074, fail, steps:212, total reward:25.6000, 1.915313959121704 sec
Episode 2680, loss:7.1791, succeed, steps:173, total reward:26.4000, 1.4448277950286

Episode 2732, loss:3.5965, succeed, steps:170, total reward:20.4000, 1.375087022781372 sec
Episode 2733, loss:6.0885, succeed, steps:152, total reward:33.2000, 1.0924882888793945 sec
Episode 2734, loss:3.5639, succeed, steps:151, total reward:14.0000, 1.0963807106018066 sec
Episode 2735, loss:10.3615, succeed, steps:206, total reward:26.1000, 1.890641689300537 sec
Episode 2736, loss:6.9292, succeed, steps:202, total reward:14.7000, 1.7522492408752441 sec
Episode 2737, loss:-8.1194, fail, steps:201, total reward:10.0000, 1.827157735824585 sec
Episode 2738, loss:2.7626, succeed, steps:208, total reward:24.3000, 1.8003895282745361 sec
Episode 2739, loss:-2.0872, fail, steps:217, total reward:32.8000, 1.8487827777862549 sec
Episode 2740, loss:3.3994, succeed, steps:152, total reward:22.8000, 1.379089117050171 sec
Episode 2741, loss:7.6095, succeed, steps:175, total reward:15.6000, 1.4728538990020752 sec
Episode 2742, loss:-11.8790, fail, steps:205, total reward:16.6000, 1.8777284622192383 

Episode 2822, loss:3.2943, succeed, steps:175, total reward:27.8000, 1.3248486518859863 sec
Episode 2823, loss:-8.3004, fail, steps:195, total reward:-2.2000, 1.8739545345306396 sec
Episode 2824, loss:10.1921, succeed, steps:185, total reward:37.7000, 1.5923173427581787 sec
Episode 2825, loss:-20.9371, fail, steps:190, total reward:-6.7000, 1.9541935920715332 sec
Episode 2826, loss:-7.2309, fail, steps:205, total reward:16.6000, 1.820343017578125 sec
Episode 2827, loss:-2.2172, fail, steps:216, total reward:30.3000, 1.8922016620635986 sec
Episode 2828, loss:-0.4522, fail, steps:215, total reward:30.5000, 1.888305425643921 sec
Episode 2829, loss:-1.9055, fail, steps:223, total reward:43.0000, 1.8391761779785156 sec
Episode 2830, loss:5.0272, succeed, steps:190, total reward:21.8000, 1.6502223014831543 sec
Episode 2831, loss:-9.6657, fail, steps:202, total reward:11.0000, 1.9416179656982422 sec
Episode 2832, loss:4.6800, succeed, steps:150, total reward:23.5000, 1.2647686004638672 sec
Ep

Episode 2913, loss:7.8292, succeed, steps:155, total reward:17.4000, 1.1565189361572266 sec
Episode 2914, loss:4.7245, succeed, steps:157, total reward:13.0000, 1.208932876586914 sec
Episode 2915, loss:-10.8824, fail, steps:198, total reward:6.9000, 1.9234752655029297 sec
Episode 2916, loss:-4.5835, fail, steps:209, total reward:22.2000, 1.78564453125 sec
Episode 2917, loss:8.8446, succeed, steps:191, total reward:18.7000, 1.6313059329986572 sec
Episode 2918, loss:2.3368, succeed, steps:128, total reward:2.8000, 1.1146464347839355 sec
Episode 2919, loss:0.6540, succeed, steps:125, total reward:4.0000, 0.9605293273925781 sec
Episode 2920, loss:-7.5617, fail, steps:205, total reward:16.5000, 1.9704780578613281 sec
Episode 2921, loss:5.9636, succeed, steps:139, total reward:15.0000, 1.218172550201416 sec
Episode 2922, loss:-8.1776, fail, steps:199, total reward:7.9000, 2.062429666519165 sec
Episode 2923, loss:3.7086, succeed, steps:143, total reward:12.0000, 1.2612049579620361 sec
Episode

Episode 3001, loss:2.5900, succeed, steps:194, total reward:23.6000, 1.7251830101013184 sec
Episode 3002, loss:8.5621, succeed, steps:170, total reward:28.2000, 1.3253934383392334 sec
Episode 3003, loss:2.8245, succeed, steps:130, total reward:14.8000, 0.9719433784484863 sec
Episode 3004, loss:-10.1599, fail, steps:204, total reward:12.2000, 2.0331199169158936 sec
Episode 3005, loss:1.1800, succeed, steps:161, total reward:16.7000, 1.4123406410217285 sec
Episode 3006, loss:1.1970, succeed, steps:186, total reward:19.6000, 1.576430082321167 sec
Episode 3007, loss:2.8746, succeed, steps:163, total reward:22.2000, 1.191579818725586 sec
Episode 3008, loss:5.4528, succeed, steps:172, total reward:26.6000, 1.2478456497192383 sec
Episode 3009, loss:0.9988, succeed, steps:199, total reward:20.9000, 1.6747543811798096 sec
Episode 3010, loss:-0.5822, succeed, steps:148, total reward:10.5000, 1.2021849155426025 sec
Episode 3011, loss:-5.6212, fail, steps:205, total reward:17.8000, 1.9448628425598

Episode 3091, loss:1.3678, fail, steps:213, total reward:28.4000, 1.9479289054870605 sec
Episode 3092, loss:4.5268, succeed, steps:180, total reward:32.6000, 1.519361972808838 sec
Episode 3093, loss:10.5557, succeed, steps:162, total reward:28.4000, 1.2292592525482178 sec
Episode 3094, loss:2.1189, fail, steps:207, total reward:19.8000, 1.914506196975708 sec
Episode 3095, loss:-4.5631, fail, steps:204, total reward:14.3000, 1.794609546661377 sec
Episode 3096, loss:4.5274, succeed, steps:203, total reward:36.8000, 1.7926220893859863 sec
Episode 3097, loss:11.1653, succeed, steps:182, total reward:29.4000, 1.4394946098327637 sec
Episode 3098, loss:-0.0009, succeed, steps:168, total reward:-1.3000, 1.3938915729522705 sec
Episode 3099, loss:1.4950, succeed, steps:145, total reward:12.6000, 1.08493971824646 sec
len16 Comparison 10 13 less Comparison 8 14 more Swap Comparison 6 14 less Comparison 14 15 more Swap Comparison 2 5 more Swap Comparison 3 4 more Swap Comparison 1 2 more Swap Compa

Episode 3145, loss:-8.2638, fail, steps:211, total reward:23.7000, 1.8529667854309082 sec
Episode 3146, loss:6.0092, succeed, steps:168, total reward:20.6000, 1.4647040367126465 sec
Episode 3147, loss:2.0854, succeed, steps:189, total reward:35.5000, 1.5266120433807373 sec
Episode 3148, loss:-1.4202, fail, steps:217, total reward:33.8000, 1.8842236995697021 sec
Episode 3149, loss:11.1307, succeed, steps:144, total reward:19.8000, 1.2233655452728271 sec
Episode 3150, loss:6.3866, succeed, steps:207, total reward:22.4000, 2.0496275424957275 sec
Episode 3151, loss:4.5996, succeed, steps:131, total reward:20.0000, 1.1486897468566895 sec
Episode 3152, loss:-6.3481, fail, steps:216, total reward:32.7000, 2.215414524078369 sec
Episode 3153, loss:-0.5605, succeed, steps:118, total reward:10.6000, 1.0887384414672852 sec
Episode 3154, loss:-13.0078, fail, steps:199, total reward:8.9000, 2.1781857013702393 sec
Episode 3155, loss:6.8699, succeed, steps:181, total reward:11.5000, 1.56007981300354 s

Episode 3236, loss:3.8163, succeed, steps:212, total reward:33.4000, 1.9980995655059814 sec
Episode 3237, loss:5.4457, succeed, steps:192, total reward:27.8000, 1.6102890968322754 sec
Episode 3238, loss:10.3625, succeed, steps:163, total reward:30.3000, 1.27945876121521 sec
Episode 3239, loss:-8.2589, fail, steps:211, total reward:24.2000, 1.920192003250122 sec
Episode 3240, loss:-3.2148, succeed, steps:166, total reward:8.6000, 1.5187735557556152 sec
Episode 3241, loss:-0.6997, succeed, steps:166, total reward:1.0000, 1.4546623229980469 sec
Episode 3242, loss:7.1108, succeed, steps:197, total reward:31.1000, 1.5979039669036865 sec
Episode 3243, loss:7.0038, succeed, steps:147, total reward:17.8000, 1.1787958145141602 sec
Episode 3244, loss:6.9659, succeed, steps:159, total reward:28.6000, 1.0962576866149902 sec
Episode 3245, loss:4.4050, succeed, steps:179, total reward:9.1000, 1.3730380535125732 sec
Episode 3246, loss:4.7039, succeed, steps:169, total reward:24.6000, 1.20588374137878

Episode 3300, loss:2.3505, succeed, steps:179, total reward:11.4000, 1.612424373626709 sec
Episode 3301, loss:-7.6727, fail, steps:208, total reward:19.7000, 2.032684326171875 sec
Episode 3302, loss:-1.9653, succeed, steps:188, total reward:9.2000, 1.6979115009307861 sec
Episode 3303, loss:-3.6154, fail, steps:209, total reward:22.2000, 1.9107396602630615 sec
Episode 3304, loss:8.8115, succeed, steps:178, total reward:29.3000, 1.5563735961914062 sec
Episode 3305, loss:1.5156, succeed, steps:184, total reward:26.0000, 1.4661965370178223 sec
Episode 3306, loss:2.8399, succeed, steps:192, total reward:29.2000, 1.4897468090057373 sec
Episode 3307, loss:3.2190, succeed, steps:183, total reward:21.5000, 1.3521649837493896 sec
Episode 3308, loss:3.2270, succeed, steps:143, total reward:10.4000, 1.0814085006713867 sec
Episode 3309, loss:1.2744, succeed, steps:134, total reward:15.2000, 0.9590027332305908 sec
Episode 3310, loss:-3.3220, succeed, steps:174, total reward:5.0000, 1.308489561080932

Episode 3390, loss:-5.2144, fail, steps:204, total reward:13.4000, 1.8519549369812012 sec
Episode 3391, loss:4.6527, succeed, steps:154, total reward:23.3000, 1.366621971130371 sec
Episode 3392, loss:14.5670, succeed, steps:213, total reward:41.0000, 1.7876780033111572 sec
Episode 3393, loss:4.7636, succeed, steps:194, total reward:31.1000, 1.6089813709259033 sec
Episode 3394, loss:10.6954, succeed, steps:195, total reward:30.7000, 1.5471773147583008 sec
Episode 3395, loss:6.3734, succeed, steps:140, total reward:20.3000, 1.0912811756134033 sec
Episode 3396, loss:-11.0953, fail, steps:207, total reward:19.8000, 2.0123238563537598 sec
Episode 3397, loss:3.4001, succeed, steps:179, total reward:34.2000, 1.4892895221710205 sec
Episode 3398, loss:-6.7183, fail, steps:208, total reward:20.3000, 2.0313501358032227 sec
Episode 3399, loss:3.6581, succeed, steps:127, total reward:23.5000, 1.0550222396850586 sec
len16 Comparison 9 10 less Comparison 8 9 more Swap Comparison 7 8 less Comparison 6

Episode 3449, loss:-1.8494, succeed, steps:204, total reward:13.5000, 1.8972079753875732 sec
Episode 3450, loss:1.3933, succeed, steps:115, total reward:25.8000, 1.0383038520812988 sec
Episode 3451, loss:-1.8607, fail, steps:216, total reward:32.7000, 2.2656545639038086 sec
Episode 3452, loss:5.3394, succeed, steps:165, total reward:28.0000, 1.3914918899536133 sec
Episode 3453, loss:5.6739, succeed, steps:202, total reward:19.3000, 1.9890825748443604 sec
Episode 3454, loss:1.3169, fail, steps:220, total reward:38.0000, 1.9425599575042725 sec
Episode 3455, loss:-5.5277, fail, steps:205, total reward:16.5000, 1.8478147983551025 sec
Episode 3456, loss:0.7970, succeed, steps:188, total reward:15.6000, 1.7566919326782227 sec
Episode 3457, loss:2.3543, succeed, steps:127, total reward:12.5000, 1.0298452377319336 sec
Episode 3458, loss:9.9208, succeed, steps:183, total reward:37.9000, 1.3206448554992676 sec
Episode 3459, loss:5.2441, succeed, steps:164, total reward:23.1000, 1.265504598617553

Episode 3515, loss:-1.0628, succeed, steps:161, total reward:8.8000, 1.531144380569458 sec
Episode 3516, loss:-6.9900, fail, steps:197, total reward:5.9000, 2.015544891357422 sec
Episode 3517, loss:5.9947, succeed, steps:156, total reward:19.2000, 1.4062716960906982 sec
Episode 3518, loss:-9.2751, fail, steps:199, total reward:8.0000, 2.081319808959961 sec
Episode 3519, loss:-4.4964, fail, steps:206, total reward:16.1000, 1.8199036121368408 sec
Episode 3520, loss:5.1861, succeed, steps:192, total reward:31.1000, 1.6810314655303955 sec
Episode 3521, loss:0.0815, succeed, steps:184, total reward:12.6000, 1.52628493309021 sec
Episode 3522, loss:1.9833, succeed, steps:209, total reward:24.3000, 1.815152883529663 sec
Episode 3523, loss:-9.4030, fail, steps:197, total reward:4.1000, 1.872642993927002 sec
Episode 3524, loss:-7.3321, succeed, steps:197, total reward:7.8000, 1.8445448875427246 sec
Episode 3525, loss:-9.0015, fail, steps:211, total reward:23.4000, 1.9013214111328125 sec
Episode 

Episode 3600, loss:2.8018, succeed, steps:191, total reward:33.6000, 1.3004508018493652 sec
Episode 3601, loss:2.3998, succeed, steps:153, total reward:13.2000, 1.1136395931243896 sec
Episode 3602, loss:6.3620, succeed, steps:189, total reward:32.5000, 1.3059709072113037 sec
Episode 3603, loss:-9.5191, fail, steps:202, total reward:7.4000, 1.890381097793579 sec
Episode 3604, loss:11.1993, succeed, steps:161, total reward:35.4000, 1.359626054763794 sec
Episode 3605, loss:3.1397, succeed, steps:159, total reward:23.3000, 1.2315900325775146 sec
Episode 3606, loss:-0.5086, fail, steps:212, total reward:25.6000, 2.0054969787597656 sec
Episode 3607, loss:2.3802, succeed, steps:157, total reward:17.4000, 1.412343978881836 sec
Episode 3608, loss:4.7455, succeed, steps:168, total reward:35.0000, 1.231227159500122 sec
Episode 3609, loss:-0.6236, fail, steps:208, total reward:20.8000, 2.017259120941162 sec
Episode 3610, loss:4.2377, succeed, steps:147, total reward:21.7000, 1.2395150661468506 sec

Episode 3690, loss:3.8306, succeed, steps:148, total reward:12.5000, 1.3906738758087158 sec
Episode 3691, loss:-3.6074, fail, steps:209, total reward:23.1000, 2.075899600982666 sec
Episode 3692, loss:2.0480, succeed, steps:128, total reward:3.2000, 1.2057912349700928 sec
Episode 3693, loss:-7.0749, fail, steps:200, total reward:8.4000, 2.116870641708374 sec
Episode 3694, loss:0.8774, succeed, steps:127, total reward:15.2000, 1.1334426403045654 sec
Episode 3695, loss:0.6002, succeed, steps:178, total reward:7.3000, 1.6529254913330078 sec
Episode 3696, loss:4.7910, succeed, steps:175, total reward:19.8000, 1.360499620437622 sec
Episode 3697, loss:-3.9151, fail, steps:211, total reward:25.2000, 1.891730785369873 sec
Episode 3698, loss:-7.5636, fail, steps:207, total reward:16.5000, 1.835970163345337 sec
Episode 3699, loss:-2.6988, fail, steps:210, total reward:24.1000, 1.8449711799621582 sec
len16 Comparison 9 10 more Swap Comparison 8 9 less Comparison 7 8 more Swap Comparison 6 7 more S

Episode 3749, loss:2.3111, succeed, steps:110, total reward:10.5000, 0.9158017635345459 sec
Episode 3750, loss:0.3384, succeed, steps:187, total reward:12.3000, 1.609182596206665 sec
Episode 3751, loss:1.6033, succeed, steps:174, total reward:24.2000, 1.34116530418396 sec
Episode 3752, loss:2.7125, succeed, steps:160, total reward:21.8000, 1.2682666778564453 sec
Episode 3753, loss:2.3272, succeed, steps:154, total reward:22.6000, 1.1015799045562744 sec
Episode 3754, loss:7.3155, succeed, steps:179, total reward:33.6000, 1.2129156589508057 sec
Episode 3755, loss:-5.8721, fail, steps:208, total reward:20.8000, 1.8985621929168701 sec
Episode 3756, loss:3.9587, succeed, steps:171, total reward:13.0000, 1.529876708984375 sec
Episode 3757, loss:3.0583, succeed, steps:156, total reward:21.6000, 1.2829957008361816 sec
Episode 3758, loss:-5.0233, succeed, steps:141, total reward:14.4000, 1.039921522140503 sec
Episode 3759, loss:2.8792, succeed, steps:147, total reward:20.1000, 1.020753622055053

Episode 3804, loss:-1.4684, succeed, steps:191, total reward:16.7000, 1.6210932731628418 sec
Episode 3805, loss:-1.1532, fail, steps:214, total reward:28.8000, 2.0098206996917725 sec
Episode 3806, loss:-1.6600, succeed, steps:203, total reward:23.2000, 1.8270447254180908 sec
Episode 3807, loss:5.1495, succeed, steps:147, total reward:21.7000, 1.2942628860473633 sec
Episode 3808, loss:3.9153, fail, steps:218, total reward:36.0000, 2.1457860469818115 sec
Episode 3809, loss:-3.0591, fail, steps:212, total reward:27.4000, 1.8027749061584473 sec
Episode 3810, loss:-2.1067, fail, steps:207, total reward:18.6000, 1.8930935859680176 sec
Episode 3811, loss:1.1590, succeed, steps:147, total reward:15.9000, 1.334322452545166 sec
Episode 3812, loss:-2.3122, fail, steps:220, total reward:37.4000, 2.128688097000122 sec
Episode 3813, loss:5.5002, succeed, steps:194, total reward:40.1000, 1.633690595626831 sec
Episode 3814, loss:1.9386, fail, steps:210, total reward:23.5000, 1.9087040424346924 sec
Epi

Episode 3894, loss:-6.8559, fail, steps:205, total reward:15.1000, 2.0918774604797363 sec
Episode 3895, loss:0.2580, succeed, steps:177, total reward:30.4000, 1.4611494541168213 sec
Episode 3896, loss:-5.5651, fail, steps:207, total reward:18.0000, 2.018301248550415 sec
Episode 3897, loss:2.7104, succeed, steps:186, total reward:10.0000, 1.7391598224639893 sec
Episode 3898, loss:0.9035, succeed, steps:126, total reward:13.7000, 1.0269923210144043 sec
Episode 3899, loss:-2.7873, fail, steps:211, total reward:26.4000, 2.0702457427978516 sec
len16 Comparison 9 10 less Comparison 8 9 less Comparison 7 8 more Swap Comparison 6 7 less Comparison 5 6 more Swap Comparison 4 5 more Swap Comparison 3 4 more Swap Comparison 2 3 less Comparison 1 2 more Swap Comparison 0 1 more Swap Comparison 10 11 more Swap Comparison 11 12 more Swap Comparison 12 13 less Comparison 13 14 more Swap Comparison 14 15 more Swap Comparison 9 10 more Swap Comparison 8 9 more Swap Comparison 7 8 more Swap Comparison 6

Episode 3961, loss:3.6524, succeed, steps:191, total reward:29.3000, 1.5960958003997803 sec
Episode 3962, loss:-0.2800, succeed, steps:198, total reward:18.9000, 1.788508653640747 sec
Episode 3963, loss:-6.8220, fail, steps:206, total reward:14.9000, 1.907200574874878 sec
Episode 3964, loss:5.4339, succeed, steps:163, total reward:31.6000, 1.3795013427734375 sec
Episode 3965, loss:2.6420, succeed, steps:176, total reward:24.4000, 1.4159650802612305 sec
Episode 3966, loss:7.3721, succeed, steps:173, total reward:35.1000, 1.19297194480896 sec
Episode 3967, loss:-7.2207, fail, steps:214, total reward:30.7000, 2.0196211338043213 sec
Episode 3968, loss:-4.7954, fail, steps:213, total reward:24.5000, 1.8554377555847168 sec
Episode 3969, loss:-1.1603, succeed, steps:169, total reward:15.6000, 1.529454231262207 sec
Episode 3970, loss:-4.2431, fail, steps:204, total reward:12.8000, 1.992828130722046 sec
Episode 3971, loss:7.0512, succeed, steps:145, total reward:22.7000, 1.249678611755371 sec
E

Episode 4019, loss:-1.7960, fail, steps:201, total reward:8.2000, 1.8439910411834717 sec
Episode 4020, loss:-1.6143, fail, steps:213, total reward:28.4000, 1.842149019241333 sec
Episode 4021, loss:-4.3015, fail, steps:207, total reward:18.3000, 1.8135876655578613 sec
Episode 4022, loss:-6.8708, fail, steps:201, total reward:9.7000, 1.8916428089141846 sec
Episode 4023, loss:4.8031, succeed, steps:192, total reward:28.5000, 1.6746203899383545 sec
Episode 4024, loss:3.5575, succeed, steps:130, total reward:11.2000, 1.0821583271026611 sec
Episode 4025, loss:-1.6729, fail, steps:205, total reward:16.9000, 2.024338722229004 sec
Episode 4026, loss:-6.9854, fail, steps:214, total reward:25.3000, 1.8471777439117432 sec
Episode 4027, loss:-9.0959, fail, steps:201, total reward:5.2000, 1.861574411392212 sec
Episode 4028, loss:3.0865, succeed, steps:151, total reward:30.3000, 1.2855970859527588 sec
Episode 4029, loss:1.9361, succeed, steps:149, total reward:16.1000, 1.169954776763916 sec
Episode 4

Episode 4100, loss:3.7612, succeed, steps:180, total reward:35.3000, 1.1948330402374268 sec
Episode 4101, loss:6.3319, succeed, steps:114, total reward:21.2000, 0.792102575302124 sec
Episode 4102, loss:6.1746, succeed, steps:179, total reward:28.8000, 1.2479286193847656 sec
Episode 4103, loss:-0.4726, fail, steps:215, total reward:30.8000, 1.87154221534729 sec
Episode 4104, loss:1.9504, succeed, steps:150, total reward:26.0000, 1.2530429363250732 sec
Episode 4105, loss:-1.1868, succeed, steps:187, total reward:13.4000, 1.5999424457550049 sec
Episode 4106, loss:4.6897, succeed, steps:216, total reward:31.6000, 1.971850872039795 sec
Episode 4107, loss:-1.5168, succeed, steps:174, total reward:3.3000, 1.6254167556762695 sec
Episode 4108, loss:4.6705, succeed, steps:130, total reward:21.6000, 1.0217223167419434 sec
Episode 4109, loss:1.7947, succeed, steps:173, total reward:33.2000, 1.2113127708435059 sec
Episode 4110, loss:9.8878, succeed, steps:186, total reward:33.5000, 1.29519677162170

Episode 4190, loss:-10.5623, fail, steps:202, total reward:10.8000, 2.1901674270629883 sec
Episode 4191, loss:-15.1627, fail, steps:191, total reward:-4.8000, 1.8377139568328857 sec
Episode 4192, loss:1.9151, succeed, steps:178, total reward:25.8000, 1.6246166229248047 sec
Episode 4193, loss:4.4484, succeed, steps:149, total reward:19.2000, 1.177448034286499 sec
Episode 4194, loss:-0.5917, succeed, steps:130, total reward:11.7000, 0.9483590126037598 sec
Episode 4195, loss:0.5965, succeed, steps:156, total reward:23.3000, 1.0802557468414307 sec
Episode 4196, loss:2.4052, fail, steps:210, total reward:21.4000, 1.9269025325775146 sec
Episode 4197, loss:3.0428, succeed, steps:171, total reward:30.1000, 1.453761339187622 sec
Episode 4198, loss:-3.1212, fail, steps:214, total reward:27.9000, 2.070986032485962 sec
Episode 4199, loss:5.3450, succeed, steps:143, total reward:22.9000, 1.2449429035186768 sec
len16 Comparison 9 10 more Swap Comparison 8 9 more Swap Comparison 7 8 more Swap Compari

Episode 4246, loss:1.4784, succeed, steps:153, total reward:26.4000, 1.1075904369354248 sec
Episode 4247, loss:0.4984, fail, steps:220, total reward:38.0000, 1.992875099182129 sec
Episode 4248, loss:2.7466, succeed, steps:195, total reward:31.7000, 1.6460390090942383 sec
Episode 4249, loss:3.8629, succeed, steps:175, total reward:35.9000, 1.2919328212738037 sec
Episode 4250, loss:1.4402, succeed, steps:155, total reward:24.6000, 1.1241064071655273 sec
Episode 4251, loss:7.5630, succeed, steps:196, total reward:32.5000, 1.4564390182495117 sec
Episode 4252, loss:1.0490, succeed, steps:160, total reward:17.6000, 1.1580970287322998 sec
Episode 4253, loss:5.9057, succeed, steps:127, total reward:1.9000, 0.9786279201507568 sec
Episode 4254, loss:-0.4670, fail, steps:217, total reward:31.9000, 1.9843218326568604 sec
Episode 4255, loss:-1.6781, fail, steps:211, total reward:25.1000, 1.8066701889038086 sec
Episode 4256, loss:-7.1504, fail, steps:198, total reward:4.5000, 1.8804097175598145 sec


Episode 4336, loss:4.9737, succeed, steps:131, total reward:13.5000, 0.9531996250152588 sec
Episode 4337, loss:1.9357, succeed, steps:164, total reward:12.8000, 1.1762380599975586 sec
Episode 4338, loss:-11.4436, fail, steps:199, total reward:7.9000, 1.893693447113037 sec
Episode 4339, loss:3.2586, succeed, steps:141, total reward:15.0000, 1.2497880458831787 sec
Episode 4340, loss:3.2703, succeed, steps:161, total reward:23.4000, 1.3033065795898438 sec
Episode 4341, loss:-4.2655, succeed, steps:171, total reward:-11.0000, 1.5150625705718994 sec
Episode 4342, loss:-4.2555, fail, steps:212, total reward:27.4000, 1.868394136428833 sec
Episode 4343, loss:4.4765, succeed, steps:176, total reward:24.2000, 1.5010685920715332 sec
Episode 4344, loss:-9.1650, fail, steps:200, total reward:8.7000, 2.043083906173706 sec
Episode 4345, loss:1.5418, succeed, steps:153, total reward:23.8000, 1.3649413585662842 sec
Episode 4346, loss:2.6266, succeed, steps:139, total reward:14.7000, 1.1669285297393799 

Episode 4400, loss:2.3765, succeed, steps:175, total reward:25.0000, 1.3693268299102783 sec
Episode 4401, loss:-1.9503, succeed, steps:188, total reward:21.1000, 1.5556073188781738 sec
Episode 4402, loss:4.0997, succeed, steps:197, total reward:30.9000, 1.594735860824585 sec
Episode 4403, loss:5.7765, succeed, steps:174, total reward:12.5000, 1.3399062156677246 sec
Episode 4404, loss:-7.8136, fail, steps:199, total reward:9.2000, 1.881321668624878 sec
Episode 4405, loss:-0.3923, succeed, steps:163, total reward:12.1000, 1.475492238998413 sec
Episode 4406, loss:7.0223, succeed, steps:154, total reward:28.1000, 1.2380273342132568 sec
Episode 4407, loss:4.8297, succeed, steps:163, total reward:31.9000, 1.1534934043884277 sec
Episode 4408, loss:-2.7373, fail, steps:213, total reward:28.4000, 1.978341817855835 sec
Episode 4409, loss:9.5008, succeed, steps:177, total reward:29.0000, 1.4804222583770752 sec
Episode 4410, loss:-12.1892, fail, steps:194, total reward:0.7000, 1.9737281799316406 s

Episode 4490, loss:1.8742, succeed, steps:154, total reward:27.4000, 1.1507492065429688 sec
Episode 4491, loss:1.9721, succeed, steps:186, total reward:6.1000, 1.653186559677124 sec
Episode 4492, loss:3.9869, succeed, steps:156, total reward:21.3000, 1.191852331161499 sec
Episode 4493, loss:10.0598, succeed, steps:212, total reward:40.9000, 1.86020827293396 sec
Episode 4494, loss:-1.4553, fail, steps:208, total reward:18.7000, 1.8854501247406006 sec
Episode 4495, loss:-0.0390, succeed, steps:141, total reward:13.1000, 1.334735631942749 sec
Episode 4496, loss:-6.8852, fail, steps:205, total reward:16.5000, 2.1703436374664307 sec
Episode 4497, loss:0.6278, succeed, steps:95, total reward:10.7000, 0.8812716007232666 sec
Episode 4498, loss:0.0364, succeed, steps:186, total reward:19.5000, 1.4510157108306885 sec
Episode 4499, loss:3.0490, succeed, steps:176, total reward:13.1000, 1.3426027297973633 sec
len16 Comparison 9 10 less Comparison 8 9 more Swap Comparison 7 8 more Swap Comparison 6

Episode 4554, loss:-11.8393, fail, steps:199, total reward:5.8000, 2.0075128078460693 sec
Episode 4555, loss:1.7314, succeed, steps:165, total reward:20.2000, 1.477567195892334 sec
Episode 4556, loss:1.5844, succeed, steps:134, total reward:18.5000, 1.0353672504425049 sec
Episode 4557, loss:1.3569, succeed, steps:197, total reward:11.9000, 1.8662655353546143 sec
Episode 4558, loss:3.0410, succeed, steps:193, total reward:14.3000, 1.664271593093872 sec
Episode 4559, loss:-11.3455, fail, steps:208, total reward:17.2000, 1.853175163269043 sec
Episode 4560, loss:8.5699, succeed, steps:155, total reward:24.6000, 1.3352842330932617 sec
Episode 4561, loss:-4.1645, fail, steps:208, total reward:22.1000, 2.1341757774353027 sec
Episode 4562, loss:2.8798, succeed, steps:188, total reward:17.2000, 1.753309965133667 sec
Episode 4563, loss:-10.8026, fail, steps:205, total reward:15.3000, 1.9370265007019043 sec
Episode 4564, loss:-12.5932, fail, steps:204, total reward:10.4000, 1.8846983909606934 sec

Episode 4615, loss:3.6980, succeed, steps:136, total reward:13.3000, 0.9881296157836914 sec
Episode 4616, loss:5.2726, succeed, steps:161, total reward:29.9000, 1.0963895320892334 sec
Episode 4617, loss:-0.2352, succeed, steps:111, total reward:9.5000, 0.8100419044494629 sec
Episode 4618, loss:1.2228, succeed, steps:132, total reward:22.0000, 0.8826701641082764 sec
Episode 4619, loss:-1.3187, fail, steps:211, total reward:23.4000, 2.026224374771118 sec
Episode 4620, loss:-6.7063, fail, steps:214, total reward:26.4000, 1.849637508392334 sec
Episode 4621, loss:-2.1634, fail, steps:220, total reward:36.5000, 1.879176139831543 sec
Episode 4622, loss:-1.8507, fail, steps:216, total reward:31.8000, 1.8229830265045166 sec
Episode 4623, loss:5.9143, succeed, steps:142, total reward:17.7000, 1.2555232048034668 sec
Episode 4624, loss:4.6942, succeed, steps:133, total reward:18.1000, 1.0426995754241943 sec
Episode 4625, loss:1.9698, succeed, steps:174, total reward:19.9000, 1.319756031036377 sec


Episode 4700, loss:3.9161, succeed, steps:185, total reward:26.3000, 1.535982608795166 sec
Episode 4701, loss:0.7785, succeed, steps:166, total reward:12.5000, 1.2555224895477295 sec
Episode 4702, loss:-2.5556, fail, steps:206, total reward:18.8000, 1.891097068786621 sec
Episode 4703, loss:-2.3157, succeed, steps:141, total reward:8.3000, 1.392319917678833 sec
Episode 4704, loss:1.5386, succeed, steps:133, total reward:19.4000, 1.0282657146453857 sec
Episode 4705, loss:2.5264, succeed, steps:197, total reward:24.0000, 1.7462067604064941 sec
Episode 4706, loss:-0.1230, succeed, steps:141, total reward:14.3000, 1.0688178539276123 sec
Episode 4707, loss:5.8237, succeed, steps:207, total reward:32.7000, 1.6179924011230469 sec
Episode 4708, loss:0.8504, succeed, steps:106, total reward:12.3000, 0.8655109405517578 sec
Episode 4709, loss:0.7240, succeed, steps:128, total reward:7.6000, 0.9834318161010742 sec
Episode 4710, loss:8.1414, succeed, steps:163, total reward:32.2000, 1.22390055656433

Episode 4791, loss:1.9724, succeed, steps:159, total reward:14.0000, 1.3026349544525146 sec
Episode 4792, loss:1.7213, succeed, steps:144, total reward:16.1000, 1.070796251296997 sec
Episode 4793, loss:-12.9596, fail, steps:211, total reward:24.0000, 2.0530662536621094 sec
Episode 4794, loss:3.8759, succeed, steps:199, total reward:30.2000, 1.7329974174499512 sec
Episode 4795, loss:5.3453, succeed, steps:166, total reward:13.2000, 1.5344414710998535 sec
Episode 4796, loss:8.0525, succeed, steps:176, total reward:24.1000, 1.3749306201934814 sec
Episode 4797, loss:2.7048, succeed, steps:191, total reward:23.8000, 1.6435277462005615 sec
Episode 4798, loss:3.7000, succeed, steps:192, total reward:10.4000, 1.8092880249023438 sec
Episode 4799, loss:-6.0366, fail, steps:214, total reward:28.2000, 1.9221341609954834 sec
len16 Comparison 9 10 less Comparison 8 9 less Comparison 7 8 more Swap Comparison 6 7 more Swap Comparison 5 6 more Swap Comparison 4 5 more Swap Comparison 3 4 more Swap Comp

Episode 4852, loss:-2.0224, succeed, steps:156, total reward:9.9000, 1.4437241554260254 sec
Episode 4853, loss:7.6239, succeed, steps:188, total reward:31.0000, 1.5410828590393066 sec
Episode 4854, loss:-9.2471, fail, steps:210, total reward:20.8000, 1.922027587890625 sec
Episode 4855, loss:2.1815, succeed, steps:204, total reward:28.7000, 1.7389180660247803 sec
Episode 4856, loss:0.3601, succeed, steps:120, total reward:16.5000, 0.9746475219726562 sec
Episode 4857, loss:6.3672, succeed, steps:156, total reward:27.8000, 1.1238856315612793 sec
Episode 4858, loss:-3.0210, fail, steps:208, total reward:20.9000, 2.025146007537842 sec
Episode 4859, loss:1.2788, succeed, steps:173, total reward:20.4000, 1.5986549854278564 sec
Episode 4860, loss:2.5746, succeed, steps:202, total reward:22.9000, 1.8725347518920898 sec
Episode 4861, loss:5.0963, succeed, steps:121, total reward:19.1000, 0.9672009944915771 sec
Episode 4862, loss:1.3834, succeed, steps:205, total reward:30.3000, 1.581987142562866

Episode 4920, loss:-22.6957, fail, steps:186, total reward:-17.2000, 1.922222375869751 sec
Episode 4921, loss:-10.1143, fail, steps:190, total reward:-6.1000, 1.8971285820007324 sec
Episode 4922, loss:7.1165, succeed, steps:164, total reward:24.1000, 1.4441735744476318 sec
Episode 4923, loss:3.2219, succeed, steps:175, total reward:24.4000, 1.4131524562835693 sec
Episode 4924, loss:5.9063, succeed, steps:179, total reward:27.7000, 1.3347899913787842 sec
Episode 4925, loss:0.8439, succeed, steps:149, total reward:19.6000, 1.058401107788086 sec
Episode 4926, loss:-10.8411, fail, steps:196, total reward:-0.6000, 1.917571783065796 sec
Episode 4927, loss:4.8054, succeed, steps:177, total reward:19.0000, 1.5800604820251465 sec
Episode 4928, loss:3.2158, succeed, steps:166, total reward:20.0000, 1.3362669944763184 sec
Episode 4929, loss:5.5858, succeed, steps:191, total reward:11.1000, 1.8514442443847656 sec
Episode 4930, loss:3.1880, succeed, steps:161, total reward:14.7000, 1.40680074691772

Episode 5008, loss:0.0666, succeed, steps:183, total reward:20.7000, 1.6076469421386719 sec
Episode 5009, loss:3.8525, succeed, steps:136, total reward:21.1000, 1.099585771560669 sec
Episode 5010, loss:-4.1651, fail, steps:202, total reward:11.3000, 1.974846601486206 sec
Episode 5011, loss:4.4199, succeed, steps:208, total reward:30.1000, 1.841693639755249 sec
Episode 5012, loss:-11.2422, fail, steps:196, total reward:3.6000, 1.873013973236084 sec
Episode 5013, loss:-1.7320, fail, steps:207, total reward:17.7000, 1.8832459449768066 sec
Episode 5014, loss:-8.4045, fail, steps:208, total reward:19.1000, 1.9135477542877197 sec
Episode 5015, loss:-7.0558, fail, steps:213, total reward:27.8000, 1.8558235168457031 sec
Episode 5016, loss:2.4731, succeed, steps:198, total reward:19.0000, 1.8199589252471924 sec
Episode 5017, loss:-8.6132, fail, steps:208, total reward:18.7000, 1.848780870437622 sec
Episode 5018, loss:5.9159, succeed, steps:170, total reward:27.8000, 1.5090320110321045 sec
Episo

Episode 5098, loss:-0.9253, succeed, steps:158, total reward:13.5000, 1.3356256484985352 sec
Episode 5099, loss:3.5992, succeed, steps:173, total reward:26.4000, 1.2495903968811035 sec
len16 Comparison 9 10 less Comparison 8 9 more Swap Comparison 7 8 more Swap Comparison 6 7 more Swap Comparison 5 6 more Swap Comparison 4 5 equal Comparison 3 4 more Swap Comparison 2 3 more Swap Comparison 1 2 more Swap Comparison 0 1 less Comparison 10 11 more Swap Comparison 11 12 less Comparison 12 13 more Swap Comparison 13 14 more Swap Comparison 14 15 less Comparison 9 10 less Comparison 8 9 more Swap Comparison 7 8 more Swap Comparison 6 7 more Swap Comparison 5 6 less Comparison 4 5 more Swap Comparison 3 4 more Swap Comparison 2 3 more Swap Comparison 1 2 equal Comparison 13 14 less Comparison 10 11 less Comparison 11 12 less Comparison 9 10 less Comparison 13 14 less Comparison 0 1 less Comparison 8 9 more Swap Comparison 7 8 more Swap Comparison 6 7 less Comparison 5 6 more Swap Comparison 

Episode 5162, loss:-4.8666, fail, steps:197, total reward:5.9000, 1.852128505706787 sec
Episode 5163, loss:5.6144, succeed, steps:190, total reward:21.8000, 1.7015373706817627 sec
Episode 5164, loss:3.0664, succeed, steps:168, total reward:23.2000, 1.3146960735321045 sec
Episode 5165, loss:3.2732, succeed, steps:201, total reward:23.7000, 1.7842886447906494 sec
Episode 5166, loss:6.7182, succeed, steps:181, total reward:32.7000, 1.5772266387939453 sec
Episode 5167, loss:-0.5681, succeed, steps:184, total reward:9.4000, 1.5647950172424316 sec
Episode 5168, loss:0.3605, fail, steps:221, total reward:40.3000, 1.858130693435669 sec
Episode 5169, loss:6.1593, succeed, steps:185, total reward:35.0000, 1.6005678176879883 sec
Episode 5170, loss:3.3857, succeed, steps:205, total reward:19.0000, 1.9518802165985107 sec
Episode 5171, loss:3.2724, succeed, steps:159, total reward:17.8000, 1.5121779441833496 sec
Episode 5172, loss:-2.8986, fail, steps:212, total reward:27.4000, 2.0370800495147705 se

Episode 5252, loss:-8.0683, fail, steps:208, total reward:22.1000, 1.925142526626587 sec
Episode 5253, loss:3.2163, succeed, steps:131, total reward:12.6000, 1.1950554847717285 sec
Episode 5254, loss:8.5844, succeed, steps:210, total reward:30.1000, 2.1181445121765137 sec
Episode 5255, loss:-8.0919, fail, steps:201, total reward:11.2000, 1.9069960117340088 sec
Episode 5256, loss:-14.8482, fail, steps:190, total reward:-4.9000, 1.852851152420044 sec
Episode 5257, loss:2.2552, succeed, steps:134, total reward:19.2000, 1.2118825912475586 sec
Episode 5258, loss:-8.8861, fail, steps:205, total reward:14.4000, 2.1673712730407715 sec
Episode 5259, loss:1.5196, succeed, steps:121, total reward:9.2000, 1.0916104316711426 sec
Episode 5260, loss:10.5470, succeed, steps:198, total reward:27.2000, 1.7402887344360352 sec
Episode 5261, loss:4.4316, succeed, steps:205, total reward:24.2000, 1.7813694477081299 sec
Episode 5262, loss:4.3953, succeed, steps:182, total reward:29.9000, 1.5220980644226074 s

Episode 5308, loss:-15.8916, fail, steps:190, total reward:-9.5000, 1.8908510208129883 sec
Episode 5309, loss:-2.9723, succeed, steps:202, total reward:17.4000, 1.841071605682373 sec
Episode 5310, loss:0.9414, succeed, steps:157, total reward:11.6000, 1.462437391281128 sec
Episode 5311, loss:-0.2076, succeed, steps:188, total reward:8.6000, 1.7847373485565186 sec
Episode 5312, loss:5.4894, succeed, steps:158, total reward:23.7000, 1.2121236324310303 sec
Episode 5313, loss:5.4827, succeed, steps:217, total reward:37.8000, 1.8996071815490723 sec
Episode 5314, loss:3.3048, succeed, steps:168, total reward:17.6000, 1.5007977485656738 sec
Episode 5315, loss:6.5179, succeed, steps:151, total reward:16.1000, 1.2164011001586914 sec
Episode 5316, loss:3.1436, succeed, steps:203, total reward:32.6000, 1.6711909770965576 sec
Episode 5317, loss:1.4762, fail, steps:218, total reward:34.2000, 1.8844516277313232 sec
Episode 5318, loss:4.6345, succeed, steps:176, total reward:29.9000, 1.50290250778198

Episode 5398, loss:2.3980, succeed, steps:146, total reward:20.5000, 1.1245617866516113 sec
Episode 5399, loss:0.8836, succeed, steps:105, total reward:9.6000, 0.7711515426635742 sec
len16 Comparison 9 10 less Comparison 8 9 less Comparison 7 8 more Swap Comparison 6 7 more Swap Comparison 5 6 more Swap Comparison 4 5 more Swap Comparison 3 4 more Swap Comparison 2 3 more Swap Comparison 1 2 more Swap Comparison 0 1 more Swap Comparison 10 11 less Comparison 11 12 more Swap Comparison 12 13 more Swap Comparison 13 14 more Swap Comparison 14 15 equal Comparison 9 10 less Comparison 8 9 more Swap Comparison 7 8 less Comparison 6 7 equal Comparison 5 6 more Swap Comparison 4 5 more Swap Comparison 3 4 more Swap Comparison 2 3 more Swap Comparison 1 2 more Swap Comparison 10 11 more Swap Comparison 11 12 more Swap Comparison 12 13 more Swap Comparison 13 14 less Comparison 9 10 more Swap Comparison 8 9 more Swap Comparison 7 8 less Comparison 6 7 more Swap Comparison 5 6 more Swap Comparis

Episode 5465, loss:3.4575, succeed, steps:197, total reward:19.7000, 1.7396509647369385 sec
Episode 5466, loss:-0.4998, succeed, steps:172, total reward:13.6000, 1.5541412830352783 sec
Episode 5467, loss:0.8343, succeed, steps:156, total reward:21.0000, 1.2914068698883057 sec
Episode 5468, loss:-6.7272, fail, steps:207, total reward:17.7000, 1.971975564956665 sec
Episode 5469, loss:4.0234, succeed, steps:159, total reward:23.3000, 1.3637566566467285 sec
Episode 5470, loss:0.6534, succeed, steps:159, total reward:26.6000, 1.239138126373291 sec
Episode 5471, loss:6.1629, succeed, steps:217, total reward:42.3000, 1.9272918701171875 sec
Episode 5472, loss:7.0237, succeed, steps:156, total reward:28.1000, 1.424870491027832 sec
Episode 5473, loss:-6.6282, fail, steps:211, total reward:26.4000, 2.108299493789673 sec
Episode 5474, loss:-4.2773, fail, steps:222, total reward:41.3000, 1.8668932914733887 sec
Episode 5475, loss:-1.0103, succeed, steps:180, total reward:15.3000, 1.6145365238189697 

Episode 5520, loss:1.1079, succeed, steps:191, total reward:17.6000, 1.730116367340088 sec
Episode 5521, loss:-4.5568, fail, steps:215, total reward:31.7000, 1.9759552478790283 sec
Episode 5522, loss:-1.5911, succeed, steps:187, total reward:18.5000, 1.765446662902832 sec
Episode 5523, loss:1.9119, succeed, steps:191, total reward:21.9000, 1.7107787132263184 sec
Episode 5524, loss:4.2156, succeed, steps:166, total reward:22.9000, 1.267953872680664 sec
Episode 5525, loss:7.3725, succeed, steps:189, total reward:20.0000, 1.4764750003814697 sec
Episode 5526, loss:-2.3167, succeed, steps:174, total reward:12.3000, 1.3740429878234863 sec
Episode 5527, loss:4.6927, succeed, steps:185, total reward:13.9000, 1.4983758926391602 sec
Episode 5528, loss:-0.4927, succeed, steps:204, total reward:18.9000, 1.8116140365600586 sec
Episode 5529, loss:-10.4732, fail, steps:207, total reward:16.8000, 1.8563926219940186 sec
Episode 5530, loss:-3.3276, fail, steps:212, total reward:26.2000, 1.85103917121887

Episode 5600, loss:6.3678, succeed, steps:187, total reward:17.7000, 1.5561258792877197 sec
Episode 5601, loss:2.0428, succeed, steps:181, total reward:23.4000, 1.3479814529418945 sec
Episode 5602, loss:-5.9366, fail, steps:205, total reward:15.3000, 1.8332252502441406 sec
Episode 5603, loss:3.1141, succeed, steps:188, total reward:10.7000, 1.7354364395141602 sec
Episode 5604, loss:-0.3241, succeed, steps:200, total reward:9.7000, 1.8679897785186768 sec
Episode 5605, loss:-4.4343, fail, steps:211, total reward:22.1000, 1.864745855331421 sec
Episode 5606, loss:3.2967, succeed, steps:189, total reward:15.6000, 1.691713809967041 sec
Episode 5607, loss:1.8672, succeed, steps:155, total reward:20.7000, 1.2651877403259277 sec
Episode 5608, loss:3.9768, succeed, steps:140, total reward:18.6000, 1.091127634048462 sec
Episode 5609, loss:2.8886, succeed, steps:182, total reward:31.7000, 1.3418936729431152 sec
Episode 5610, loss:-0.7321, fail, steps:211, total reward:23.4000, 1.8839352130889893 s

Episode 5690, loss:2.9593, succeed, steps:171, total reward:19.1000, 1.6078221797943115 sec
Episode 5691, loss:4.5778, succeed, steps:205, total reward:20.4000, 1.9412696361541748 sec
Episode 5692, loss:3.1223, succeed, steps:124, total reward:19.2000, 1.115657091140747 sec
Episode 5693, loss:5.6922, succeed, steps:190, total reward:33.6000, 1.5377204418182373 sec
Episode 5694, loss:1.9647, succeed, steps:149, total reward:16.4000, 1.1203787326812744 sec
Episode 5695, loss:-10.5936, fail, steps:193, total reward:0.6000, 1.9762985706329346 sec
Episode 5696, loss:2.8923, succeed, steps:207, total reward:23.2000, 1.870105266571045 sec
Episode 5697, loss:3.5525, succeed, steps:131, total reward:16.8000, 1.1744685173034668 sec
Episode 5698, loss:6.1353, succeed, steps:143, total reward:21.6000, 1.1323363780975342 sec
Episode 5699, loss:3.8883, succeed, steps:175, total reward:17.2000, 1.3812658786773682 sec
len16 Comparison 9 10 less Comparison 8 9 more Swap Comparison 7 8 more Swap Compari

Episode 5745, loss:0.2154, succeed, steps:126, total reward:10.5000, 1.0957608222961426 sec
Episode 5746, loss:5.8409, succeed, steps:161, total reward:28.0000, 1.2908697128295898 sec
Episode 5747, loss:3.3002, succeed, steps:131, total reward:26.2000, 0.9202768802642822 sec
Episode 5748, loss:5.6327, succeed, steps:150, total reward:23.4000, 1.0521836280822754 sec
Episode 5749, loss:10.3079, succeed, steps:188, total reward:32.5000, 1.3595423698425293 sec
Episode 5750, loss:4.9137, succeed, steps:198, total reward:25.5000, 1.7094674110412598 sec
Episode 5751, loss:2.6827, succeed, steps:175, total reward:17.3000, 1.5619306564331055 sec
Episode 5752, loss:1.6136, succeed, steps:129, total reward:10.3000, 1.0438365936279297 sec
Episode 5753, loss:4.7745, succeed, steps:184, total reward:34.0000, 1.2775912284851074 sec
Episode 5754, loss:0.1632, succeed, steps:169, total reward:19.2000, 1.2417597770690918 sec
Episode 5755, loss:5.4060, succeed, steps:151, total reward:25.4000, 1.10845923

Episode 5835, loss:3.0291, succeed, steps:131, total reward:14.5000, 1.2715494632720947 sec
Episode 5836, loss:0.1050, succeed, steps:156, total reward:20.4000, 1.242844581604004 sec
Episode 5837, loss:2.1549, succeed, steps:133, total reward:24.6000, 0.941033124923706 sec
Episode 5838, loss:4.3059, succeed, steps:171, total reward:18.1000, 1.2941997051239014 sec
Episode 5839, loss:4.0250, succeed, steps:207, total reward:20.6000, 1.899474859237671 sec
Episode 5840, loss:-9.0529, fail, steps:201, total reward:7.9000, 1.8666532039642334 sec
Episode 5841, loss:3.8966, succeed, steps:189, total reward:31.0000, 1.6457951068878174 sec
Episode 5842, loss:-3.5928, fail, steps:223, total reward:42.3000, 1.9428300857543945 sec
Episode 5843, loss:-2.3687, succeed, steps:173, total reward:5.0000, 1.6140892505645752 sec
Episode 5844, loss:4.0462, succeed, steps:174, total reward:21.4000, 1.3893349170684814 sec
Episode 5845, loss:0.0228, succeed, steps:169, total reward:16.2000, 1.3045802116394043 

Episode 5925, loss:3.2406, succeed, steps:200, total reward:9.0000, 1.8427619934082031 sec
Episode 5926, loss:1.8016, succeed, steps:137, total reward:31.9000, 1.1294302940368652 sec
Episode 5927, loss:-4.8869, succeed, steps:192, total reward:-1.2000, 2.054290771484375 sec
Episode 5928, loss:10.2787, succeed, steps:139, total reward:34.5000, 1.2192628383636475 sec
Episode 5929, loss:5.8972, succeed, steps:170, total reward:23.9000, 1.39266037940979 sec
Episode 5930, loss:-2.2855, succeed, steps:175, total reward:13.8000, 1.3650977611541748 sec
Episode 5931, loss:-4.6666, fail, steps:199, total reward:5.0000, 1.8371968269348145 sec
Episode 5932, loss:-0.3870, succeed, steps:182, total reward:3.4000, 1.7156453132629395 sec
Episode 5933, loss:-5.4223, fail, steps:207, total reward:19.8000, 1.9150757789611816 sec
Episode 5934, loss:3.3345, succeed, steps:201, total reward:25.4000, 1.7215516567230225 sec
Episode 5935, loss:5.5731, succeed, steps:199, total reward:37.0000, 1.709244728088379

Episode 6000, loss:3.8641, succeed, steps:141, total reward:23.5000, 1.14666748046875 sec
Checkpoint saved at episode 6000 to /home/mcwave/code/autocode/datasets/rl_sort_transformer_easy/list16_transformer4_192_gamma07_step640_v3/ckpt_6000_0.7810_169.51.pth
Learning rate = 0.000019
Episode 6001, loss:2.9919, succeed, steps:185, total reward:8.5000, 1.4773070812225342 sec
Episode 6002, loss:6.0034, succeed, steps:172, total reward:31.9000, 1.3070478439331055 sec
Episode 6003, loss:0.5419, succeed, steps:148, total reward:12.4000, 1.1235413551330566 sec
Episode 6004, loss:5.2230, succeed, steps:172, total reward:37.0000, 1.1770780086517334 sec
Episode 6005, loss:-4.3994, fail, steps:205, total reward:16.9000, 1.9723358154296875 sec
Episode 6006, loss:1.4817, succeed, steps:178, total reward:25.5000, 1.591552972793579 sec
Episode 6007, loss:-1.4525, succeed, steps:191, total reward:4.0000, 1.8755559921264648 sec
Episode 6008, loss:-1.4308, fail, steps:212, total reward:27.4000, 1.90613365

Episode 6088, loss:-3.7863, fail, steps:199, total reward:9.2000, 1.9545912742614746 sec
Episode 6089, loss:-5.1334, fail, steps:201, total reward:10.6000, 1.880638599395752 sec
Episode 6090, loss:1.6894, succeed, steps:175, total reward:31.2000, 1.483562707901001 sec
Episode 6091, loss:1.7009, succeed, steps:202, total reward:25.1000, 1.948631763458252 sec
Episode 6092, loss:3.0124, succeed, steps:187, total reward:17.5000, 1.7764477729797363 sec
Episode 6093, loss:-3.5752, succeed, steps:156, total reward:3.7000, 1.3152401447296143 sec
Episode 6094, loss:1.4008, succeed, steps:189, total reward:21.0000, 1.4397928714752197 sec
Episode 6095, loss:5.5964, succeed, steps:202, total reward:22.9000, 1.7370929718017578 sec
Episode 6096, loss:2.6059, succeed, steps:153, total reward:28.4000, 1.3638207912445068 sec
Episode 6097, loss:3.3693, succeed, steps:161, total reward:27.9000, 1.2212114334106445 sec
Episode 6098, loss:6.7058, succeed, steps:142, total reward:22.6000, 1.0218510627746582 

Episode 6146, loss:-0.6080, succeed, steps:154, total reward:12.0000, 1.2952697277069092 sec
Episode 6147, loss:2.5414, succeed, steps:187, total reward:28.9000, 1.374953269958496 sec
Episode 6148, loss:-6.9077, fail, steps:201, total reward:9.4000, 1.8813693523406982 sec
Episode 6149, loss:10.8951, succeed, steps:130, total reward:23.3000, 1.1440489292144775 sec
Episode 6150, loss:-8.8793, fail, steps:208, total reward:20.9000, 2.2014482021331787 sec
Episode 6151, loss:4.9796, succeed, steps:157, total reward:23.9000, 1.396540880203247 sec
Episode 6152, loss:1.0693, succeed, steps:200, total reward:23.4000, 1.7029919624328613 sec
Episode 6153, loss:-5.6474, fail, steps:210, total reward:24.1000, 1.8449172973632812 sec
Episode 6154, loss:-5.7525, fail, steps:220, total reward:37.8000, 1.8163745403289795 sec
Episode 6155, loss:8.1813, succeed, steps:159, total reward:24.7000, 1.405998945236206 sec
Episode 6156, loss:2.8261, succeed, steps:200, total reward:11.2000, 2.1373047828674316 se

Episode 6236, loss:-1.7094, fail, steps:207, total reward:17.7000, 1.9101271629333496 sec
Episode 6237, loss:-4.4816, fail, steps:217, total reward:33.7000, 1.8549437522888184 sec
Episode 6238, loss:-12.1179, fail, steps:192, total reward:-2.9000, 1.843263864517212 sec
Episode 6239, loss:4.9753, succeed, steps:174, total reward:34.1000, 1.5572030544281006 sec
Episode 6240, loss:3.6633, succeed, steps:152, total reward:31.9000, 1.1870126724243164 sec
Episode 6241, loss:1.6135, succeed, steps:168, total reward:13.0000, 1.2471113204956055 sec
Episode 6242, loss:-4.8953, fail, steps:210, total reward:24.1000, 1.889789342880249 sec
Episode 6243, loss:2.9998, succeed, steps:180, total reward:23.0000, 1.5682930946350098 sec
Episode 6244, loss:-0.0872, fail, steps:217, total reward:33.7000, 2.071369171142578 sec
Episode 6245, loss:0.6761, succeed, steps:109, total reward:20.1000, 0.9708077907562256 sec
Episode 6246, loss:-9.9743, fail, steps:201, total reward:11.2000, 2.2081470489501953 sec
Ep

Episode 6300, loss:2.4194, succeed, steps:165, total reward:25.8000, 1.292783260345459 sec
Episode 6301, loss:1.9243, succeed, steps:198, total reward:19.8000, 1.865276575088501 sec
Episode 6302, loss:0.5309, succeed, steps:173, total reward:15.6000, 1.5598106384277344 sec
Episode 6303, loss:2.4152, succeed, steps:173, total reward:24.3000, 1.352010726928711 sec
Episode 6304, loss:-5.1271, fail, steps:206, total reward:17.6000, 1.9094345569610596 sec
Episode 6305, loss:-7.6037, succeed, steps:163, total reward:-3.7000, 1.6325304508209229 sec
Episode 6306, loss:3.3707, succeed, steps:170, total reward:25.9000, 1.3298046588897705 sec
Episode 6307, loss:2.2586, succeed, steps:117, total reward:15.5000, 0.8506109714508057 sec
Episode 6308, loss:0.5010, succeed, steps:184, total reward:24.7000, 1.296191692352295 sec
Episode 6309, loss:-4.2529, fail, steps:204, total reward:12.8000, 1.8378958702087402 sec
Episode 6310, loss:1.4104, succeed, steps:120, total reward:8.8000, 1.1011972427368164 

Episode 6390, loss:6.1824, succeed, steps:138, total reward:16.6000, 1.098266363143921 sec
Episode 6391, loss:4.5778, succeed, steps:182, total reward:36.0000, 1.3147594928741455 sec
Episode 6392, loss:4.9671, succeed, steps:198, total reward:24.7000, 1.658043384552002 sec
Episode 6393, loss:-0.8193, succeed, steps:207, total reward:21.9000, 1.8296723365783691 sec
Episode 6394, loss:5.5025, succeed, steps:118, total reward:16.8000, 0.9538867473602295 sec
Episode 6395, loss:-9.7640, fail, steps:193, total reward:-0.3000, 1.7128796577453613 sec
Episode 6396, loss:2.7805, succeed, steps:134, total reward:14.6000, 1.2230637073516846 sec
Episode 6397, loss:7.5426, succeed, steps:154, total reward:23.9000, 1.2146785259246826 sec
Episode 6398, loss:3.2344, succeed, steps:136, total reward:26.7000, 0.9351439476013184 sec
Episode 6399, loss:1.1750, succeed, steps:192, total reward:22.0000, 1.4897096157073975 sec
len16 Comparison 9 10 more Swap Comparison 8 9 more Swap Comparison 11 12 less Comp

Episode 6446, loss:3.2719, succeed, steps:163, total reward:18.9000, 1.1481866836547852 sec
Episode 6447, loss:5.9484, succeed, steps:166, total reward:7.3000, 1.25240159034729 sec
Episode 6448, loss:1.2771, succeed, steps:202, total reward:26.1000, 1.7874133586883545 sec
Episode 6449, loss:3.4781, succeed, steps:139, total reward:25.1000, 1.1381912231445312 sec
Episode 6450, loss:-5.9201, fail, steps:206, total reward:17.6000, 1.9554979801177979 sec
Episode 6451, loss:3.8744, succeed, steps:167, total reward:13.8000, 1.526536226272583 sec
Episode 6452, loss:2.2624, succeed, steps:128, total reward:12.2000, 1.036097526550293 sec
Episode 6453, loss:-4.0692, succeed, steps:202, total reward:9.2000, 2.0317742824554443 sec
Episode 6454, loss:-7.9261, fail, steps:206, total reward:18.8000, 1.8613979816436768 sec
Episode 6455, loss:-0.1720, fail, steps:219, total reward:35.8000, 1.8770081996917725 sec
Episode 6456, loss:-3.5497, fail, steps:196, total reward:2.5000, 1.8852715492248535 sec
Ep

Episode 6511, loss:7.6272, succeed, steps:163, total reward:31.2000, 1.2325451374053955 sec
Episode 6512, loss:0.2652, succeed, steps:146, total reward:15.5000, 1.0859956741333008 sec
Episode 6513, loss:4.3847, succeed, steps:166, total reward:29.0000, 1.1477491855621338 sec
Episode 6514, loss:3.8447, succeed, steps:174, total reward:31.6000, 1.193699598312378 sec
Episode 6515, loss:6.3416, succeed, steps:160, total reward:25.6000, 1.1734676361083984 sec
Episode 6516, loss:1.5644, succeed, steps:185, total reward:13.7000, 1.5009534358978271 sec
Episode 6517, loss:3.9364, succeed, steps:152, total reward:31.0000, 1.053154706954956 sec
Episode 6518, loss:3.2848, succeed, steps:180, total reward:23.5000, 1.3596713542938232 sec
Episode 6519, loss:7.4436, succeed, steps:194, total reward:32.7000, 1.4248781204223633 sec
Episode 6520, loss:-15.5179, fail, steps:192, total reward:-3.5000, 1.9028797149658203 sec
Episode 6521, loss:1.5439, succeed, steps:144, total reward:10.9000, 1.328439712524

Episode 6600, loss:4.2530, succeed, steps:172, total reward:31.1000, 1.2130489349365234 sec
Episode 6601, loss:2.7177, succeed, steps:121, total reward:13.7000, 0.884829044342041 sec
Episode 6602, loss:1.0665, succeed, steps:97, total reward:9.5000, 0.6889457702636719 sec
Episode 6603, loss:2.7724, succeed, steps:113, total reward:13.4000, 0.7711527347564697 sec
Episode 6604, loss:-0.3494, succeed, steps:162, total reward:14.6000, 1.118354320526123 sec
Episode 6605, loss:-2.3317, fail, steps:212, total reward:25.9000, 1.9223005771636963 sec
Episode 6606, loss:-1.3374, succeed, steps:181, total reward:25.8000, 1.561241626739502 sec
Episode 6607, loss:3.9082, succeed, steps:190, total reward:24.8000, 1.506568431854248 sec
Episode 6608, loss:-4.6524, fail, steps:199, total reward:9.2000, 1.8198299407958984 sec
Episode 6609, loss:-9.9623, fail, steps:207, total reward:16.2000, 1.84013032913208 sec
Episode 6610, loss:-1.6579, succeed, steps:176, total reward:-1.7000, 1.743129014968872 sec
E

Episode 6690, loss:2.6884, succeed, steps:195, total reward:14.6000, 1.5953912734985352 sec
Episode 6691, loss:2.0344, succeed, steps:166, total reward:12.5000, 1.4476163387298584 sec
Episode 6692, loss:4.0915, succeed, steps:197, total reward:17.3000, 1.8651819229125977 sec
Episode 6693, loss:3.7394, succeed, steps:173, total reward:18.3000, 1.5500128269195557 sec
Episode 6694, loss:1.5079, succeed, steps:168, total reward:29.4000, 1.2779223918914795 sec
Episode 6695, loss:2.8346, succeed, steps:168, total reward:6.5000, 1.4113798141479492 sec
Episode 6696, loss:8.9468, succeed, steps:213, total reward:30.5000, 1.8418724536895752 sec
Episode 6697, loss:-19.3736, fail, steps:191, total reward:-7.8000, 1.8101956844329834 sec
Episode 6698, loss:4.8361, succeed, steps:177, total reward:37.1000, 1.4773082733154297 sec
Episode 6699, loss:2.6402, succeed, steps:197, total reward:24.9000, 1.7701354026794434 sec
len16 Comparison 9 10 less Comparison 8 9 more Swap Comparison 7 8 less Comparison

Episode 6748, loss:4.8012, succeed, steps:199, total reward:26.7000, 1.8357720375061035 sec
Episode 6749, loss:0.3765, succeed, steps:113, total reward:3.3000, 0.9751005172729492 sec
Episode 6750, loss:1.8695, succeed, steps:161, total reward:21.4000, 1.2708518505096436 sec
Episode 6751, loss:0.3261, succeed, steps:184, total reward:23.6000, 1.483518362045288 sec
Episode 6752, loss:2.4066, succeed, steps:153, total reward:27.7000, 1.0547831058502197 sec
Episode 6753, loss:-2.1216, fail, steps:208, total reward:22.1000, 1.9490866661071777 sec
Episode 6754, loss:5.5890, succeed, steps:181, total reward:18.9000, 1.5994491577148438 sec
Episode 6755, loss:-2.5845, fail, steps:209, total reward:23.1000, 2.0512337684631348 sec
Episode 6756, loss:3.9213, succeed, steps:180, total reward:27.5000, 1.5540552139282227 sec
Episode 6757, loss:2.2053, succeed, steps:188, total reward:27.0000, 1.454383373260498 sec
Episode 6758, loss:1.2130, succeed, steps:182, total reward:12.5000, 1.5026023387908936

Episode 6816, loss:7.1057, succeed, steps:156, total reward:31.4000, 1.2892792224884033 sec
Episode 6817, loss:-5.5731, fail, steps:195, total reward:0.5000, 2.181901693344116 sec
Episode 6818, loss:-5.3822, fail, steps:205, total reward:17.8000, 1.8633694648742676 sec
Episode 6819, loss:-0.3433, succeed, steps:194, total reward:10.8000, 1.7547895908355713 sec
Episode 6820, loss:1.8362, succeed, steps:177, total reward:12.7000, 1.6323215961456299 sec
Episode 6821, loss:6.6395, succeed, steps:154, total reward:27.2000, 1.1807122230529785 sec
Episode 6822, loss:8.0357, succeed, steps:182, total reward:28.1000, 1.3577654361724854 sec
Episode 6823, loss:3.3012, succeed, steps:157, total reward:17.2000, 1.1418046951293945 sec
Episode 6824, loss:8.7973, succeed, steps:183, total reward:36.0000, 1.2239305973052979 sec
Episode 6825, loss:-16.0123, fail, steps:205, total reward:14.4000, 1.8854126930236816 sec
Episode 6826, loss:6.1049, succeed, steps:107, total reward:15.3000, 0.969115495681762

Episode 6900, loss:5.6727, succeed, steps:186, total reward:15.3000, 1.7435507774353027 sec
Episode 6901, loss:1.6800, succeed, steps:197, total reward:11.7000, 1.8609504699707031 sec
Episode 6902, loss:4.0884, succeed, steps:189, total reward:22.1000, 1.6803209781646729 sec
Episode 6903, loss:4.4103, succeed, steps:192, total reward:3.0000, 1.9694364070892334 sec
Episode 6904, loss:2.4304, succeed, steps:170, total reward:15.9000, 1.4855363368988037 sec
Episode 6905, loss:-10.6992, fail, steps:189, total reward:-6.0000, 1.9993095397949219 sec
Episode 6906, loss:3.5628, succeed, steps:161, total reward:22.7000, 1.4348645210266113 sec
Episode 6907, loss:4.2502, succeed, steps:167, total reward:14.5000, 1.407360553741455 sec
Episode 6908, loss:-0.1426, succeed, steps:194, total reward:13.6000, 1.879185438156128 sec
Episode 6909, loss:6.7537, succeed, steps:176, total reward:25.5000, 1.4950430393218994 sec
Episode 6910, loss:-12.4275, fail, steps:207, total reward:17.7000, 2.0510177612304

Episode 6990, loss:-6.4658, fail, steps:204, total reward:15.5000, 1.9485392570495605 sec
Episode 6991, loss:-4.2840, succeed, steps:187, total reward:7.2000, 1.8184564113616943 sec
Episode 6992, loss:5.9251, succeed, steps:159, total reward:21.4000, 1.35905122756958 sec
Episode 6993, loss:-2.4556, succeed, steps:171, total reward:-14.2000, 1.4884142875671387 sec
Episode 6994, loss:3.6517, succeed, steps:195, total reward:27.5000, 1.5953848361968994 sec
Episode 6995, loss:0.2919, fail, steps:209, total reward:23.1000, 1.9047586917877197 sec
Episode 6996, loss:-9.1246, fail, steps:208, total reward:16.4000, 1.904496192932129 sec
Episode 6997, loss:-6.7191, fail, steps:195, total reward:2.6000, 1.8144605159759521 sec
Episode 6998, loss:-8.2008, fail, steps:205, total reward:15.6000, 1.8693015575408936 sec
Episode 6999, loss:0.1034, succeed, steps:157, total reward:23.7000, 1.4017095565795898 sec
Episode 7000, loss:-1.3107, fail, steps:215, total reward:30.5000, 2.1875383853912354 sec
Che

Episode 7078, loss:4.4065, succeed, steps:199, total reward:17.8000, 1.9000141620635986 sec
Episode 7079, loss:0.3497, succeed, steps:201, total reward:13.5000, 1.8202404975891113 sec
Episode 7080, loss:4.5536, succeed, steps:182, total reward:38.5000, 1.4773945808410645 sec
Episode 7081, loss:-1.2968, fail, steps:225, total reward:45.0000, 2.1310017108917236 sec
Episode 7082, loss:4.3035, succeed, steps:129, total reward:20.7000, 1.143690586090088 sec
Episode 7083, loss:-3.4686, fail, steps:215, total reward:29.3000, 2.2649102210998535 sec
Episode 7084, loss:-10.1759, fail, steps:205, total reward:12.9000, 1.86320161819458 sec
Episode 7085, loss:13.0854, succeed, steps:201, total reward:46.8000, 1.7522518634796143 sec
Episode 7086, loss:1.5250, succeed, steps:192, total reward:20.4000, 1.731065034866333 sec
Episode 7087, loss:-4.1792, fail, steps:208, total reward:19.9000, 1.8181977272033691 sec
Episode 7088, loss:1.3703, succeed, steps:162, total reward:12.1000, 1.4839723110198975 se

Episode 7145, loss:2.8723, succeed, steps:207, total reward:28.6000, 1.7780420780181885 sec
Episode 7146, loss:-3.5046, succeed, steps:193, total reward:15.5000, 1.7779765129089355 sec
Episode 7147, loss:3.4343, succeed, steps:192, total reward:31.6000, 1.6792888641357422 sec
Episode 7148, loss:-1.2674, succeed, steps:152, total reward:5.9000, 1.3080825805664062 sec
Episode 7149, loss:5.9746, succeed, steps:184, total reward:19.7000, 1.3995444774627686 sec
Episode 7150, loss:3.6455, succeed, steps:176, total reward:24.7000, 1.2999827861785889 sec
Episode 7151, loss:5.6483, succeed, steps:148, total reward:19.8000, 1.1431419849395752 sec
Episode 7152, loss:-11.2941, fail, steps:204, total reward:11.9000, 1.9637682437896729 sec
Episode 7153, loss:4.5271, succeed, steps:149, total reward:21.1000, 1.3110389709472656 sec
Episode 7154, loss:-1.3668, succeed, steps:184, total reward:8.8000, 1.6544888019561768 sec
Episode 7155, loss:2.3664, succeed, steps:190, total reward:19.0000, 1.611808061

Episode 7203, loss:1.2883, succeed, steps:158, total reward:6.7000, 1.4314708709716797 sec
Episode 7204, loss:3.9302, succeed, steps:116, total reward:21.9000, 0.8881959915161133 sec
Episode 7205, loss:6.4671, succeed, steps:129, total reward:24.5000, 0.8929188251495361 sec
Episode 7206, loss:-6.4882, fail, steps:208, total reward:20.8000, 2.0527708530426025 sec
Episode 7207, loss:1.0806, succeed, steps:162, total reward:16.7000, 1.50467848777771 sec
Episode 7208, loss:4.4061, succeed, steps:156, total reward:19.4000, 1.2360775470733643 sec
Episode 7209, loss:5.4126, succeed, steps:165, total reward:21.5000, 1.2056303024291992 sec
Episode 7210, loss:3.9816, succeed, steps:202, total reward:20.3000, 1.870255470275879 sec
Episode 7211, loss:4.1540, succeed, steps:181, total reward:21.9000, 1.6281471252441406 sec
Episode 7212, loss:-5.4349, fail, steps:209, total reward:21.6000, 2.0838332176208496 sec
Episode 7213, loss:-9.4414, fail, steps:193, total reward:-1.9000, 1.8601491451263428 se

Episode 7293, loss:-7.7088, fail, steps:200, total reward:9.0000, 1.89744234085083 sec
Episode 7294, loss:-7.3127, fail, steps:208, total reward:20.8000, 1.9201433658599854 sec
Episode 7295, loss:3.0336, succeed, steps:157, total reward:20.2000, 1.388167142868042 sec
Episode 7296, loss:0.2121, succeed, steps:186, total reward:14.7000, 1.6214385032653809 sec
Episode 7297, loss:-3.5836, fail, steps:207, total reward:19.2000, 1.9015171527862549 sec
Episode 7298, loss:6.3495, succeed, steps:134, total reward:22.7000, 1.1743319034576416 sec
Episode 7299, loss:6.3557, succeed, steps:150, total reward:22.8000, 1.1573362350463867 sec
Episode 7300, loss:-8.1151, fail, steps:200, total reward:8.1000, 2.0669708251953125 sec
Episode 7301, loss:-1.5284, fail, steps:212, total reward:27.4000, 1.8702685832977295 sec
Episode 7302, loss:4.4974, succeed, steps:155, total reward:19.5000, 1.4138152599334717 sec
Episode 7303, loss:2.4669, succeed, steps:168, total reward:23.2000, 1.3403513431549072 sec
Epi

Episode 7384, loss:4.1226, succeed, steps:156, total reward:10.6000, 1.4572067260742188 sec
Episode 7385, loss:5.9787, succeed, steps:143, total reward:25.5000, 1.1007943153381348 sec
Episode 7386, loss:3.6472, succeed, steps:138, total reward:15.0000, 1.0594415664672852 sec
Episode 7387, loss:3.4429, succeed, steps:133, total reward:13.0000, 0.9478166103363037 sec
Episode 7388, loss:1.4046, succeed, steps:176, total reward:18.2000, 1.3015587329864502 sec
Episode 7389, loss:-10.1724, fail, steps:201, total reward:11.2000, 1.8556559085845947 sec
Episode 7390, loss:-8.9700, fail, steps:196, total reward:4.9000, 1.8699331283569336 sec
Episode 7391, loss:5.1009, succeed, steps:159, total reward:18.1000, 1.4116106033325195 sec
Episode 7392, loss:1.5808, succeed, steps:116, total reward:6.3000, 1.0307035446166992 sec
Episode 7393, loss:4.1901, succeed, steps:212, total reward:32.5000, 1.9077870845794678 sec
Episode 7394, loss:4.8055, succeed, steps:164, total reward:33.5000, 1.33807635307312

Episode 7449, loss:3.2706, succeed, steps:198, total reward:19.4000, 1.7570838928222656 sec
Episode 7450, loss:1.7619, succeed, steps:161, total reward:22.7000, 1.4009532928466797 sec
Episode 7451, loss:2.5426, succeed, steps:188, total reward:26.3000, 1.6025569438934326 sec
Episode 7452, loss:5.0496, succeed, steps:171, total reward:33.4000, 1.208061695098877 sec
Episode 7453, loss:-1.7583, succeed, steps:127, total reward:1.9000, 0.9726645946502686 sec
Episode 7454, loss:3.4112, succeed, steps:162, total reward:22.4000, 1.1201746463775635 sec
Episode 7455, loss:5.5967, succeed, steps:184, total reward:32.1000, 1.2609620094299316 sec
Episode 7456, loss:5.9471, succeed, steps:181, total reward:16.7000, 1.3563740253448486 sec
Episode 7457, loss:-1.5261, fail, steps:213, total reward:26.6000, 1.9353418350219727 sec
Episode 7458, loss:2.9464, succeed, steps:186, total reward:12.0000, 1.7243921756744385 sec
Episode 7459, loss:-7.7875, fail, steps:203, total reward:13.6000, 1.96463727951049

Episode 7510, loss:0.1251, succeed, steps:213, total reward:33.1000, 1.8455336093902588 sec
Episode 7511, loss:3.2404, succeed, steps:207, total reward:37.2000, 1.8212859630584717 sec
Episode 7512, loss:1.8962, succeed, steps:200, total reward:25.4000, 1.74676513671875 sec
Episode 7513, loss:1.1959, succeed, steps:143, total reward:19.1000, 1.167858600616455 sec
Episode 7514, loss:10.0930, succeed, steps:195, total reward:39.2000, 1.4179956912994385 sec
Episode 7515, loss:1.5841, succeed, steps:144, total reward:9.6000, 1.1215615272521973 sec
Episode 7516, loss:-7.6342, fail, steps:205, total reward:16.5000, 1.9385852813720703 sec
Episode 7517, loss:4.1262, succeed, steps:111, total reward:22.5000, 0.9916131496429443 sec
Episode 7518, loss:4.0556, succeed, steps:148, total reward:11.0000, 1.2986416816711426 sec
Episode 7519, loss:-5.7004, fail, steps:207, total reward:16.8000, 2.0085091590881348 sec
Episode 7520, loss:1.4933, succeed, steps:194, total reward:27.5000, 1.6263341903686523

len16 Comparison 9 10 more Swap Comparison 8 9 less Comparison 7 8 more Swap Comparison 6 7 less Comparison 5 6 more Swap Comparison 4 5 less Comparison 3 4 more Swap Comparison 2 3 more Swap Comparison 1 2 more Swap Comparison 0 1 more Swap Comparison 10 11 more Swap Comparison 11 12 more Swap Comparison 12 13 more Swap Comparison 9 10 more Swap Comparison 5 6 less Comparison 13 14 more Swap Comparison 8 9 more Swap Comparison 14 15 more Swap Comparison 7 8 less Comparison 6 7 more Swap Comparison 10 11 more Swap Comparison 5 6 less Comparison 4 5 more Swap Comparison 3 4 more Swap Comparison 2 3 more Swap Comparison 11 12 more Swap Comparison 12 13 more Swap Comparison 13 14 more Swap Comparison 9 10 more Swap Comparison 8 9 more Swap Comparison 7 8 less Comparison 7 8 less Comparison 6 7 less Comparison 5 6 more Swap Comparison 4 5 more Swap Comparison 3 4 more Swap Comparison 2 3 less Comparison 10 11 less Comparison 9 10 less Comparison 11 12 more Swap Comparison 8 9 less Comparis

Episode 7655, loss:5.9790, succeed, steps:177, total reward:29.3000, 1.4141616821289062 sec
Episode 7656, loss:-3.0586, fail, steps:217, total reward:34.1000, 1.9495823383331299 sec
Episode 7657, loss:2.9179, succeed, steps:164, total reward:13.4000, 1.513500690460205 sec
Episode 7658, loss:0.4551, succeed, steps:174, total reward:-2.0000, 1.5375537872314453 sec
Episode 7659, loss:1.4326, succeed, steps:101, total reward:9.5000, 0.7838177680969238 sec
Episode 7660, loss:-1.2094, succeed, steps:176, total reward:11.2000, 1.3472609519958496 sec
Episode 7661, loss:0.2726, succeed, steps:169, total reward:16.5000, 1.344329833984375 sec
Episode 7662, loss:3.9986, succeed, steps:208, total reward:30.4000, 1.8246724605560303 sec
Episode 7663, loss:4.0219, succeed, steps:198, total reward:31.1000, 1.7185404300689697 sec
Episode 7664, loss:4.0991, succeed, steps:198, total reward:33.7000, 1.731557846069336 sec
Episode 7665, loss:-10.7899, fail, steps:193, total reward:-3.6000, 1.966166973114013

Episode 7714, loss:-3.6408, fail, steps:207, total reward:18.6000, 2.0559751987457275 sec
Episode 7715, loss:5.3716, succeed, steps:150, total reward:27.3000, 1.359666109085083 sec
Episode 7716, loss:4.7414, succeed, steps:182, total reward:31.8000, 1.4347689151763916 sec
Episode 7717, loss:-5.5751, fail, steps:210, total reward:23.8000, 1.925379991531372 sec
Episode 7718, loss:0.3080, succeed, steps:188, total reward:16.0000, 1.63511061668396 sec
Episode 7719, loss:3.6638, succeed, steps:155, total reward:26.8000, 1.2181057929992676 sec
Episode 7720, loss:-11.0074, fail, steps:195, total reward:2.6000, 2.0053608417510986 sec
Episode 7721, loss:4.8637, succeed, steps:156, total reward:26.8000, 1.340590238571167 sec
Episode 7722, loss:1.9694, succeed, steps:188, total reward:23.7000, 1.536062240600586 sec
Episode 7723, loss:3.3847, succeed, steps:191, total reward:36.5000, 1.4207899570465088 sec
Episode 7724, loss:-0.1493, succeed, steps:202, total reward:18.1000, 1.8851134777069092 sec

Episode 7800, loss:7.5973, succeed, steps:177, total reward:28.3000, 1.3300602436065674 sec
Episode 7801, loss:4.0796, succeed, steps:160, total reward:24.4000, 1.123910903930664 sec
Episode 7802, loss:5.2205, succeed, steps:189, total reward:25.4000, 1.3474555015563965 sec
Episode 7803, loss:2.8418, succeed, steps:163, total reward:20.5000, 1.1987183094024658 sec
Episode 7804, loss:-7.2083, fail, steps:217, total reward:32.5000, 1.9503846168518066 sec
Episode 7805, loss:6.5588, succeed, steps:191, total reward:31.0000, 1.6748907566070557 sec
Episode 7806, loss:5.1231, succeed, steps:169, total reward:23.1000, 1.3228356838226318 sec
Episode 7807, loss:-9.0696, fail, steps:208, total reward:18.7000, 1.9953515529632568 sec
Episode 7808, loss:9.7041, succeed, steps:204, total reward:30.6000, 1.7533738613128662 sec
Episode 7809, loss:2.9015, succeed, steps:132, total reward:6.8000, 1.1226942539215088 sec
Episode 7810, loss:-10.9403, fail, steps:196, total reward:3.6000, 2.05322527885437 se

Episode 7891, loss:2.9692, succeed, steps:189, total reward:32.5000, 1.5225210189819336 sec
Episode 7892, loss:0.7152, succeed, steps:162, total reward:14.0000, 1.2245121002197266 sec
Episode 7893, loss:5.1739, succeed, steps:217, total reward:35.6000, 1.895575761795044 sec
Episode 7894, loss:1.7076, succeed, steps:157, total reward:14.8000, 1.4001269340515137 sec
Episode 7895, loss:4.2796, succeed, steps:131, total reward:8.1000, 1.138601541519165 sec
Episode 7896, loss:0.0753, succeed, steps:160, total reward:13.7000, 1.180140495300293 sec
Episode 7897, loss:2.5298, succeed, steps:197, total reward:40.5000, 1.3995468616485596 sec
Episode 7898, loss:0.4309, succeed, steps:178, total reward:8.5000, 1.4106645584106445 sec
Episode 7899, loss:-6.1496, fail, steps:206, total reward:17.9000, 1.866218090057373 sec
len16 Comparison 9 10 more Swap Comparison 8 9 less Comparison 7 8 more Swap Comparison 6 7 more Swap Comparison 5 6 more Swap Comparison 4 5 more Swap Comparison 3 4 more Swap Com

Episode 7948, loss:3.4493, succeed, steps:156, total reward:23.6000, 1.1250994205474854 sec
Episode 7949, loss:-0.7666, succeed, steps:190, total reward:14.8000, 1.610131025314331 sec
Episode 7950, loss:-4.4886, succeed, steps:198, total reward:12.0000, 1.7137908935546875 sec
Episode 7951, loss:3.6409, succeed, steps:210, total reward:33.2000, 1.8575830459594727 sec
Episode 7952, loss:8.9923, succeed, steps:206, total reward:26.8000, 1.7631914615631104 sec
Episode 7953, loss:7.5686, succeed, steps:175, total reward:27.3000, 1.503978967666626 sec
Episode 7954, loss:6.2323, succeed, steps:162, total reward:33.5000, 1.238459825515747 sec
Episode 7955, loss:4.0199, succeed, steps:208, total reward:24.2000, 1.953216314315796 sec
Episode 7956, loss:-4.8217, fail, steps:214, total reward:28.8000, 2.0022077560424805 sec
Episode 7957, loss:0.1470, succeed, steps:171, total reward:24.7000, 1.5062785148620605 sec
Episode 7958, loss:5.1747, succeed, steps:204, total reward:14.2000, 2.0379683971405

Episode 8015, loss:0.7879, succeed, steps:136, total reward:17.2000, 1.0515263080596924 sec
Episode 8016, loss:-8.6581, fail, steps:213, total reward:26.3000, 2.0806429386138916 sec
Episode 8017, loss:4.7850, succeed, steps:192, total reward:30.3000, 1.633375883102417 sec
Episode 8018, loss:1.5632, succeed, steps:146, total reward:16.0000, 1.2272801399230957 sec
Episode 8019, loss:-5.5056, fail, steps:202, total reward:10.1000, 1.9902329444885254 sec
Episode 8020, loss:-4.3945, succeed, steps:158, total reward:-4.6000, 1.5144572257995605 sec
Episode 8021, loss:4.0112, succeed, steps:202, total reward:24.7000, 1.7503907680511475 sec
Episode 8022, loss:4.7009, succeed, steps:213, total reward:37.7000, 1.7252414226531982 sec
Episode 8023, loss:-1.2112, succeed, steps:160, total reward:14.7000, 1.4545717239379883 sec
Episode 8024, loss:2.2722, succeed, steps:185, total reward:19.0000, 1.56459641456604 sec
Episode 8025, loss:-3.6977, fail, steps:214, total reward:28.6000, 1.872835636138916 

Episode 8100, loss:-0.5481, succeed, steps:147, total reward:11.3000, 1.075211524963379 sec
Episode 8101, loss:0.6210, succeed, steps:177, total reward:27.8000, 1.2639474868774414 sec
Episode 8102, loss:4.2394, succeed, steps:173, total reward:22.4000, 1.2369799613952637 sec
Episode 8103, loss:-2.0537, succeed, steps:187, total reward:0.6000, 1.8305482864379883 sec
Episode 8104, loss:1.4717, succeed, steps:189, total reward:23.2000, 1.6385107040405273 sec
Episode 8105, loss:2.1668, succeed, steps:182, total reward:20.7000, 1.457848072052002 sec
Episode 8106, loss:-1.3368, fail, steps:208, total reward:20.2000, 1.9296765327453613 sec
Episode 8107, loss:0.1228, fail, steps:210, total reward:23.8000, 1.8528037071228027 sec
Episode 8108, loss:3.8759, succeed, steps:167, total reward:22.0000, 1.4795684814453125 sec
Episode 8109, loss:0.4053, succeed, steps:206, total reward:32.6000, 1.9428229331970215 sec
Episode 8110, loss:-7.1384, fail, steps:210, total reward:24.1000, 1.9841103553771973 

Episode 8190, loss:3.3337, succeed, steps:182, total reward:20.8000, 1.4163539409637451 sec
Episode 8191, loss:2.2741, succeed, steps:130, total reward:24.6000, 0.9023356437683105 sec
Episode 8192, loss:6.1153, succeed, steps:179, total reward:21.2000, 1.2544679641723633 sec
Episode 8193, loss:3.7618, succeed, steps:106, total reward:19.7000, 0.7441682815551758 sec
Episode 8194, loss:-4.1762, fail, steps:211, total reward:25.1000, 2.0617377758026123 sec
Episode 8195, loss:3.2950, succeed, steps:145, total reward:21.7000, 1.2317233085632324 sec
Episode 8196, loss:1.2828, succeed, steps:184, total reward:26.2000, 1.5461363792419434 sec
Episode 8197, loss:3.8088, succeed, steps:161, total reward:31.8000, 1.1907920837402344 sec
Episode 8198, loss:1.4897, succeed, steps:132, total reward:22.7000, 0.911431074142456 sec
Episode 8199, loss:4.4835, succeed, steps:136, total reward:23.7000, 0.9187002182006836 sec
len16 Comparison 9 10 more Swap Comparison 8 9 more Swap Comparison 7 8 more Swap C

Episode 8253, loss:-1.3680, fail, steps:205, total reward:16.9000, 1.9597928524017334 sec
Episode 8254, loss:-11.2445, fail, steps:204, total reward:14.0000, 1.8438496589660645 sec
Episode 8255, loss:4.3499, succeed, steps:203, total reward:25.7000, 1.759718894958496 sec
Episode 8256, loss:5.9971, succeed, steps:198, total reward:26.7000, 1.8296942710876465 sec
Episode 8257, loss:7.3483, succeed, steps:184, total reward:20.8000, 1.619060754776001 sec
Episode 8258, loss:3.7171, succeed, steps:141, total reward:22.2000, 1.0815868377685547 sec
Episode 8259, loss:5.9155, succeed, steps:204, total reward:20.2000, 1.9423096179962158 sec
Episode 8260, loss:2.2229, succeed, steps:136, total reward:6.2000, 1.236081838607788 sec
Episode 8261, loss:5.8959, succeed, steps:166, total reward:33.0000, 1.3366878032684326 sec
Episode 8262, loss:1.4296, succeed, steps:195, total reward:15.1000, 1.8771350383758545 sec
Episode 8263, loss:-8.1220, fail, steps:205, total reward:16.5000, 1.9076881408691406 s

Episode 8318, loss:7.4940, succeed, steps:169, total reward:38.5000, 1.1717727184295654 sec
Episode 8319, loss:4.1417, succeed, steps:196, total reward:29.1000, 1.525696039199829 sec
Episode 8320, loss:2.5926, succeed, steps:202, total reward:25.8000, 1.7653913497924805 sec
Episode 8321, loss:1.1965, fail, steps:220, total reward:39.0000, 1.8953580856323242 sec
Episode 8322, loss:3.7550, succeed, steps:130, total reward:24.2000, 1.1084401607513428 sec
Episode 8323, loss:-9.9850, fail, steps:190, total reward:-5.0000, 2.1265997886657715 sec
Episode 8324, loss:4.2373, succeed, steps:203, total reward:33.5000, 1.699916124343872 sec
Episode 8325, loss:8.9593, succeed, steps:184, total reward:36.6000, 1.6374123096466064 sec
Episode 8326, loss:-5.1891, fail, steps:207, total reward:19.8000, 2.0012128353118896 sec
Episode 8327, loss:-11.0128, fail, steps:198, total reward:3.0000, 1.8426012992858887 sec
Episode 8328, loss:2.1244, succeed, steps:128, total reward:23.9000, 1.0928146839141846 sec

Episode 8400, loss:-1.7991, succeed, steps:172, total reward:14.2000, 1.5931971073150635 sec
Episode 8401, loss:4.0631, succeed, steps:173, total reward:34.4000, 1.3587281703948975 sec
Episode 8402, loss:-9.2462, fail, steps:199, total reward:6.5000, 1.9422001838684082 sec
Episode 8403, loss:-3.7499, fail, steps:203, total reward:12.1000, 1.8511605262756348 sec
Episode 8404, loss:3.2480, succeed, steps:197, total reward:38.6000, 1.6718459129333496 sec
Episode 8405, loss:4.1422, succeed, steps:171, total reward:17.3000, 1.4430742263793945 sec
Episode 8406, loss:-3.2202, fail, steps:208, total reward:20.8000, 1.9046807289123535 sec
Episode 8407, loss:8.3921, succeed, steps:180, total reward:37.8000, 1.4843811988830566 sec
Episode 8408, loss:1.9192, succeed, steps:171, total reward:1.9000, 1.5058369636535645 sec
Episode 8409, loss:5.6442, succeed, steps:197, total reward:34.7000, 1.448122501373291 sec
Episode 8410, loss:8.1063, succeed, steps:176, total reward:36.1000, 1.3054356575012207 

Episode 8490, loss:2.0622, succeed, steps:138, total reward:12.1000, 1.0032434463500977 sec
Episode 8491, loss:5.2139, succeed, steps:177, total reward:37.4000, 1.2599666118621826 sec
Episode 8492, loss:6.5265, succeed, steps:171, total reward:33.7000, 1.1453649997711182 sec
Episode 8493, loss:-10.3305, fail, steps:202, total reward:12.0000, 1.9533319473266602 sec
Episode 8494, loss:6.6821, succeed, steps:166, total reward:31.0000, 1.4015018939971924 sec
Episode 8495, loss:-1.9204, fail, steps:207, total reward:18.9000, 2.152348041534424 sec
Episode 8496, loss:3.4281, succeed, steps:191, total reward:20.2000, 1.7393543720245361 sec
Episode 8497, loss:1.6985, succeed, steps:157, total reward:21.3000, 1.2625856399536133 sec
Episode 8498, loss:-6.4918, fail, steps:207, total reward:18.9000, 1.9161906242370605 sec
Episode 8499, loss:9.8857, succeed, steps:197, total reward:39.9000, 1.613034725189209 sec
len16 Comparison 9 10 less Comparison 8 9 less Comparison 7 8 more Swap Comparison 6 7 

Episode 8554, loss:-1.0810, succeed, steps:197, total reward:9.1000, 1.8848254680633545 sec
Episode 8555, loss:1.9888, succeed, steps:159, total reward:13.8000, 1.526259183883667 sec
Episode 8556, loss:0.8234, succeed, steps:150, total reward:13.5000, 1.1835579872131348 sec
Episode 8557, loss:4.4253, succeed, steps:206, total reward:37.9000, 1.6821682453155518 sec
Episode 8558, loss:1.5020, succeed, steps:166, total reward:33.3000, 1.2751715183258057 sec
Episode 8559, loss:7.3176, succeed, steps:197, total reward:32.8000, 1.5593159198760986 sec
Episode 8560, loss:-5.6783, fail, steps:208, total reward:19.7000, 1.9107334613800049 sec
Episode 8561, loss:3.7047, succeed, steps:138, total reward:20.9000, 1.1978869438171387 sec
Episode 8562, loss:1.5700, succeed, steps:145, total reward:1.0000, 1.2711420059204102 sec
Episode 8563, loss:1.9026, succeed, steps:175, total reward:9.3000, 1.363569736480713 sec
Episode 8564, loss:2.3183, succeed, steps:179, total reward:25.8000, 1.260783195495605

Episode 8644, loss:0.0298, succeed, steps:207, total reward:24.1000, 1.7659685611724854 sec
Episode 8645, loss:-4.9934, fail, steps:208, total reward:20.8000, 1.7773001194000244 sec
Episode 8646, loss:-11.0467, fail, steps:202, total reward:10.7000, 1.8787012100219727 sec
Episode 8647, loss:2.2246, succeed, steps:154, total reward:2.8000, 1.4821782112121582 sec
Episode 8648, loss:0.8487, succeed, steps:214, total reward:36.7000, 1.976032018661499 sec
Episode 8649, loss:-1.8444, fail, steps:212, total reward:26.5000, 1.8204021453857422 sec
Episode 8650, loss:4.1668, succeed, steps:138, total reward:24.1000, 1.1456124782562256 sec
Episode 8651, loss:-3.3415, fail, steps:208, total reward:20.6000, 2.174677848815918 sec
Episode 8652, loss:4.5290, succeed, steps:160, total reward:29.5000, 1.355048656463623 sec
Episode 8653, loss:5.4877, succeed, steps:146, total reward:25.7000, 1.1068518161773682 sec
Episode 8654, loss:-6.2796, fail, steps:205, total reward:15.0000, 2.0061278343200684 sec
E

Episode 8716, loss:1.7773, succeed, steps:133, total reward:18.2000, 1.1960508823394775 sec
Episode 8717, loss:-5.4628, fail, steps:208, total reward:20.8000, 2.1502459049224854 sec
Episode 8718, loss:-0.4369, succeed, steps:186, total reward:13.7000, 1.7582621574401855 sec
Episode 8719, loss:6.2003, succeed, steps:195, total reward:28.8000, 1.5782389640808105 sec
Episode 8720, loss:1.4993, succeed, steps:180, total reward:22.6000, 1.3529586791992188 sec
Episode 8721, loss:5.2178, succeed, steps:153, total reward:29.0000, 1.1331076622009277 sec
Episode 8722, loss:6.1584, succeed, steps:173, total reward:30.2000, 1.2402451038360596 sec
Episode 8723, loss:3.5597, succeed, steps:139, total reward:11.6000, 1.0138661861419678 sec
Episode 8724, loss:5.5070, succeed, steps:155, total reward:32.0000, 1.0216529369354248 sec
Episode 8725, loss:2.4458, succeed, steps:201, total reward:8.3000, 1.997514009475708 sec
Episode 8726, loss:5.8770, succeed, steps:155, total reward:21.0000, 1.343243122100

Episode 8806, loss:4.6726, succeed, steps:177, total reward:28.3000, 1.4587852954864502 sec
Episode 8807, loss:1.4968, succeed, steps:153, total reward:21.3000, 1.106618881225586 sec
Episode 8808, loss:0.4678, succeed, steps:207, total reward:20.5000, 1.933215856552124 sec
Episode 8809, loss:-0.2675, succeed, steps:190, total reward:16.6000, 1.6869010925292969 sec
Episode 8810, loss:2.0495, succeed, steps:175, total reward:23.7000, 1.389575719833374 sec
Episode 8811, loss:-6.9139, fail, steps:193, total reward:0.6000, 1.9516148567199707 sec
Episode 8812, loss:-3.2567, fail, steps:214, total reward:28.8000, 1.840200662612915 sec
Episode 8813, loss:-4.8284, fail, steps:202, total reward:12.2000, 1.8429503440856934 sec
Episode 8814, loss:4.2742, succeed, steps:169, total reward:21.6000, 1.511207103729248 sec
Episode 8815, loss:2.2064, succeed, steps:181, total reward:18.4000, 1.553403377532959 sec
Episode 8816, loss:-0.3459, succeed, steps:188, total reward:18.6000, 1.4782371520996094 sec

Episode 8896, loss:1.7448, succeed, steps:158, total reward:23.1000, 1.2486844062805176 sec
Episode 8897, loss:3.4319, succeed, steps:113, total reward:17.0000, 0.8116579055786133 sec
Episode 8898, loss:1.2838, succeed, steps:162, total reward:19.2000, 1.1752657890319824 sec
Episode 8899, loss:1.6289, succeed, steps:150, total reward:24.7000, 1.0227315425872803 sec
len16 Comparison 9 10 more Swap Comparison 8 9 less Comparison 7 8 more Swap Comparison 6 7 more Swap Comparison 5 6 more Swap Comparison 4 5 more Swap Comparison 3 4 more Swap Comparison 2 3 more Swap Comparison 1 2 more Swap Comparison 0 1 less Comparison 10 11 less Comparison 11 12 more Swap Comparison 12 13 more Swap Comparison 13 14 equal Comparison 14 15 more Swap Comparison 9 10 less Comparison 8 9 more Swap Comparison 7 8 more Swap Comparison 6 7 more Swap Comparison 5 6 more Swap Comparison 4 5 more Swap Comparison 3 4 more Swap Comparison 2 3 more Swap Comparison 1 2 less Comparison 10 11 more Swap Comparison 11 12

Episode 8959, loss:-6.0705, fail, steps:202, total reward:9.8000, 2.131758689880371 sec
Episode 8960, loss:7.1995, succeed, steps:218, total reward:39.4000, 1.9094970226287842 sec
Episode 8961, loss:-6.1089, fail, steps:196, total reward:0.3000, 1.855802059173584 sec
Episode 8962, loss:8.6224, succeed, steps:186, total reward:28.6000, 1.6219933032989502 sec
Episode 8963, loss:-9.9113, fail, steps:205, total reward:17.8000, 2.0231857299804688 sec
Episode 8964, loss:-4.3161, succeed, steps:198, total reward:9.8000, 1.8390319347381592 sec
Episode 8965, loss:-1.4251, fail, steps:211, total reward:23.1000, 1.9475252628326416 sec
Episode 8966, loss:4.7126, succeed, steps:158, total reward:30.9000, 1.3743135929107666 sec
Episode 8967, loss:4.3630, succeed, steps:162, total reward:10.5000, 1.3662524223327637 sec
Episode 8968, loss:-0.8908, succeed, steps:165, total reward:2.5000, 1.3151094913482666 sec
Episode 8969, loss:8.2207, succeed, steps:138, total reward:25.7000, 0.9688396453857422 sec


Episode 9026, loss:3.2465, succeed, steps:170, total reward:35.0000, 1.137218713760376 sec
Episode 9027, loss:2.4680, succeed, steps:158, total reward:15.2000, 1.125688076019287 sec
Episode 9028, loss:-1.6577, succeed, steps:197, total reward:14.6000, 1.8868005275726318 sec
Episode 9029, loss:5.1049, succeed, steps:207, total reward:24.5000, 1.8624894618988037 sec
Episode 9030, loss:3.5573, succeed, steps:137, total reward:33.5000, 1.125514268875122 sec
Episode 9031, loss:-7.0274, fail, steps:202, total reward:10.2000, 2.1755802631378174 sec
Episode 9032, loss:-0.5352, succeed, steps:137, total reward:5.2000, 1.3687076568603516 sec
Episode 9033, loss:7.0808, succeed, steps:164, total reward:31.6000, 1.2649409770965576 sec
Episode 9034, loss:2.7120, succeed, steps:94, total reward:10.3000, 0.6994566917419434 sec
Episode 9035, loss:-0.1503, succeed, steps:142, total reward:23.6000, 0.9514830112457275 sec
Episode 9036, loss:1.9024, succeed, steps:194, total reward:15.4000, 1.7754218578338

Episode 9116, loss:-7.2881, fail, steps:208, total reward:21.8000, 2.216320514678955 sec
Episode 9117, loss:0.6419, succeed, steps:187, total reward:19.7000, 1.6214299201965332 sec
Episode 9118, loss:1.4412, succeed, steps:187, total reward:35.7000, 1.4625589847564697 sec
Episode 9119, loss:3.0061, succeed, steps:194, total reward:31.8000, 1.3698923587799072 sec
Episode 9120, loss:1.9822, succeed, steps:176, total reward:27.1000, 1.2875003814697266 sec
Episode 9121, loss:2.8704, succeed, steps:186, total reward:12.3000, 1.4807987213134766 sec
Episode 9122, loss:5.9978, succeed, steps:173, total reward:27.9000, 1.2912490367889404 sec
Episode 9123, loss:1.4411, succeed, steps:206, total reward:33.9000, 1.7597665786743164 sec
Episode 9124, loss:-0.0906, succeed, steps:172, total reward:13.3000, 1.5666213035583496 sec
Episode 9125, loss:1.9304, succeed, steps:147, total reward:8.4000, 1.2924716472625732 sec
Episode 9126, loss:4.8239, succeed, steps:170, total reward:20.0000, 1.224336147308

Episode 9200, loss:4.7442, succeed, steps:184, total reward:18.6000, 1.3769598007202148 sec
Episode 9201, loss:4.3996, succeed, steps:191, total reward:28.0000, 1.5723645687103271 sec
Episode 9202, loss:1.8529, succeed, steps:161, total reward:33.1000, 1.1482722759246826 sec
Episode 9203, loss:2.0660, succeed, steps:184, total reward:11.4000, 1.4800312519073486 sec
Episode 9204, loss:-0.5634, succeed, steps:172, total reward:1.0000, 1.4998347759246826 sec
Episode 9205, loss:10.3616, succeed, steps:143, total reward:23.3000, 1.085930347442627 sec
Episode 9206, loss:3.6100, succeed, steps:167, total reward:19.6000, 1.227504014968872 sec
Episode 9207, loss:2.9628, succeed, steps:161, total reward:23.2000, 1.177109718322754 sec
Episode 9208, loss:4.6058, succeed, steps:149, total reward:22.9000, 1.0263581275939941 sec
Episode 9209, loss:-0.9874, fail, steps:206, total reward:18.8000, 1.9768638610839844 sec
Episode 9210, loss:-4.0535, fail, steps:205, total reward:16.3000, 1.825956583023071

Episode 9290, loss:2.6223, succeed, steps:148, total reward:18.8000, 1.8512108325958252 sec
Episode 9291, loss:6.0084, succeed, steps:174, total reward:35.4000, 1.9599127769470215 sec
Episode 9292, loss:5.5709, succeed, steps:152, total reward:20.3000, 1.870227575302124 sec
Episode 9293, loss:1.8953, succeed, steps:112, total reward:16.6000, 1.4348816871643066 sec
Episode 9294, loss:-0.7333, succeed, steps:143, total reward:-2.6000, 1.8307392597198486 sec
Episode 9295, loss:1.6983, succeed, steps:187, total reward:11.1000, 2.628905773162842 sec
Episode 9296, loss:6.9877, succeed, steps:213, total reward:37.7000, 2.7719969749450684 sec
Episode 9297, loss:-0.4287, succeed, steps:150, total reward:-1.3000, 2.186556577682495 sec
Episode 9298, loss:-8.5209, fail, steps:213, total reward:28.4000, 3.277104139328003 sec
Episode 9299, loss:3.3607, succeed, steps:188, total reward:25.1000, 2.5720160007476807 sec
len16 Comparison 9 10 less Comparison 8 9 more Swap Comparison 7 8 more Swap Compari

Episode 9346, loss:6.8603, succeed, steps:136, total reward:23.5000, 1.575422763824463 sec
Episode 9347, loss:3.3874, succeed, steps:197, total reward:7.7000, 2.9596304893493652 sec
Episode 9348, loss:-2.3551, fail, steps:220, total reward:38.0000, 2.9730546474456787 sec
Episode 9349, loss:1.4311, succeed, steps:155, total reward:29.7000, 2.0678811073303223 sec
Episode 9350, loss:4.7974, succeed, steps:187, total reward:24.9000, 2.524303674697876 sec
Episode 9351, loss:-5.9257, fail, steps:200, total reward:8.7000, 2.8855373859405518 sec
Episode 9352, loss:1.3401, succeed, steps:170, total reward:19.1000, 2.453375816345215 sec
Episode 9353, loss:4.7641, succeed, steps:134, total reward:18.8000, 1.8281216621398926 sec
Episode 9354, loss:7.1025, succeed, steps:188, total reward:36.7000, 2.260161876678467 sec
Episode 9355, loss:2.8785, succeed, steps:126, total reward:23.5000, 1.495410442352295 sec
Episode 9356, loss:5.4192, succeed, steps:182, total reward:17.1000, 2.350398302078247 sec


Episode 9407, loss:2.4558, succeed, steps:98, total reward:16.6000, 1.2364838123321533 sec
Episode 9408, loss:1.2103, succeed, steps:157, total reward:23.3000, 1.9146981239318848 sec
Episode 9409, loss:2.9178, succeed, steps:151, total reward:24.5000, 1.789320707321167 sec
Episode 9410, loss:4.4729, succeed, steps:135, total reward:20.8000, 1.6409144401550293 sec
Episode 9411, loss:2.9860, succeed, steps:168, total reward:30.7000, 2.0099987983703613 sec
Episode 9412, loss:6.2989, succeed, steps:175, total reward:26.0000, 2.1716041564941406 sec
Episode 9413, loss:-5.6397, fail, steps:211, total reward:23.6000, 2.8865418434143066 sec
Episode 9414, loss:2.4641, succeed, steps:157, total reward:27.0000, 2.0310559272766113 sec
Episode 9415, loss:2.1894, succeed, steps:159, total reward:23.7000, 2.0572509765625 sec
Episode 9416, loss:4.2115, succeed, steps:163, total reward:33.8000, 1.9067025184631348 sec
Episode 9417, loss:2.4451, succeed, steps:155, total reward:19.3000, 1.9353911876678467

Episode 9497, loss:-0.1165, succeed, steps:148, total reward:5.8000, 2.1729578971862793 sec
Episode 9498, loss:5.9516, succeed, steps:115, total reward:24.2000, 1.4061720371246338 sec
Episode 9499, loss:-1.1023, fail, steps:217, total reward:33.7000, 3.192065477371216 sec
len16 Comparison 9 10 more Swap Comparison 8 9 more Swap Comparison 7 8 more Swap Comparison 6 7 more Swap Comparison 5 6 less Comparison 4 5 more Swap Comparison 3 4 more Swap Comparison 2 3 less Comparison 1 2 more Swap Comparison 0 1 more Swap Comparison 10 11 more Swap Comparison 11 12 more Swap Comparison 2 3 less Comparison 12 13 more Swap Comparison 13 14 more Swap Comparison 14 15 more Swap Comparison 9 10 more Swap Comparison 8 9 more Swap Comparison 7 8 less Comparison 6 7 less Comparison 5 6 more Swap Comparison 4 5 more Swap Comparison 3 4 less Comparison 2 3 less Comparison 1 2 more Swap Comparison 10 11 more Swap Comparison 11 12 more Swap Comparison 12 13 more Swap Comparison 13 14 less Comparison 9 10 

Episode 9561, loss:6.5939, succeed, steps:185, total reward:40.2000, 2.2773067951202393 sec
Episode 9562, loss:0.4988, succeed, steps:186, total reward:29.7000, 2.469280481338501 sec
Episode 9563, loss:1.1956, succeed, steps:148, total reward:21.5000, 1.889909267425537 sec
Episode 9564, loss:0.6967, succeed, steps:140, total reward:14.2000, 1.745884895324707 sec
Episode 9565, loss:1.6260, succeed, steps:206, total reward:30.0000, 2.7903404235839844 sec
Episode 9566, loss:-3.1531, fail, steps:206, total reward:17.6000, 2.8869411945343018 sec
Episode 9567, loss:5.7685, succeed, steps:162, total reward:32.2000, 2.145923614501953 sec
Episode 9568, loss:7.4210, succeed, steps:169, total reward:26.9000, 2.289682388305664 sec
Episode 9569, loss:-10.8668, fail, steps:202, total reward:12.2000, 2.998929500579834 sec
Episode 9570, loss:0.6245, succeed, steps:181, total reward:12.8000, 2.58915638923645 sec
Episode 9571, loss:2.9873, fail, steps:219, total reward:37.0000, 3.0135679244995117 sec
Ep

Episode 9623, loss:3.4575, succeed, steps:154, total reward:36.5000, 1.9876925945281982 sec
Episode 9624, loss:2.3194, succeed, steps:162, total reward:28.3000, 2.1663105487823486 sec
Episode 9625, loss:-2.6554, fail, steps:208, total reward:19.6000, 3.0089635848999023 sec
Episode 9626, loss:4.4820, succeed, steps:176, total reward:9.3000, 2.5113790035247803 sec
Episode 9627, loss:-0.3197, succeed, steps:181, total reward:5.0000, 2.5159595012664795 sec
Episode 9628, loss:-2.7982, succeed, steps:209, total reward:22.5000, 2.8464083671569824 sec
Episode 9629, loss:4.4898, succeed, steps:157, total reward:25.2000, 2.230768918991089 sec
Episode 9630, loss:7.8139, succeed, steps:217, total reward:40.4000, 3.1019771099090576 sec
Episode 9631, loss:3.1539, succeed, steps:160, total reward:30.5000, 2.1033687591552734 sec
Episode 9632, loss:2.6339, succeed, steps:158, total reward:19.7000, 2.0287506580352783 sec
Episode 9633, loss:4.6032, succeed, steps:199, total reward:21.7000, 2.786873340606

Episode 9700, loss:0.4083, succeed, steps:131, total reward:13.0000, 1.6976096630096436 sec
Episode 9701, loss:4.7802, succeed, steps:179, total reward:29.4000, 2.1530401706695557 sec
Episode 9702, loss:2.6514, succeed, steps:114, total reward:16.0000, 1.3896446228027344 sec
Episode 9703, loss:4.0180, succeed, steps:170, total reward:16.2000, 2.145228862762451 sec
Episode 9704, loss:-12.1898, fail, steps:198, total reward:6.9000, 2.8891427516937256 sec
Episode 9705, loss:-2.1993, fail, steps:214, total reward:28.6000, 2.835907220840454 sec
Episode 9706, loss:9.3960, succeed, steps:190, total reward:27.0000, 2.667685031890869 sec
Episode 9707, loss:1.2274, succeed, steps:160, total reward:23.0000, 2.101672887802124 sec
Episode 9708, loss:3.3872, succeed, steps:210, total reward:31.8000, 2.842277765274048 sec
Episode 9709, loss:6.2469, succeed, steps:158, total reward:29.2000, 2.1377577781677246 sec
Episode 9710, loss:7.5251, succeed, steps:143, total reward:30.7000, 1.72660231590271 sec

Episode 9791, loss:6.6891, succeed, steps:156, total reward:35.9000, 1.7996985912322998 sec
Episode 9792, loss:-6.4514, fail, steps:201, total reward:7.9000, 2.9633405208587646 sec
Episode 9793, loss:-5.7377, fail, steps:219, total reward:34.0000, 2.917343854904175 sec
Episode 9794, loss:1.6841, succeed, steps:163, total reward:20.2000, 2.327420949935913 sec
Episode 9795, loss:2.1624, succeed, steps:191, total reward:20.6000, 2.608341693878174 sec
Episode 9796, loss:3.6165, succeed, steps:207, total reward:34.4000, 2.616605043411255 sec
Episode 9797, loss:-2.9099, fail, steps:216, total reward:32.4000, 3.0507595539093018 sec
Episode 9798, loss:-5.0708, fail, steps:202, total reward:11.7000, 2.8893377780914307 sec
Episode 9799, loss:-9.5053, fail, steps:204, total reward:13.7000, 2.9575226306915283 sec
len16 Comparison 9 10 more Swap Comparison 8 9 more Swap Comparison 7 8 more Swap Comparison 6 7 less Comparison 5 6 more Swap Comparison 4 5 less Comparison 3 4 more Swap Comparison 2 3 

Episode 9863, loss:-12.6280, fail, steps:192, total reward:-4.1000, 2.9117534160614014 sec
Episode 9864, loss:3.7300, succeed, steps:159, total reward:16.7000, 2.228925943374634 sec
Episode 9865, loss:7.6089, succeed, steps:206, total reward:31.0000, 2.9598898887634277 sec
Episode 9866, loss:8.6517, succeed, steps:173, total reward:21.5000, 2.464498996734619 sec
Episode 9867, loss:1.7231, fail, steps:213, total reward:26.6000, 3.0730316638946533 sec
Episode 9868, loss:0.0035, succeed, steps:193, total reward:15.8000, 2.6354153156280518 sec
Episode 9869, loss:-0.2777, succeed, steps:130, total reward:9.6000, 1.8159136772155762 sec
Episode 9870, loss:2.0517, succeed, steps:191, total reward:25.5000, 2.383711814880371 sec
Episode 9871, loss:2.9833, succeed, steps:148, total reward:37.0000, 1.8116176128387451 sec
Episode 9872, loss:2.6089, succeed, steps:189, total reward:23.4000, 2.469635248184204 sec
Episode 9873, loss:4.9790, succeed, steps:169, total reward:32.7000, 2.011606216430664 s

Episode 9923, loss:-1.5596, succeed, steps:180, total reward:11.1000, 2.4831337928771973 sec
Episode 9924, loss:1.4639, succeed, steps:127, total reward:10.0000, 1.6989977359771729 sec
Episode 9925, loss:-9.1559, fail, steps:197, total reward:5.9000, 3.056001901626587 sec
Episode 9926, loss:2.0680, succeed, steps:196, total reward:23.3000, 2.7372264862060547 sec
Episode 9927, loss:1.4816, succeed, steps:168, total reward:22.0000, 2.2492411136627197 sec
Episode 9928, loss:4.2923, succeed, steps:172, total reward:5.9000, 2.2970781326293945 sec
Episode 9929, loss:5.6748, succeed, steps:210, total reward:35.4000, 2.786735773086548 sec
Episode 9930, loss:6.2102, succeed, steps:161, total reward:24.0000, 2.2511205673217773 sec
Episode 9931, loss:8.5548, succeed, steps:184, total reward:26.0000, 2.408782720565796 sec
Episode 9932, loss:-5.7194, fail, steps:208, total reward:22.1000, 3.0733511447906494 sec
Episode 9933, loss:-1.5529, fail, steps:202, total reward:13.5000, 2.8588290214538574 se

Episode 10000, loss:-2.3598, succeed, steps:134, total reward:8.7000, 1.6675148010253906 sec
Checkpoint saved at episode 10000 to /home/mcwave/code/autocode/datasets/rl_sort_transformer_easy/list16_transformer4_192_gamma07_step640_v3/ckpt_10000_0.7810_169.08.pth
Learning rate = 0.000016
Episode 10001, loss:5.9176, succeed, steps:176, total reward:22.6000, 2.071591854095459 sec
Episode 10002, loss:6.6795, succeed, steps:191, total reward:25.8000, 2.4786181449890137 sec
Episode 10003, loss:2.9649, succeed, steps:159, total reward:25.6000, 1.951111078262329 sec
Episode 10004, loss:2.8647, succeed, steps:107, total reward:15.5000, 1.2939081192016602 sec
Episode 10005, loss:5.4707, succeed, steps:187, total reward:44.8000, 2.1046946048736572 sec
Episode 10006, loss:5.8218, succeed, steps:194, total reward:31.4000, 2.36258602142334 sec
Episode 10007, loss:5.3133, succeed, steps:166, total reward:36.2000, 1.9388511180877686 sec
Episode 10008, loss:3.0077, succeed, steps:184, total reward:21.4

Episode 10087, loss:-0.9051, succeed, steps:193, total reward:24.7000, 2.499650001525879 sec
Episode 10088, loss:2.6782, succeed, steps:135, total reward:11.0000, 1.695303201675415 sec
Episode 10089, loss:-6.9683, fail, steps:207, total reward:16.2000, 3.003647565841675 sec
Episode 10090, loss:0.1925, fail, steps:216, total reward:32.7000, 2.9716193675994873 sec
Episode 10091, loss:1.5945, succeed, steps:207, total reward:23.6000, 2.8985695838928223 sec
Episode 10092, loss:2.9232, succeed, steps:206, total reward:40.4000, 2.7728962898254395 sec
Episode 10093, loss:1.4917, succeed, steps:152, total reward:31.0000, 1.8260526657104492 sec
Episode 10094, loss:-0.4160, fail, steps:212, total reward:25.9000, 2.9858334064483643 sec
Episode 10095, loss:1.2292, succeed, steps:133, total reward:25.9000, 1.6762306690216064 sec
Episode 10096, loss:3.6783, succeed, steps:171, total reward:16.9000, 2.3999881744384766 sec
Episode 10097, loss:3.1040, succeed, steps:181, total reward:36.2000, 2.1679055

Episode 10154, loss:-1.1460, succeed, steps:215, total reward:42.7000, 2.062361717224121 sec



KeyboardInterrupt

