In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.optim.lr_scheduler as lr_scheduler
import math
import random

# Number of list positions; SortingEnv.reset() uses it to pick the start offset of the list.
NUM_POSITIONS = 16

# Inclusive bounds on the length of the random list drawn by SortingEnv.reset().
MIN_LIST_LEN = 2
MAX_LIST_LEN = 2
# Cap on environment steps per episode; train() also compares it against the
# number of tokens in the episode's sequence to bound the rollout.
MAX_STEPS = 10

# Reward for a swap that leaves the list sorted (ends the episode).
SUCCESS_REWARD = 0.5
# Reward for a comparison (doubled when the elements are equal) or a neutral swap.
STEP_REWARD = -0.3
# Weight of the comparison-entropy shaping term; only referenced from a
# commented-out line in train().
COMPARISON_ENTROPY_MULTIPLIER = -0.00
# Magnitude of the reward for a swap that fixes (+) or creates (-) an inversion.
SWAP_REWARD = 1.0
# Reward for a malformed or illegal action; such actions end the episode.
INVALID_ACTION_REWARD = -10.0
# Discount applied to the terminal success bonus when computing returns.
LONGTERM_GAMMA = 0.99
# Discount applied to the per-step rewards when computing returns.
SHORTTERM_GAMMA = 0.7

# Exploration schedule: the chance that a sampling step uses flattened logits
# decays from EPS_START towards EPS_END with a time constant of EPS_DECAY episodes.
EPS_START = 0.5
EPS_END = 0.05
EPS_DECAY = 1000
# Multiplicative learning-rate decay, applied each time a checkpoint is saved.
LR_SCHEDULER_GAMMA = 0.93
# Total number of training episodes.
NUM_EPISODES = 100000
# Checkpoint interval, in episodes.
EPISODES_SAVE = 1000
# Directory that receives the checkpoint files.
OUTPUT_DIR = '/home/mcwave/code/autocode/datasets/rl_sort_transformer_curriculum/list2_transformer4_192_gamma07_step10_v1'

# Token vocabulary. Ids are assigned consecutively in this order:
#   0-1    action tokens            'Comparison', 'Swap'
#   2-4    comparison outcomes      'less', 'equal', 'more'
#   5-20   position indices         '0' .. '15'
#   21-36  list-length markers      'len1' .. 'len16'
#   37-52  start-offset markers     'start0' .. 'start15'
vocab = {
    token: token_id
    for token_id, token in enumerate(
        ['Comparison', 'Swap', 'less', 'equal', 'more']
        + [str(pos) for pos in range(16)]
        + ['len{}'.format(size) for size in range(1, 17)]
        + ['start{}'.format(pos) for pos in range(16)]
    )
}
# Reverse lookup: token id -> token string.
inv_vocab = dict(zip(vocab.values(), vocab.keys()))

def compute_entropy(N, alpha=1):
    """Return the integers 0..2**N - 1 and their information content in bits.

    The integers follow an exponentially decaying distribution
    ``p(k) ∝ exp(-alpha * k)``. Despite the name, the second return value is
    the per-value surprisal ``-log2 p(k)``, not the entropy of the distribution.

    Args:
        N: Bit width; the support has ``2**N`` values.
        alpha: Decay rate of the distribution.

    Returns:
        Tuple ``(values, bits)`` of two arrays of length ``2**N``.
    """
    support = np.arange(2 ** N)
    weights = np.exp(-alpha * support)
    distribution = weights / weights.sum()
    return support, -np.log2(distribution)

# Lookup table with 2**4 = 16 entries: int_entropy[k] is the cost in bits
# that compute_entropy assigns to the magnitude k, for k in 0..15.
_, int_entropy = compute_entropy(4)

def get_entropy_of_integer(x):
    """Return the table cost (in bits) of ``|x|``, saturating at magnitude 15."""
    magnitude = abs(x)
    if magnitude > 15:
        # The table only covers 0..15; larger magnitudes share the last entry.
        magnitude = 15
    return int_entropy[magnitude]

def compute_min_delta_entropy(comparisons):
    """Return the cheapest encoding cost of the most recent comparison.

    The last ``(x, y)`` pair in ``comparisons`` can be described in several
    ways; each description is a triple of integers whose cost is the sum of
    ``get_entropy_of_integer`` over its members:

    * absolute:      ``(x, min(y, y - x), 0)``
    * first delta:   difference to the previous pair
    * second delta:  difference of consecutive differences (needs 3 pairs)
    * back-reference: difference to any earlier pair ``j``, plus the
      distance ``min(j, i - j)`` used to address it

    The cheapest description wins; ties go to the earliest candidate in the
    order listed above.

    Args:
        comparisons: Non-empty sequence of ``(index1, index2)`` integer pairs.

    Returns:
        The minimal cost; tripled when ``comparisons`` holds a single pair.

    Raises:
        IndexError: If ``comparisons`` is empty.
    """
    def cost(candidate):
        return sum([get_entropy_of_integer(member) for member in candidate])

    last = len(comparisons) - 1
    x_cur, y_cur = comparisons[last]

    # The absolute description is always available.
    candidates = [(x_cur, min(y_cur, y_cur - x_cur), 0)]

    if last != 0:
        x_prev, y_prev = comparisons[last - 1]
        dx, dy = x_cur - x_prev, y_cur - y_prev
        candidates.append((dx, dy, 0))

        if last > 1:
            x_prev2, y_prev2 = comparisons[last - 2]
            candidates.append((dx - (x_prev - x_prev2), dy - (y_prev - y_prev2), 0))

        # Back-references to every earlier comparison.
        for j in range(last):
            x_ref, y_ref = comparisons[j]
            candidates.append((x_cur - x_ref, y_cur - y_ref, min(j, last - j)))

    total = cost(min(candidates, key=cost))
    return 3 * total if len(comparisons) == 1 else total

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is a module-level global read by train() and load_checkpoint().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define the environment
class SortingEnv:
    """Sorting task driven by 'Comparison' and 'Swap' action tokens.

    The list lives at positions ``start_pos .. start_pos + length - 1``;
    positions before ``start_pos`` are padded with ``-1``. A comparison
    names two positions and yields a 'less' / 'equal' / 'more' response
    token; a swap exchanges the two positions of the latest comparison.
    """

    def __init__(self):
        self.max_steps = MAX_STEPS

    def reset(self):
        """Start a new episode with a random, not-yet-sorted list.

        Returns:
            Tuple ``(length_token_id, start_token_id, list_copy)``.
        """
        self.length = random.randint(MIN_LIST_LEN, MAX_LIST_LEN)
        slot = random.randint(0, NUM_POSITIONS // self.length - 1)
        self.start_pos = slot * self.length
        # Redraw until the list actually needs sorting.
        while True:
            values = [random.randint(1, 100) for _ in range(self.length)]
            self.list = [-1] * self.start_pos + values
            if self.list != sorted(self.list):
                break
        self.indices = None
        self.current_step = 0
        self.done = False
        return (
            vocab[f'len{self.length}'],
            vocab[f'start{self.start_pos}'],
            self.list.copy(),
        )

    def get_list(self):
        return self.list

    def get_length(self):
        return self.length

    def get_start_pos(self):
        return self.start_pos

    def _abort(self):
        """End the episode on an invalid action without counting a step."""
        self.done = True
        return None, INVALID_ACTION_REWARD, self.done, self.list.copy()

    def step(self, action_tokens):
        """Apply one action.

        Args:
            action_tokens: ``[Comparison, index_token, index_token]`` or
                ``[Swap]``, given as vocabulary ids.

        Returns:
            Tuple ``(response_token, reward, done, list_copy)``;
            ``response_token`` is ``None`` for every action except a valid
            comparison.
        """
        opcode = action_tokens[0]
        response_token = None

        if opcode == vocab['Comparison']:
            if len(action_tokens) != 3:
                print("Error! Comparison without 2 indices!")
                return self._abort()
            first = action_tokens[1] - vocab['0']
            second = action_tokens[2] - vocab['0']
            low, high = self.start_pos, self.start_pos + self.length
            if not (low <= first < high and low <= second < high):
                print(f"Error! Comparison with invalid indices {first} {second}")
                return self._abort()
            # Remember the pair: a following swap acts on it.
            self.indices = (first, second)
            left, right = self.list[first], self.list[second]
            if left == right:
                response_token, reward = vocab['equal'], STEP_REWARD * 2
            else:
                response_token = vocab['less'] if left < right else vocab['more']
                reward = STEP_REWARD
        elif opcode == vocab['Swap']:
            if self.indices is None:
                # Swap is only legal right after a comparison.
                return self._abort()
            first, second = self.indices
            old_first, old_second = self.list[first], self.list[second]
            self.list[first], self.list[second] = old_second, old_first
            # After the exchange the values are in the opposite order, so the
            # pre-swap order alone tells whether an inversion was fixed or made.
            fixed_inversion = (first < second and old_first > old_second) or \
                (first > second and old_first < old_second)
            made_inversion = (first < second and old_first < old_second) or \
                (first > second and old_first > old_second)
            if self.list == sorted(self.list):
                reward = SUCCESS_REWARD
                self.done = True
            elif fixed_inversion:
                reward = SWAP_REWARD
            elif made_inversion:
                reward = -SWAP_REWARD
            else:
                reward = STEP_REWARD
            self.indices = None
        else:
            # Unknown action token: penalise and stop, but still count the step.
            reward = INVALID_ACTION_REWARD
            self.done = True

        self.current_step += 1
        if self.current_step >= self.max_steps:
            self.done = True
        return response_token, reward, self.done, self.list.copy()


Using device: cuda


In [2]:
# Positional Encoding for Transformer
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a sequence of embeddings.

    Even feature columns hold ``sin(pos * w_k)`` and odd columns hold
    ``cos(pos * w_k)`` with ``w_k = 10000 ** (-2k / d_model)``. The table is
    stored as the non-trainable buffer ``pe`` of shape
    ``(max_len, 1, d_model)``.

    Args:
        d_model: Embedding width. Odd widths are supported.
        max_len: Longest sequence the table covers.
        dropout: Dropout probability applied after adding the encoding.
    """

    def __init__(self, d_model, max_len=512, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)  # (max_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                             (-math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(max_len, d_model)  # (max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)  # Even indices
        # For odd d_model there is one cosine column fewer than sine columns,
        # so trim the angles instead of failing with a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])  # Odd indices
        self.register_buffer('pe', pe.unsqueeze(1))  # (max_len, 1, d_model)

    def forward(self, x):
        """Return ``x`` plus its positional encoding, followed by dropout.

        Args:
            x: Tensor whose first dimension is the sequence position and whose
                last dimension is ``d_model``; the first dimension must not
                exceed ``max_len``.
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

# Transformer Model
class TransformerModel(nn.Module):
    """Token embedding -> positional encoding -> Transformer encoder -> logits.

    Args:
        vocab_size: Number of tokens; also the width of the output logits.
        d_model: Embedding / hidden width.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, vocab_size, d_model=192, nhead=8, num_layers=4):
        super().__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model

        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model, nhead),
            num_layers,
        )
        # Despite its name this is the linear output head producing vocab logits.
        self.decoder = nn.Linear(d_model, vocab_size)

        self.init_weights()

    def init_weights(self):
        """Draw embedding and head weights from U(-0.1, 0.1); zero the head bias."""
        bound = 0.1
        for weight in (self.embedding.weight, self.decoder.weight):
            nn.init.uniform_(weight, -bound, bound)
        nn.init.zeros_(self.decoder.bias)

    def forward(self, src):
        """Map token ids to per-position vocabulary logits.

        Args:
            src: Long tensor of token ids; train() passes it as
                ``(seq_len, batch_size)``.

        Returns:
            Logits with a trailing dimension of ``vocab_size``.
        """
        # Scale embeddings by sqrt(d_model) before adding the positional signal.
        scaled = self.embedding(src) * math.sqrt(self.d_model)
        encoded = self.transformer_encoder(self.pos_encoder(scaled))
        return self.decoder(encoded)

def decode(input_tokens, inv_vocab):
    """Render a sequence of token ids as a space-separated string of tokens.

    Args:
        input_tokens: Iterable of token ids.
        inv_vocab: Mapping from token id to token string.

    Raises:
        KeyError: If an id is missing from ``inv_vocab``.
    """
    words = [inv_vocab[token_id] for token_id in input_tokens]
    return ' '.join(words)


def save_checkpoint(model, optimizer, episode, folder, filename):
    """
    Save the model and optimizer state to ``folder/filename``.

    Args:
        model (nn.Module): The model to save.
        optimizer (torch.optim.Optimizer): The optimizer whose state to save.
        episode (int): The current episode number.
        folder (str): Directory that receives the checkpoint; created if missing.
        filename (str): File name of the checkpoint inside ``folder``.
    """
    filepath = os.path.join(folder, filename)
    # Ensure the directory exists. The directory part is empty when the file
    # goes to the current working directory, and os.makedirs('') would raise.
    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # Save the checkpoint
    torch.save({
        'episode': episode,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, filepath)
    print(f"Checkpoint saved at episode {episode} to {filepath}")

def load_checkpoint(filepath, model, optimizer):
    """
    Restore model and optimizer state from a checkpoint file, if it exists.

    Args:
        filepath (str): Location of the checkpoint file.
        model (nn.Module): Receives the saved ``model_state_dict``.
        optimizer (torch.optim.Optimizer): Receives the saved ``optimizer_state_dict``.

    Returns:
        int: The episode stored in the checkpoint, or 0 when no file exists
        at ``filepath``.
    """
    if not os.path.isfile(filepath):
        print(f"No checkpoint found at {filepath}, starting from scratch.")
        return 0

    # Tensors are mapped onto the module-level `device`.
    state = torch.load(filepath, map_location=device)
    model.load_state_dict(state['model_state_dict'])
    optimizer.load_state_dict(state['optimizer_state_dict'])
    resume_from = state['episode']
    print(f"Checkpoint loaded from {filepath}, resuming from episode {resume_from}")
    return resume_from

In [3]:
def compute_bellman_returns(raw_rewards, gamma):
    """Return the discounted return-to-go for every step of an episode.

    ``result[t] = raw_rewards[t] + gamma * result[t + 1]``, with the return
    after the final step taken as 0.

    Args:
        raw_rewards: Sequence of per-step rewards in chronological order.
        gamma: Discount factor.

    Returns:
        List with one return per reward; empty for an empty input.
    """
    returns_reversed = []
    running = 0
    # Accumulate from the last step backwards and flip once at the end:
    # appending keeps this linear, unlike inserting at the front each step.
    for reward in reversed(raw_rewards):
        running = reward + gamma * running
        returns_reversed.append(running)
    returns_reversed.reverse()
    return returns_reversed

def _valid_next_tokens(env, state, last_token):
    """Return the vocabulary ids the policy may emit in ``state``.

    * 'expect_index1': every position of the list except the last one.
    * 'expect_index2': positions whose token id is greater than ``last_token``
      (the first index just emitted), so the pair is always ascending.
    * anything else (including 'expect_action'): the two action tokens.
    """
    if state in ('expect_index1', 'expect_index2'):
        first_pos = env.get_start_pos()
        index_tokens = [vocab[str(pos)] for pos in range(first_pos, first_pos + env.get_length())]
        if state == 'expect_index1':
            return index_tokens[:-1]
        return [tok for tok in index_tokens if tok > last_token]
    return [vocab['Comparison'], vocab['Swap']]


# Training Loop
def train(verbose=False):
    """Train the Transformer policy on SortingEnv with a REINFORCE-style loss.

    Each episode builds a token sequence (length token, start token, then
    sampled action/index tokens interleaved with the environment's response
    tokens). At every sampling step the logits of the last position are masked
    to the tokens that are legal in the current parser state, optionally
    flattened for exploration, and one token is sampled. The episode loss is
    ``-sum(log_prob * return)``, where the return is the SHORTTERM_GAMMA
    discounted step rewards plus the LONGTERM_GAMMA discounted success bonus.

    Every EPISODES_SAVE episodes a checkpoint is written to OUTPUT_DIR, the
    running statistics are reset and the learning rate is decayed.

    Args:
        verbose: When true, print the decoded sequence, the list and the
            reward at every step.
    """
    vocab_size = len(vocab)
    model = TransformerModel(vocab_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=LR_SCHEDULER_GAMMA)

    # To resume a run, call load_checkpoint(path, model, optimizer) here.

    # Running statistics since the last checkpoint. All four are updated
    # together at the end of an episode so the ratios stay consistent.
    episode_cnt = 0
    total_reward = 0.0
    num_successes = 0
    total_steps = 0

    mask_value = -1e9  # Use a large negative value instead of -inf

    for episode in range(NUM_EPISODES):
        t1 = time.time()
        model.train()  # Set model to training mode
        env = SortingEnv()
        length_token, start_token, _ = env.reset()
        input_tokens = [length_token, start_token]
        log_probs = []
        rewards = []
        comparisons = []

        state = 'expect_action'
        done = False
        success = False
        index1_id = None

        # Exploration probability depends only on the episode number.
        eps_threshold = EPS_END + (EPS_START - EPS_END) * np.exp(-1.0 * episode / EPS_DECAY)

        # NOTE: the rollout is bounded by the number of tokens in the
        # sequence, compared against env.max_steps.
        while not done and len(input_tokens) < env.max_steps:
            if verbose:
                print(decode(input_tokens, inv_vocab))
                print(env.get_list())
            # Prepare input tensor
            input_seq = torch.tensor(input_tokens, dtype=torch.long, device=device).unsqueeze(1)  # (seq_len, batch_size)
            with torch.autocast(device_type=device.type, dtype=torch.bfloat16):
                output = model(input_seq)  # (seq_len, batch_size, vocab_size)
                logits = output[-1, 0, :]  # logits for the last token, (vocab_size)

                if torch.isnan(logits).any():
                    print(f"Episode {episode}, NaNs in logits before masking.")
                    break

                valid_token_ids = _valid_next_tokens(env, state, input_tokens[-1])

                # Ensure valid_token_ids are within the vocab range
                if any(idx >= vocab_size or idx < 0 for idx in valid_token_ids):
                    print(f"Episode {episode}, invalid indices in valid_token_ids: {valid_token_ids}")
                    break

                # Mask invalid tokens
                mask = torch.full_like(logits, mask_value).to(device)
                mask[valid_token_ids] = 0
                masked_logits = logits + mask

                # Exploration: with probability eps_threshold flatten the
                # distribution (temperature 4) before sampling.
                if random.random() < eps_threshold:
                    masked_logits = masked_logits / 4

                if torch.isnan(masked_logits).any():
                    print(f"Episode {episode}, NaNs in masked_logits after masking.")
                    break

                probs = F.softmax(masked_logits, dim=0)

                if torch.isnan(probs).any():
                    print(f"Episode {episode}, NaNs in probs after softmax.")
                    break

                try:
                    m = torch.distributions.Categorical(probs)
                    action_token = m.sample()
                    log_prob = m.log_prob(action_token)
                except ValueError as e:
                    print(f"Episode {episode}, error in sampling action: {e}")
                    break

            action = action_token.item()
            log_probs.append(log_prob)
            input_tokens.append(action)

            reward = 0.0
            if state == 'expect_action':
                if action == vocab['Comparison']:
                    state = 'expect_index1'
                elif action == vocab['Swap']:
                    if env.indices is None:
                        # Swap without a preceding comparison: penalise and stop
                        # without calling into the environment.
                        rewards.append(INVALID_ACTION_REWARD)
                        done = True
                        continue
                    response_token, reward, done, _ = env.step([vocab['Swap']])
                    if done and reward == SUCCESS_REWARD:
                        success = True
                        if episode % 100 == 0:
                            print(decode(input_tokens, inv_vocab))
                    if verbose:
                        print("Reward:", reward)
                    state = 'expect_action'
                else:
                    reward = INVALID_ACTION_REWARD
                    done = True
            elif state == 'expect_index1':
                index1_id = action
                state = 'expect_index2'
            elif state == 'expect_index2':
                index2_id = action
                comparisons.append((int(inv_vocab[index1_id]), int(inv_vocab[index2_id])))
                response_token, reward, done, _ = env.step([vocab['Comparison'], index1_id, index2_id])
                if done and reward == SUCCESS_REWARD:
                    success = True
                    if episode % 100 == 0:
                        print(1, decode(input_tokens, inv_vocab))
                elif COMPARISON_ENTROPY_MULTIPLIER:
                    # Optional shaping term; inactive while the multiplier is zero.
                    reward += COMPARISON_ENTROPY_MULTIPLIER * compute_min_delta_entropy(comparisons)
                if verbose:
                    print("Reward:", reward)
                if response_token is not None:
                    # Feed the comparison outcome back into the sequence.
                    input_tokens.append(response_token)
                state = 'expect_action'
            else:
                reward = INVALID_ACTION_REWARD
                done = True

            rewards.append(reward)

        # The success bonus is discounted separately with LONGTERM_GAMMA.
        success_rewards = [0.0] * len(rewards)
        if success:
            success_rewards[-1] = SUCCESS_REWARD

        # Save checkpoint. The statistics cover the episodes completed since
        # the previous checkpoint; the current episode is counted afterwards.
        if episode > 0 and episode % EPISODES_SAVE == 0:
            counted = max(episode_cnt, 1)  # every episode may have been skipped
            avg_reward = total_reward / counted
            success_rate = num_successes / counted
            avg_steps = total_steps / counted
            episode_cnt = 0
            total_reward = 0.0
            num_successes = 0
            total_steps = 0
            save_checkpoint(model, optimizer, episode, OUTPUT_DIR, f"ckpt_{episode}_{success_rate:.4f}_{avg_steps:.2f}.pth")
            print(f"Average reward = {avg_reward:.4f}")
            # Reduce the lr
            scheduler.step()
            current_lr = scheduler.get_last_lr()[0]
            print(f"Learning rate = {current_lr:.6f}")

        assert len(log_probs) == len(rewards), "log_probs and returns have different sizes!"

        if len(log_probs) == 0:
            continue  # Skip if no actions were taken

        # Compute returns and loss within autocast
        with torch.autocast(device_type=device.type, dtype=torch.bfloat16):
            returns1 = compute_bellman_returns(rewards, SHORTTERM_GAMMA)
            returns2 = compute_bellman_returns(success_rewards, LONGTERM_GAMMA)
            returns = torch.tensor(np.array(returns1) + np.array(returns2)).to(device)

            if torch.isnan(returns).any():
                print(f"Episode {episode}, NaNs in returns.")
                continue

            loss = 0
            for log_prob, R in zip(log_probs, returns):
                loss -= log_prob * R

            if torch.isnan(loss):
                print(f"Episode {episode}, NaN in loss.")
                continue

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()

        # Update all running statistics in one place.
        episode_cnt += 1
        num_successes += int(success)
        total_reward += sum(rewards)
        total_steps += len(rewards)
        t2 = time.time()
        print(f"Episode {episode}, loss:{loss.item():.4f}, {'succeed' if success else 'fail'}, steps:{len(rewards)}, total reward:{sum(rewards):.4f}, {t2-t1} sec")

# Entry point: start training with per-step debug printing disabled.
if __name__ == "__main__":
    train(verbose=False)




Episode 0, loss:-3.6604, fail, steps:1, total reward:-10.0000, 0.33774447441101074 sec
Episode 1, loss:1.4148, succeed, steps:4, total reward:0.2000, 0.021348953247070312 sec
Episode 2, loss:1.1502, succeed, steps:4, total reward:0.2000, 0.018983840942382812 sec
Episode 3, loss:-0.1574, fail, steps:6, total reward:-0.6000, 0.027554035186767578 sec
Episode 4, loss:0.6391, succeed, steps:4, total reward:0.2000, 0.018658161163330078 sec
Episode 5, loss:0.6378, succeed, steps:4, total reward:0.2000, 0.018898725509643555 sec
Episode 6, loss:0.1922, succeed, steps:4, total reward:0.2000, 0.01862788200378418 sec
Episode 7, loss:0.2870, succeed, steps:4, total reward:0.2000, 0.018747806549072266 sec
Episode 8, loss:-10.7224, fail, steps:1, total reward:-10.0000, 0.005882978439331055 sec
Episode 9, loss:-0.3220, fail, steps:6, total reward:-0.6000, 0.02727484703063965 sec
Episode 10, loss:0.4920, succeed, steps:4, total reward:0.2000, 0.018557071685791016 sec
Episode 11, loss:0.2093, succeed, s

len2 start2 Comparison 2 3 more Swap
Episode 100, loss:0.0116, succeed, steps:4, total reward:0.2000, 0.018924236297607422 sec
Episode 101, loss:0.0227, succeed, steps:4, total reward:0.2000, 0.018703937530517578 sec
Episode 102, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018369197845458984 sec
Episode 103, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018482685089111328 sec
Episode 104, loss:0.0212, succeed, steps:4, total reward:0.2000, 0.018285274505615234 sec
Episode 105, loss:0.0355, succeed, steps:4, total reward:0.2000, 0.01868271827697754 sec
Episode 106, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01868581771850586 sec
Episode 107, loss:0.0113, succeed, steps:4, total reward:0.2000, 0.018530845642089844 sec
Episode 108, loss:0.0225, succeed, steps:4, total reward:0.2000, 0.0185244083404541 sec
Episode 109, loss:0.0120, succeed, steps:4, total reward:0.2000, 0.018523693084716797 sec
Episode 110, loss:0.0220, succeed, steps:4, total reward:0.2000, 0.

Episode 201, loss:0.0240, succeed, steps:4, total reward:0.2000, 0.01916337013244629 sec
Episode 202, loss:-40.0281, fail, steps:1, total reward:-10.0000, 0.006045341491699219 sec
Episode 203, loss:0.0086, succeed, steps:4, total reward:0.2000, 0.018459796905517578 sec
Episode 204, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018680810928344727 sec
Episode 205, loss:0.0152, succeed, steps:4, total reward:0.2000, 0.018390655517578125 sec
Episode 206, loss:0.0151, succeed, steps:4, total reward:0.2000, 0.018282651901245117 sec
Episode 207, loss:0.0228, succeed, steps:4, total reward:0.2000, 0.018230676651000977 sec
Episode 208, loss:0.0084, succeed, steps:4, total reward:0.2000, 0.018895864486694336 sec
Episode 209, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018573999404907227 sec
Episode 210, loss:0.0086, succeed, steps:4, total reward:0.2000, 0.018479585647583008 sec
Episode 211, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018880605697631836 sec
Episode 21

Episode 296, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0470578670501709 sec
Episode 297, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.04114842414855957 sec
Episode 298, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.039948463439941406 sec
Episode 299, loss:0.0118, succeed, steps:4, total reward:0.2000, 0.03486800193786621 sec
len2 start10 Comparison 10 11 more Swap
Episode 300, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03475213050842285 sec
Episode 301, loss:0.0064, succeed, steps:4, total reward:0.2000, 0.032655954360961914 sec
Episode 302, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01562666893005371 sec
Episode 303, loss:0.0178, succeed, steps:4, total reward:0.2000, 0.01661849021911621 sec
Episode 304, loss:0.0063, succeed, steps:4, total reward:0.2000, 0.017453432083129883 sec
Episode 305, loss:0.0123, succeed, steps:4, total reward:0.2000, 0.01781177520751953 sec
Episode 306, loss:0.0119, succeed, steps:4, total reward:0.2000, 0.0

Episode 394, loss:0.0150, succeed, steps:4, total reward:0.2000, 0.018514394760131836 sec
Episode 395, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01883411407470703 sec
Episode 396, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018527507781982422 sec
Episode 397, loss:0.0098, succeed, steps:4, total reward:0.2000, 0.018441200256347656 sec
Episode 398, loss:0.0053, succeed, steps:4, total reward:0.2000, 0.018475770950317383 sec
Episode 399, loss:0.0054, succeed, steps:4, total reward:0.2000, 0.018511295318603516 sec
len2 start2 Comparison 2 3 more Swap
Episode 400, loss:0.0052, succeed, steps:4, total reward:0.2000, 0.018561124801635742 sec
Episode 401, loss:0.0095, succeed, steps:4, total reward:0.2000, 0.019017696380615234 sec
Episode 402, loss:0.0051, succeed, steps:4, total reward:0.2000, 0.018546104431152344 sec
Episode 403, loss:-45.7288, fail, steps:1, total reward:-10.0000, 0.005708932876586914 sec
Episode 404, loss:0.0095, succeed, steps:4, total reward:0.2000

Episode 490, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018269062042236328 sec
Episode 491, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018364429473876953 sec
Episode 492, loss:0.0043, succeed, steps:4, total reward:0.2000, 0.01828145980834961 sec
Episode 493, loss:0.0126, succeed, steps:4, total reward:0.2000, 0.018152236938476562 sec
Episode 494, loss:0.0046, succeed, steps:4, total reward:0.2000, 0.018301963806152344 sec
Episode 495, loss:0.0044, succeed, steps:4, total reward:0.2000, 0.018213987350463867 sec
Episode 496, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018230438232421875 sec
Episode 497, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018465280532836914 sec
Episode 498, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018275022506713867 sec
Episode 499, loss:0.0044, succeed, steps:4, total reward:0.2000, 0.01837778091430664 sec
len2 start10 Comparison 10 11 more Swap
Episode 500, loss:0.0000, succeed, steps:4, total reward:0.200

Episode 583, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01923346519470215 sec
Episode 584, loss:0.0041, succeed, steps:4, total reward:0.2000, 0.019159555435180664 sec
Episode 585, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018520116806030273 sec
Episode 586, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019635915756225586 sec
Episode 587, loss:0.0074, succeed, steps:4, total reward:0.2000, 0.019168853759765625 sec
Episode 588, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019179582595825195 sec
Episode 589, loss:0.0071, succeed, steps:4, total reward:0.2000, 0.018735647201538086 sec
Episode 590, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019448041915893555 sec
Episode 591, loss:0.0070, succeed, steps:4, total reward:0.2000, 0.019048213958740234 sec
Episode 592, loss:0.0073, succeed, steps:4, total reward:0.2000, 0.01964712142944336 sec
Episode 593, loss:0.0038, succeed, steps:4, total reward:0.2000, 0.019237518310546875 sec
Episode 594,

Episode 680, loss:0.0035, succeed, steps:4, total reward:0.2000, 0.019057273864746094 sec
Episode 681, loss:0.0035, succeed, steps:4, total reward:0.2000, 0.012757062911987305 sec
Episode 682, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011243820190429688 sec
Episode 683, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011178731918334961 sec
Episode 684, loss:0.0036, succeed, steps:4, total reward:0.2000, 0.01150369644165039 sec
Episode 685, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011303424835205078 sec
Episode 686, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.012883901596069336 sec
Episode 687, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011232614517211914 sec
Episode 688, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.05328226089477539 sec
Episode 689, loss:0.0034, succeed, steps:4, total reward:0.2000, 0.053057193756103516 sec
Episode 690, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03508472442626953 sec
Episode 691, 

Episode 781, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018697023391723633 sec
Episode 782, loss:0.0055, succeed, steps:4, total reward:0.2000, 0.018440961837768555 sec
Episode 783, loss:0.0029, succeed, steps:4, total reward:0.2000, 0.01827716827392578 sec
Episode 784, loss:0.0051, succeed, steps:4, total reward:0.2000, 0.018245220184326172 sec
Episode 785, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018488645553588867 sec
Episode 786, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018519163131713867 sec
Episode 787, loss:0.0032, succeed, steps:4, total reward:0.2000, 0.018816709518432617 sec
Episode 788, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018509387969970703 sec
Episode 789, loss:0.0029, succeed, steps:4, total reward:0.2000, 0.018233537673950195 sec
Episode 790, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01852726936340332 sec
Episode 791, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018701553344726562 sec
Episode 792,

Episode 877, loss:0.0049, succeed, steps:4, total reward:0.2000, 0.0181577205657959 sec
Episode 878, loss:0.0028, succeed, steps:4, total reward:0.2000, 0.018225908279418945 sec
Episode 879, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01814579963684082 sec
Episode 880, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018236637115478516 sec
Episode 881, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018808603286743164 sec
Episode 882, loss:0.0048, succeed, steps:4, total reward:0.2000, 0.01845860481262207 sec
Episode 883, loss:0.0048, succeed, steps:4, total reward:0.2000, 0.018611431121826172 sec
Episode 884, loss:0.0048, succeed, steps:4, total reward:0.2000, 0.0185549259185791 sec
Episode 885, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01848626136779785 sec
Episode 886, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018235445022583008 sec
Episode 887, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01834559440612793 sec
Episode 888, loss:

Episode 979, loss:0.0046, succeed, steps:4, total reward:0.2000, 0.019665956497192383 sec
Episode 980, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01862955093383789 sec
Episode 981, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0196683406829834 sec
Episode 982, loss:0.0045, succeed, steps:4, total reward:0.2000, 0.01893019676208496 sec
Episode 983, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018927812576293945 sec
Episode 984, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019160985946655273 sec
Episode 985, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018840551376342773 sec
Episode 986, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019658565521240234 sec
Episode 987, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019459009170532227 sec
Episode 988, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019051313400268555 sec
Episode 989, loss:0.0071, succeed, steps:4, total reward:0.2000, 0.018655776977539062 sec
Episode 990, l

Episode 1069, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019217967987060547 sec
Episode 1070, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018845319747924805 sec
Episode 1071, loss:0.0041, succeed, steps:4, total reward:0.2000, 0.019466638565063477 sec
Episode 1072, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01944422721862793 sec
Episode 1073, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01911187171936035 sec
Episode 1074, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019003629684448242 sec
Episode 1075, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019505739212036133 sec
Episode 1076, loss:0.0027, succeed, steps:4, total reward:0.2000, 0.01957106590270996 sec
Episode 1077, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019556045532226562 sec
Episode 1078, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01897120475769043 sec
Episode 1079, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01987314224243164 sec
Epis

Episode 1170, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019730091094970703 sec
Episode 1171, loss:0.0022, succeed, steps:4, total reward:0.2000, 0.019193172454833984 sec
Episode 1172, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019037246704101562 sec
Episode 1173, loss:0.0039, succeed, steps:4, total reward:0.2000, 0.019817352294921875 sec
Episode 1174, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018760204315185547 sec
Episode 1175, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01924753189086914 sec
Episode 1176, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0189056396484375 sec
Episode 1177, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018565893173217773 sec
Episode 1178, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018946409225463867 sec
Episode 1179, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018914461135864258 sec
Episode 1180, loss:0.0022, succeed, steps:4, total reward:0.2000, 0.01868295669555664 sec
Epi

Episode 1268, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019419193267822266 sec
Episode 1269, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018692731857299805 sec
Episode 1270, loss:0.0035, succeed, steps:4, total reward:0.2000, 0.01951289176940918 sec
Episode 1271, loss:0.0054, succeed, steps:4, total reward:0.2000, 0.018917083740234375 sec
Episode 1272, loss:0.0035, succeed, steps:4, total reward:0.2000, 0.018348217010498047 sec
Episode 1273, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019745826721191406 sec
Episode 1274, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01935553550720215 sec
Episode 1275, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018933773040771484 sec
Episode 1276, loss:0.0019, succeed, steps:4, total reward:0.2000, 0.018964767456054688 sec
Episode 1277, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011294364929199219 sec
Episode 1278, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01152181625366211 sec
Ep

Episode 1364, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.05370640754699707 sec
Episode 1365, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.06018853187561035 sec
Episode 1366, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.05355119705200195 sec
Episode 1367, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.04606294631958008 sec
Episode 1368, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.04090738296508789 sec
Episode 1369, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03319501876831055 sec
Episode 1370, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03545498847961426 sec
Episode 1371, loss:0.0032, succeed, steps:4, total reward:0.2000, 0.03382372856140137 sec
Episode 1372, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.030350923538208008 sec
Episode 1373, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03198361396789551 sec
Episode 1374, loss:0.0052, succeed, steps:4, total reward:0.2000, 0.027945995330810547 sec
Episode 

Episode 1463, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01906561851501465 sec
Episode 1464, loss:0.0031, succeed, steps:4, total reward:0.2000, 0.018762588500976562 sec
Episode 1465, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018649578094482422 sec
Episode 1466, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01879715919494629 sec
Episode 1467, loss:0.0018, succeed, steps:4, total reward:0.2000, 0.01839423179626465 sec
Episode 1468, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01920604705810547 sec
Episode 1469, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018674135208129883 sec
Episode 1470, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019586801528930664 sec
Episode 1471, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018713951110839844 sec
Episode 1472, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01955866813659668 sec
Episode 1473, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019123077392578125 sec
Epis

Episode 1553, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01911783218383789 sec
Episode 1554, loss:0.0048, succeed, steps:4, total reward:0.2000, 0.019212722778320312 sec
Episode 1555, loss:0.0018, succeed, steps:4, total reward:0.2000, 0.02008533477783203 sec
Episode 1556, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019313812255859375 sec
Episode 1557, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01922607421875 sec
Episode 1558, loss:0.0030, succeed, steps:4, total reward:0.2000, 0.0195314884185791 sec
Episode 1559, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01951456069946289 sec
Episode 1560, loss:0.0030, succeed, steps:4, total reward:0.2000, 0.018857240676879883 sec
Episode 1561, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018918752670288086 sec
Episode 1562, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018985748291015625 sec
Episode 1563, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01950979232788086 sec
Episode 1

Episode 1649, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019710063934326172 sec
Episode 1650, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01980423927307129 sec
Episode 1651, loss:0.0016, succeed, steps:4, total reward:0.2000, 0.01931476593017578 sec
Episode 1652, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019185781478881836 sec
Episode 1653, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01906418800354004 sec
Episode 1654, loss:0.0017, succeed, steps:4, total reward:0.2000, 0.018987655639648438 sec
Episode 1655, loss:0.0030, succeed, steps:4, total reward:0.2000, 0.018891334533691406 sec
Episode 1656, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019578933715820312 sec
Episode 1657, loss:0.0029, succeed, steps:4, total reward:0.2000, 0.019522428512573242 sec
Episode 1658, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019118547439575195 sec
Episode 1659, loss:0.0017, succeed, steps:4, total reward:0.2000, 0.018698692321777344 sec
Ep

Episode 1746, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.042615652084350586 sec
Episode 1747, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.04401993751525879 sec
Episode 1748, loss:0.0028, succeed, steps:4, total reward:0.2000, 0.036406755447387695 sec
Episode 1749, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03759574890136719 sec
Episode 1750, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03695869445800781 sec
Episode 1751, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03000020980834961 sec
Episode 1752, loss:0.0015, succeed, steps:4, total reward:0.2000, 0.03058600425720215 sec
Episode 1753, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.030040979385375977 sec
Episode 1754, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.020962953567504883 sec
Episode 1755, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.016977310180664062 sec
Episode 1756, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01813340187072754 sec
Episo

Episode 1842, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018715620040893555 sec
Episode 1843, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018676042556762695 sec
Episode 1844, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01828908920288086 sec
Episode 1845, loss:0.0027, succeed, steps:4, total reward:0.2000, 0.0185697078704834 sec
Episode 1846, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01842522621154785 sec
Episode 1847, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018506765365600586 sec
Episode 1848, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01850295066833496 sec
Episode 1849, loss:0.0015, succeed, steps:4, total reward:0.2000, 0.01846623420715332 sec
Episode 1850, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018810272216796875 sec
Episode 1851, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01846933364868164 sec
Episode 1852, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01850271224975586 sec
Episode

Episode 1939, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018078327178955078 sec
Episode 1940, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018396854400634766 sec
Episode 1941, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018114805221557617 sec
Episode 1942, loss:0.0015, succeed, steps:4, total reward:0.2000, 0.018248319625854492 sec
Episode 1943, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018411874771118164 sec
Episode 1944, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01864004135131836 sec
Episode 1945, loss:0.0026, succeed, steps:4, total reward:0.2000, 0.018399477005004883 sec
Episode 1946, loss:0.0026, succeed, steps:4, total reward:0.2000, 0.0184023380279541 sec
Episode 1947, loss:0.0014, succeed, steps:4, total reward:0.2000, 0.01844644546508789 sec
Episode 1948, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0186154842376709 sec
Episode 1949, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01842045783996582 sec
Episod

Episode 2027, loss:0.0024, succeed, steps:4, total reward:0.2000, 0.018616199493408203 sec
Episode 2028, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01860809326171875 sec
Episode 2029, loss:0.0014, succeed, steps:4, total reward:0.2000, 0.01846146583557129 sec
Episode 2030, loss:0.0015, succeed, steps:4, total reward:0.2000, 0.018309354782104492 sec
Episode 2031, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01846933364868164 sec
Episode 2032, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018427133560180664 sec
Episode 2033, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018492937088012695 sec
Episode 2034, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018833637237548828 sec
Episode 2035, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01833796501159668 sec
Episode 2036, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018214702606201172 sec
Episode 2037, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018251895904541016 sec
Epi

Episode 2119, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018893957138061523 sec
Episode 2120, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018593311309814453 sec
Episode 2121, loss:0.0024, succeed, steps:4, total reward:0.2000, 0.01947808265686035 sec
Episode 2122, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018584251403808594 sec
Episode 2123, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01974320411682129 sec
Episode 2124, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018604040145874023 sec
Episode 2125, loss:0.0024, succeed, steps:4, total reward:0.2000, 0.01980447769165039 sec
Episode 2126, loss:0.0014, succeed, steps:4, total reward:0.2000, 0.0194857120513916 sec
Episode 2127, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01908588409423828 sec
Episode 2128, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019382953643798828 sec
Episode 2129, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019219636917114258 sec
Episo

Episode 2215, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01881575584411621 sec
Episode 2216, loss:0.0014, succeed, steps:4, total reward:0.2000, 0.019838333129882812 sec
Episode 2217, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019465923309326172 sec
Episode 2218, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019666671752929688 sec
Episode 2219, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018723249435424805 sec
Episode 2220, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01965641975402832 sec
Episode 2221, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01876378059387207 sec
Episode 2222, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019518613815307617 sec
Episode 2223, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019162893295288086 sec
Episode 2224, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01905965805053711 sec
Episode 2225, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019108057022094727 sec
Epi

Episode 2308, loss:0.0024, succeed, steps:4, total reward:0.2000, 0.019480466842651367 sec
Episode 2309, loss:0.0024, succeed, steps:4, total reward:0.2000, 0.017682552337646484 sec
Episode 2310, loss:0.0023, succeed, steps:4, total reward:0.2000, 0.019562959671020508 sec
Episode 2311, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018640518188476562 sec
Episode 2312, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018932104110717773 sec
Episode 2313, loss:0.0023, succeed, steps:4, total reward:0.2000, 0.019089698791503906 sec
Episode 2314, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.016888856887817383 sec
Episode 2315, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011438608169555664 sec
Episode 2316, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011445283889770508 sec
Episode 2317, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01138162612915039 sec
Episode 2318, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.013018608093261719 sec


Episode 2409, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.012108087539672852 sec
Episode 2410, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011950492858886719 sec
Episode 2411, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011490106582641602 sec
Episode 2412, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011174440383911133 sec
Episode 2413, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011361122131347656 sec
Episode 2414, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011365413665771484 sec
Episode 2415, loss:0.0022, succeed, steps:4, total reward:0.2000, 0.011507511138916016 sec
Episode 2416, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.06274080276489258 sec
Episode 2417, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.04715299606323242 sec
Episode 2418, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.09261322021484375 sec
Episode 2419, loss:0.0022, succeed, steps:4, total reward:0.2000, 0.06301712989807129 sec
Epi

Episode 2510, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.012128829956054688 sec
Episode 2511, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011823415756225586 sec
Episode 2512, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011392354965209961 sec
Episode 2513, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011333703994750977 sec
Episode 2514, loss:0.0013, succeed, steps:4, total reward:0.2000, 0.011245489120483398 sec
Episode 2515, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01118612289428711 sec
Episode 2516, loss:0.0021, succeed, steps:4, total reward:0.2000, 0.011398553848266602 sec
Episode 2517, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011417150497436523 sec
Episode 2518, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0638284683227539 sec
Episode 2519, loss:0.0012, succeed, steps:4, total reward:0.2000, 0.10162162780761719 sec
Episode 2520, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.06610608100891113 sec
Epis

Episode 2604, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.04250741004943848 sec
Episode 2605, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03129220008850098 sec
Episode 2606, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03845071792602539 sec
Episode 2607, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.034483909606933594 sec
Episode 2608, loss:0.0021, succeed, steps:4, total reward:0.2000, 0.032410383224487305 sec
Episode 2609, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.020851612091064453 sec
Episode 2610, loss:0.0021, succeed, steps:4, total reward:0.2000, 0.031508684158325195 sec
Episode 2611, loss:0.0021, succeed, steps:4, total reward:0.2000, 0.021311044692993164 sec
Episode 2612, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.012780427932739258 sec
Episode 2613, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.017879247665405273 sec
Episode 2614, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0180666446685791 sec
Epis

Episode 2696, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.10932445526123047 sec
Episode 2697, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.06731605529785156 sec
Episode 2698, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.049141645431518555 sec
Episode 2699, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.05395174026489258 sec
len2 start10 Comparison 10 11 more Swap
Episode 2700, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.04350471496582031 sec
Episode 2701, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.02515244483947754 sec
Episode 2702, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03426504135131836 sec
Episode 2703, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03438878059387207 sec
Episode 2704, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03306174278259277 sec
Episode 2705, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.032181739807128906 sec
Episode 2706, loss:0.0000, succeed, steps:4, total reward:

Episode 2795, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01901412010192871 sec
Episode 2796, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018253087997436523 sec
Episode 2797, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018347501754760742 sec
Episode 2798, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018346548080444336 sec
Episode 2799, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01875448226928711 sec
len2 start2 Comparison 2 3 more Swap
Episode 2800, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018405437469482422 sec
Episode 2801, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01829814910888672 sec
Episode 2802, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018187284469604492 sec
Episode 2803, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018219709396362305 sec
Episode 2804, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01820969581604004 sec
Episode 2805, loss:0.0000, succeed, steps:4, total reward

Episode 2892, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018558740615844727 sec
Episode 2893, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018627166748046875 sec
Episode 2894, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018407583236694336 sec
Episode 2895, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0184023380279541 sec
Episode 2896, loss:0.0031, succeed, steps:4, total reward:0.2000, 0.018334150314331055 sec
Episode 2897, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018360376358032227 sec
Episode 2898, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018360376358032227 sec
Episode 2899, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01856398582458496 sec
len2 start6 Comparison 6 7 more Swap
Episode 2900, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018457889556884766 sec
Episode 2901, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01860809326171875 sec
Episode 2902, loss:0.0000, succeed, steps:4, total reward

Episode 2983, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018989086151123047 sec
Episode 2984, loss:0.0011, succeed, steps:4, total reward:0.2000, 0.018535375595092773 sec
Episode 2985, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01839590072631836 sec
Episode 2986, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018249034881591797 sec
Episode 2987, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018267154693603516 sec
Episode 2988, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018199682235717773 sec
Episode 2989, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018433094024658203 sec
Episode 2990, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01831197738647461 sec
Episode 2991, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01821279525756836 sec
Episode 2992, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01849508285522461 sec
Episode 2993, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018659114837646484 sec
Epi

Episode 3079, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018682241439819336 sec
Episode 3080, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018578290939331055 sec
Episode 3081, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018442153930664062 sec
Episode 3082, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018476486206054688 sec
Episode 3083, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01820683479309082 sec
Episode 3084, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0182650089263916 sec
Episode 3085, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018218517303466797 sec
Episode 3086, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018254756927490234 sec
Episode 3087, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018476486206054688 sec
Episode 3088, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018481016159057617 sec
Episode 3089, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018719911575317383 sec
Ep

Episode 3172, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019827842712402344 sec
Episode 3173, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018703937530517578 sec
Episode 3174, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019762754440307617 sec
Episode 3175, loss:0.0010, succeed, steps:4, total reward:0.2000, 0.018918275833129883 sec
Episode 3176, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019908905029296875 sec
Episode 3177, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01936626434326172 sec
Episode 3178, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01948261260986328 sec
Episode 3179, loss:0.0017, succeed, steps:4, total reward:0.2000, 0.019389867782592773 sec
Episode 3180, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019541263580322266 sec
Episode 3181, loss:0.0010, succeed, steps:4, total reward:0.2000, 0.019091129302978516 sec
Episode 3182, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019601106643676758 sec
E

Episode 3270, loss:0.0015, succeed, steps:4, total reward:0.2000, 0.019527912139892578 sec
Episode 3271, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019530057907104492 sec
Episode 3272, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0187227725982666 sec
Episode 3273, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019872665405273438 sec
Episode 3274, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018727779388427734 sec
Episode 3275, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019608259201049805 sec
Episode 3276, loss:0.0019, succeed, steps:4, total reward:0.2000, 0.019297122955322266 sec
Episode 3277, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019212007522583008 sec
Episode 3278, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018723726272583008 sec
Episode 3279, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011435747146606445 sec
Episode 3280, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011352777481079102 sec
E

Episode 3367, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0643007755279541 sec
Episode 3368, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.049925804138183594 sec
Episode 3369, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03760671615600586 sec
Episode 3370, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0419003963470459 sec
Episode 3371, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.037126779556274414 sec
Episode 3372, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0343012809753418 sec
Episode 3373, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.033020973205566406 sec
Episode 3374, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.031068086624145508 sec
Episode 3375, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03000664710998535 sec
Episode 3376, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.016243457794189453 sec
Episode 3377, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.012304306030273438 sec
Episode

Episode 3460, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018938779830932617 sec
Episode 3461, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018590927124023438 sec
Episode 3462, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01827859878540039 sec
Episode 3463, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018392562866210938 sec
Episode 3464, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018153905868530273 sec
Episode 3465, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018183231353759766 sec
Episode 3466, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018428564071655273 sec
Episode 3467, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018772363662719727 sec
Episode 3468, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018309831619262695 sec
Episode 3469, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018396615982055664 sec
Episode 3470, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018181800842285156 sec


Episode 3555, loss:0.0010, succeed, steps:4, total reward:0.2000, 0.019115686416625977 sec
Episode 3556, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018340587615966797 sec
Episode 3557, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018126249313354492 sec
Episode 3558, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0184171199798584 sec
Episode 3559, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018517732620239258 sec
Episode 3560, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01837921142578125 sec
Episode 3561, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0183870792388916 sec
Episode 3562, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01860189437866211 sec
Episode 3563, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018375635147094727 sec
Episode 3564, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018259286880493164 sec
Episode 3565, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018451213836669922 sec
Episo

Episode 3655, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019432544708251953 sec
Episode 3656, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01851344108581543 sec
Episode 3657, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018941402435302734 sec
Episode 3658, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018795251846313477 sec
Episode 3659, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01840829849243164 sec
Episode 3660, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01847052574157715 sec
Episode 3661, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018388032913208008 sec
Episode 3662, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018826007843017578 sec
Episode 3663, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019381999969482422 sec
Episode 3664, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019186019897460938 sec
Episode 3665, loss:0.0023, succeed, steps:4, total reward:0.2000, 0.018552303314208984 sec
Ep

Episode 3752, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018671274185180664 sec
Episode 3753, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0189516544342041 sec
Episode 3754, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0186460018157959 sec
Episode 3755, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01874995231628418 sec
Episode 3756, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018306732177734375 sec
Episode 3757, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01927781105041504 sec
Episode 3758, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019004106521606445 sec
Episode 3759, loss:0.0014, succeed, steps:4, total reward:0.2000, 0.018845796585083008 sec
Episode 3760, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011486530303955078 sec
Episode 3761, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011474847793579102 sec
Episode 3762, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011639833450317383 sec
Episo

Episode 3847, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019068002700805664 sec
Episode 3848, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019971370697021484 sec
Episode 3849, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011383056640625 sec
Episode 3850, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011427164077758789 sec
Episode 3851, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011250972747802734 sec
Episode 3852, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011349201202392578 sec
Episode 3853, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011367559432983398 sec
Episode 3854, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011354684829711914 sec
Episode 3855, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.08832836151123047 sec
Episode 3856, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.08676791191101074 sec
Episode 3857, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0670936107635498 sec
Episod

Episode 3938, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018841266632080078 sec
Episode 3939, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01812434196472168 sec
Episode 3940, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018137454986572266 sec
Episode 3941, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018085241317749023 sec
Episode 3942, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.017922639846801758 sec
Episode 3943, loss:0.0009, succeed, steps:4, total reward:0.2000, 0.018829822540283203 sec
Episode 3944, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018431901931762695 sec
Episode 3945, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018162250518798828 sec
Episode 3946, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.017945289611816406 sec
Episode 3947, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018302202224731445 sec
Episode 3948, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01878499984741211 sec
E

Episode 4036, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01886153221130371 sec
Episode 4037, loss:0.0013, succeed, steps:4, total reward:0.2000, 0.01861095428466797 sec
Episode 4038, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018484830856323242 sec
Episode 4039, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018580198287963867 sec
Episode 4040, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018421173095703125 sec
Episode 4041, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01831650733947754 sec
Episode 4042, loss:0.0008, succeed, steps:4, total reward:0.2000, 0.01850438117980957 sec
Episode 4043, loss:0.0008, succeed, steps:4, total reward:0.2000, 0.018872737884521484 sec
Episode 4044, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018540143966674805 sec
Episode 4045, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01847052574157715 sec
Episode 4046, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018493175506591797 sec
Epis

Episode 4136, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018685102462768555 sec
Episode 4137, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018870115280151367 sec
Episode 4138, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018203258514404297 sec
Episode 4139, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0182034969329834 sec
Episode 4140, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.012493610382080078 sec
Episode 4141, loss:0.0013, succeed, steps:4, total reward:0.2000, 0.011624336242675781 sec
Episode 4142, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011736392974853516 sec
Episode 4143, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011584281921386719 sec
Episode 4144, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011392831802368164 sec
Episode 4145, loss:0.0013, succeed, steps:4, total reward:0.2000, 0.01841425895690918 sec
Episode 4146, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018454790115356445 sec
Ep

Episode 4226, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018515348434448242 sec
Episode 4227, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01867055892944336 sec
Episode 4228, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018246889114379883 sec
Episode 4229, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018381357192993164 sec
Episode 4230, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018200159072875977 sec
Episode 4231, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018228530883789062 sec
Episode 4232, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018208026885986328 sec
Episode 4233, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018401622772216797 sec
Episode 4234, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018239498138427734 sec
Episode 4235, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01823902130126953 sec
Episode 4236, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018286466598510742 sec
E

Episode 4323, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01961064338684082 sec
Episode 4324, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01881718635559082 sec
Episode 4325, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019512653350830078 sec
Episode 4326, loss:0.0008, succeed, steps:4, total reward:0.2000, 0.018769502639770508 sec
Episode 4327, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01962447166442871 sec
Episode 4328, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019275426864624023 sec
Episode 4329, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01925516128540039 sec
Episode 4330, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019379854202270508 sec
Episode 4331, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01893901824951172 sec
Episode 4332, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01932382583618164 sec
Episode 4333, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01915717124938965 sec
Episod

Episode 4416, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03741002082824707 sec
Episode 4417, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.034787654876708984 sec
Episode 4418, loss:0.0007, succeed, steps:4, total reward:0.2000, 0.034261465072631836 sec
Episode 4419, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0341951847076416 sec
Episode 4420, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.030859947204589844 sec
Episode 4421, loss:0.0012, succeed, steps:4, total reward:0.2000, 0.011428356170654297 sec
Episode 4422, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.017075300216674805 sec
Episode 4423, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01681041717529297 sec
Episode 4424, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01866769790649414 sec
Episode 4425, loss:0.0007, succeed, steps:4, total reward:0.2000, 0.018149614334106445 sec
Episode 4426, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01798558235168457 sec
Episo

Episode 4512, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01849198341369629 sec
Episode 4513, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018092632293701172 sec
Episode 4514, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01847386360168457 sec
Episode 4515, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01809239387512207 sec
Episode 4516, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01824641227722168 sec
Episode 4517, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01801133155822754 sec
Episode 4518, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0180208683013916 sec
Episode 4519, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018079757690429688 sec
Episode 4520, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.017840862274169922 sec
Episode 4521, loss:0.0012, succeed, steps:4, total reward:0.2000, 0.01830744743347168 sec
Episode 4522, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018152475357055664 sec
Episode

Episode 4609, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.016102075576782227 sec
Episode 4610, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.017856597900390625 sec
Episode 4611, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.016872644424438477 sec
Episode 4612, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.017641305923461914 sec
Episode 4613, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01810002326965332 sec
Episode 4614, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01809525489807129 sec
Episode 4615, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0182492733001709 sec
Episode 4616, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018293142318725586 sec
Episode 4617, loss:0.0008, succeed, steps:4, total reward:0.2000, 0.01848912239074707 sec
Episode 4618, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018674612045288086 sec
Episode 4619, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018527507781982422 sec
Epis

Episode 4702, loss:0.0011, succeed, steps:4, total reward:0.2000, 0.019126415252685547 sec
Episode 4703, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01880645751953125 sec
Episode 4704, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01879572868347168 sec
Episode 4705, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01953864097595215 sec
Episode 4706, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019263029098510742 sec
Episode 4707, loss:0.0007, succeed, steps:4, total reward:0.2000, 0.0191650390625 sec
Episode 4708, loss:0.0007, succeed, steps:4, total reward:0.2000, 0.018685102462768555 sec
Episode 4709, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019469261169433594 sec
Episode 4710, loss:0.0019, succeed, steps:4, total reward:0.2000, 0.019419431686401367 sec
Episode 4711, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018951416015625 sec
Episode 4712, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018970489501953125 sec
Episode 47

Episode 4793, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0452423095703125 sec
Episode 4794, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.039786577224731445 sec
Episode 4795, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.029248952865600586 sec
Episode 4796, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03384232521057129 sec
Episode 4797, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.03297829627990723 sec
Episode 4798, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.031995534896850586 sec
Episode 4799, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019222497940063477 sec
len2 start4 Comparison 4 5 more Swap
Episode 4800, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011808633804321289 sec
Episode 4801, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01601266860961914 sec
Episode 4802, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.017066001892089844 sec
Episode 4803, loss:0.0000, succeed, steps:4, total reward:

Episode 4883, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.08029365539550781 sec
Episode 4884, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.07383251190185547 sec
Episode 4885, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.05714917182922363 sec
Episode 4886, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.046869754791259766 sec
Episode 4887, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.04281282424926758 sec
Episode 4888, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.04027104377746582 sec
Episode 4889, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.034935951232910156 sec
Episode 4890, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0328066349029541 sec
Episode 4891, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.031919240951538086 sec
Episode 4892, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.012043952941894531 sec
Episode 4893, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01652836799621582 sec
Episode

Episode 4978, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018627405166625977 sec
Episode 4979, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018442392349243164 sec
Episode 4980, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018474340438842773 sec
Episode 4981, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018819093704223633 sec
Episode 4982, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018622875213623047 sec
Episode 4983, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01830601692199707 sec
Episode 4984, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018425464630126953 sec
Episode 4985, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01819920539855957 sec
Episode 4986, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01822638511657715 sec
Episode 4987, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018132686614990234 sec
Episode 4988, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018229961395263672 sec
Ep

Episode 5070, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019251585006713867 sec
Episode 5071, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018925189971923828 sec
Episode 5072, loss:0.0007, succeed, steps:4, total reward:0.2000, 0.019133329391479492 sec
Episode 5073, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019194841384887695 sec
Episode 5074, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018609285354614258 sec
Episode 5075, loss:0.0011, succeed, steps:4, total reward:0.2000, 0.019191741943359375 sec
Episode 5076, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01911139488220215 sec
Episode 5077, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01921367645263672 sec
Episode 5078, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0185854434967041 sec
Episode 5079, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01939225196838379 sec
Episode 5080, loss:0.0011, succeed, steps:4, total reward:0.2000, 0.01933455467224121 sec
Episo

Episode 5165, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019144773483276367 sec
Episode 5166, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019568920135498047 sec
Episode 5167, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01973438262939453 sec
Episode 5168, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.019217252731323242 sec
Episode 5169, loss:0.0006, succeed, steps:4, total reward:0.2000, 0.019133567810058594 sec
Episode 5170, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01919841766357422 sec
Episode 5171, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018909215927124023 sec
Episode 5172, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01645803451538086 sec
Episode 5173, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011656761169433594 sec
Episode 5174, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011631965637207031 sec
Episode 5175, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011497735977172852 sec
Ep

Episode 5259, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018448829650878906 sec
Episode 5260, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.014808177947998047 sec
Episode 5261, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018171310424804688 sec
Episode 5262, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01799631118774414 sec
Episode 5263, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018274307250976562 sec
Episode 5264, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.017812728881835938 sec
Episode 5265, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01833057403564453 sec
Episode 5266, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018033981323242188 sec
Episode 5267, loss:0.0006, succeed, steps:4, total reward:0.2000, 0.01811671257019043 sec
Episode 5268, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018147706985473633 sec
Episode 5269, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018284320831298828 sec
Ep

Episode 5351, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018709182739257812 sec
Episode 5352, loss:0.0010, succeed, steps:4, total reward:0.2000, 0.018681764602661133 sec
Episode 5353, loss:0.0006, succeed, steps:4, total reward:0.2000, 0.018673181533813477 sec
Episode 5354, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01823258399963379 sec
Episode 5355, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018290996551513672 sec
Episode 5356, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018331289291381836 sec
Episode 5357, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018289566040039062 sec
Episode 5358, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018476247787475586 sec
Episode 5359, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018596887588500977 sec
Episode 5360, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.018418312072753906 sec
Episode 5361, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01827836036682129 sec
E

Episode 5443, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011633634567260742 sec
Episode 5444, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011910676956176758 sec
Episode 5445, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0120391845703125 sec
Episode 5446, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011631250381469727 sec
Episode 5447, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01139974594116211 sec
Episode 5448, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011364936828613281 sec
Episode 5449, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011157035827636719 sec
Episode 5450, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011020421981811523 sec
Episode 5451, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010983943939208984 sec
Episode 5452, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011001110076904297 sec
Episode 5453, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010968446731567383 sec
Ep

Episode 5536, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011026144027709961 sec
Episode 5537, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01073908805847168 sec
Episode 5538, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010602474212646484 sec
Episode 5539, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010609865188598633 sec
Episode 5540, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010562419891357422 sec
Episode 5541, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01059865951538086 sec
Episode 5542, loss:0.0006, succeed, steps:4, total reward:0.2000, 0.010619401931762695 sec
Episode 5543, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010599136352539062 sec
Episode 5544, loss:0.0006, succeed, steps:4, total reward:0.2000, 0.010586261749267578 sec
Episode 5545, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01058340072631836 sec
Episode 5546, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010622739791870117 sec
Ep

Episode 5631, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011034727096557617 sec
Episode 5632, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010645389556884766 sec
Episode 5633, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010600090026855469 sec
Episode 5634, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010579347610473633 sec
Episode 5635, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010610580444335938 sec
Episode 5636, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010663747787475586 sec
Episode 5637, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010614633560180664 sec
Episode 5638, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010647773742675781 sec
Episode 5639, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010602474212646484 sec
Episode 5640, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010646820068359375 sec
Episode 5641, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010590553283691406 sec

Episode 5726, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010755300521850586 sec
Episode 5727, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01070713996887207 sec
Episode 5728, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010530948638916016 sec
Episode 5729, loss:0.0005, succeed, steps:4, total reward:0.2000, 0.010530710220336914 sec
Episode 5730, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010491609573364258 sec
Episode 5731, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010541200637817383 sec
Episode 5732, loss:0.0010, succeed, steps:4, total reward:0.2000, 0.010580062866210938 sec
Episode 5733, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010520219802856445 sec
Episode 5734, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01055002212524414 sec
Episode 5735, loss:0.0006, succeed, steps:4, total reward:0.2000, 0.01055002212524414 sec
Episode 5736, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010590076446533203 sec
Ep

Episode 5821, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01087641716003418 sec
Episode 5822, loss:0.0009, succeed, steps:4, total reward:0.2000, 0.010650634765625 sec
Episode 5823, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010521650314331055 sec
Episode 5824, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010606527328491211 sec
Episode 5825, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01052236557006836 sec
Episode 5826, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010528802871704102 sec
Episode 5827, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010525703430175781 sec
Episode 5828, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010545969009399414 sec
Episode 5829, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010607242584228516 sec
Episode 5830, loss:0.0009, succeed, steps:4, total reward:0.2000, 0.010747432708740234 sec
Episode 5831, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010558605194091797 sec
Epis

Episode 5916, loss:0.0009, succeed, steps:4, total reward:0.2000, 0.010909318923950195 sec
Episode 5917, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010588884353637695 sec
Episode 5918, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01049351692199707 sec
Episode 5919, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010584831237792969 sec
Episode 5920, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010580778121948242 sec
Episode 5921, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010545969009399414 sec
Episode 5922, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010509252548217773 sec
Episode 5923, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010550737380981445 sec
Episode 5924, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01056671142578125 sec
Episode 5925, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01056814193725586 sec
Episode 5926, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010545969009399414 sec
Ep

Episode 6008, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010996580123901367 sec
Episode 6009, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010799407958984375 sec
Episode 6010, loss:0.0005, succeed, steps:4, total reward:0.2000, 0.010818958282470703 sec
Episode 6011, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010617494583129883 sec
Episode 6012, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010636568069458008 sec
Episode 6013, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010631084442138672 sec
Episode 6014, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01076364517211914 sec
Episode 6015, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010679960250854492 sec
Episode 6016, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010588884353637695 sec
Episode 6017, loss:0.0014, succeed, steps:4, total reward:0.2000, 0.010605573654174805 sec
Episode 6018, loss:0.0009, succeed, steps:4, total reward:0.2000, 0.010586738586425781 sec


Episode 6103, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01085972785949707 sec
Episode 6104, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010643959045410156 sec
Episode 6105, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010568618774414062 sec
Episode 6106, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010612010955810547 sec
Episode 6107, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010564088821411133 sec
Episode 6108, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010575294494628906 sec
Episode 6109, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010558843612670898 sec
Episode 6110, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010525226593017578 sec
Episode 6111, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010575532913208008 sec
Episode 6112, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010511398315429688 sec
Episode 6113, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010538339614868164 sec


Episode 6198, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010892868041992188 sec
Episode 6199, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010614633560180664 sec
len2 start6 Comparison 6 7 more Swap
Episode 6200, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01053309440612793 sec
Episode 6201, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010521411895751953 sec
Episode 6202, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010484933853149414 sec
Episode 6203, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010567665100097656 sec
Episode 6204, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01056981086730957 sec
Episode 6205, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010512828826904297 sec
Episode 6206, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01055002212524414 sec
Episode 6207, loss:0.0009, succeed, steps:4, total reward:0.2000, 0.010530233383178711 sec
Episode 6208, loss:0.0000, succeed, steps:4, total rewar

Episode 6293, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010796308517456055 sec
Episode 6294, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01066732406616211 sec
Episode 6295, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010657548904418945 sec
Episode 6296, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010588645935058594 sec
Episode 6297, loss:0.0005, succeed, steps:4, total reward:0.2000, 0.010554313659667969 sec
Episode 6298, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01051783561706543 sec
Episode 6299, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010536670684814453 sec
len2 start10 Comparison 10 11 more Swap
Episode 6300, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010671615600585938 sec
Episode 6301, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010534286499023438 sec
Episode 6302, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010573863983154297 sec
Episode 6303, loss:0.0000, succeed, steps:4, total r

Episode 6388, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010850667953491211 sec
Episode 6389, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010640859603881836 sec
Episode 6390, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010562419891357422 sec
Episode 6391, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010540962219238281 sec
Episode 6392, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010540962219238281 sec
Episode 6393, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010558843612670898 sec
Episode 6394, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010520219802856445 sec
Episode 6395, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010526180267333984 sec
Episode 6396, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010559558868408203 sec
Episode 6397, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010538339614868164 sec
Episode 6398, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010843992233276367 sec

Episode 6483, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010707616806030273 sec
Episode 6484, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010685920715332031 sec
Episode 6485, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010570049285888672 sec
Episode 6486, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010559797286987305 sec
Episode 6487, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010559558868408203 sec
Episode 6488, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010579109191894531 sec
Episode 6489, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01054835319519043 sec
Episode 6490, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010555744171142578 sec
Episode 6491, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010622262954711914 sec
Episode 6492, loss:0.0008, succeed, steps:4, total reward:0.2000, 0.010718107223510742 sec
Episode 6493, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010566949844360352 sec


Episode 6578, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010746479034423828 sec
Episode 6579, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010631799697875977 sec
Episode 6580, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010555505752563477 sec
Episode 6581, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010586738586425781 sec
Episode 6582, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010532140731811523 sec
Episode 6583, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010521650314331055 sec
Episode 6584, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010535001754760742 sec
Episode 6585, loss:0.0005, succeed, steps:4, total reward:0.2000, 0.010549545288085938 sec
Episode 6586, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01055145263671875 sec
Episode 6587, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010561943054199219 sec
Episode 6588, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010555505752563477 sec


Episode 6673, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010738134384155273 sec
Episode 6674, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01066136360168457 sec
Episode 6675, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010590791702270508 sec
Episode 6676, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010551214218139648 sec
Episode 6677, loss:0.0007, succeed, steps:4, total reward:0.2000, 0.010547876358032227 sec
Episode 6678, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01057744026184082 sec
Episode 6679, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01055288314819336 sec
Episode 6680, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010544061660766602 sec
Episode 6681, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010518789291381836 sec
Episode 6682, loss:0.0004, succeed, steps:4, total reward:0.2000, 0.010574102401733398 sec
Episode 6683, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010564088821411133 sec
Ep

Episode 6768, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010815143585205078 sec
Episode 6769, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010686874389648438 sec
Episode 6770, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010530233383178711 sec
Episode 6771, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0105743408203125 sec
Episode 6772, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010527849197387695 sec
Episode 6773, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010528326034545898 sec
Episode 6774, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01051640510559082 sec
Episode 6775, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010524272918701172 sec
Episode 6776, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01055288314819336 sec
Episode 6777, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010515451431274414 sec
Episode 6778, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010553836822509766 sec
Epi

Episode 6863, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010757684707641602 sec
Episode 6864, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01059722900390625 sec
Episode 6865, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010554790496826172 sec
Episode 6866, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010758399963378906 sec
Episode 6867, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010552167892456055 sec
Episode 6868, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010577917098999023 sec
Episode 6869, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011049747467041016 sec
Episode 6870, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011033296585083008 sec
Episode 6871, loss:0.0004, succeed, steps:4, total reward:0.2000, 0.011020183563232422 sec
Episode 6872, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010953426361083984 sec
Episode 6873, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010509967803955078 sec


Episode 6958, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010810136795043945 sec
Episode 6959, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010703086853027344 sec
Episode 6960, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010498285293579102 sec
Episode 6961, loss:0.0007, succeed, steps:4, total reward:0.2000, 0.010567665100097656 sec
Episode 6962, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010572671890258789 sec
Episode 6963, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010546207427978516 sec
Episode 6964, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010551214218139648 sec
Episode 6965, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010540485382080078 sec
Episode 6966, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010528087615966797 sec
Episode 6967, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010593891143798828 sec
Episode 6968, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010605096817016602 sec

Episode 7051, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010817766189575195 sec
Episode 7052, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010715723037719727 sec
Episode 7053, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010525941848754883 sec
Episode 7054, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010552167892456055 sec
Episode 7055, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010648012161254883 sec
Episode 7056, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010565519332885742 sec
Episode 7057, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010534048080444336 sec
Episode 7058, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010476112365722656 sec
Episode 7059, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010518074035644531 sec
Episode 7060, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010736942291259766 sec
Episode 7061, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010538339614868164 sec

Episode 7146, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010909080505371094 sec
Episode 7147, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01061391830444336 sec
Episode 7148, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010573148727416992 sec
Episode 7149, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01057887077331543 sec
Episode 7150, loss:0.0010, succeed, steps:4, total reward:0.2000, 0.010539531707763672 sec
Episode 7151, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010528564453125 sec
Episode 7152, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01049947738647461 sec
Episode 7153, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010565996170043945 sec
Episode 7154, loss:0.0004, succeed, steps:4, total reward:0.2000, 0.010629892349243164 sec
Episode 7155, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010550975799560547 sec
Episode 7156, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010502338409423828 sec
Episo

Episode 7241, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010828018188476562 sec
Episode 7242, loss:0.0004, succeed, steps:4, total reward:0.2000, 0.01061105728149414 sec
Episode 7243, loss:0.0006, succeed, steps:4, total reward:0.2000, 0.010584831237792969 sec
Episode 7244, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010552167892456055 sec
Episode 7245, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01055908203125 sec
Episode 7246, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010483741760253906 sec
Episode 7247, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01056981086730957 sec
Episode 7248, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01051950454711914 sec
Episode 7249, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01048421859741211 sec
Episode 7250, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010630130767822266 sec
Episode 7251, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010533332824707031 sec
Episode

Episode 7336, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010691404342651367 sec
Episode 7337, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010636091232299805 sec
Episode 7338, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010471582412719727 sec
Episode 7339, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010577678680419922 sec
Episode 7340, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01052403450012207 sec
Episode 7341, loss:0.0006, succeed, steps:4, total reward:0.2000, 0.010601282119750977 sec
Episode 7342, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010579824447631836 sec
Episode 7343, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010522603988647461 sec
Episode 7344, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010554790496826172 sec
Episode 7345, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010505914688110352 sec
Episode 7346, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01056528091430664 sec
E

Episode 7431, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010772466659545898 sec
Episode 7432, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010572433471679688 sec
Episode 7433, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010550737380981445 sec
Episode 7434, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01054692268371582 sec
Episode 7435, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010533571243286133 sec
Episode 7436, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010644912719726562 sec
Episode 7437, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010479927062988281 sec
Episode 7438, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010596275329589844 sec
Episode 7439, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010536909103393555 sec
Episode 7440, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010530948638916016 sec
Episode 7441, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010532855987548828 sec


Episode 7526, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010711669921875 sec
Episode 7527, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01068425178527832 sec
Episode 7528, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010508298873901367 sec
Episode 7529, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010539770126342773 sec
Episode 7530, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010518312454223633 sec
Episode 7531, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010560274124145508 sec
Episode 7532, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010542631149291992 sec
Episode 7533, loss:0.0004, succeed, steps:4, total reward:0.2000, 0.010581254959106445 sec
Episode 7534, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010528087615966797 sec
Episode 7535, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01059103012084961 sec
Episode 7536, loss:0.0009, succeed, steps:4, total reward:0.2000, 0.010598897933959961 sec
Epis

Episode 7621, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01072549819946289 sec
Episode 7622, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01063680648803711 sec
Episode 7623, loss:0.0003, succeed, steps:4, total reward:0.2000, 0.010513544082641602 sec
Episode 7624, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010562658309936523 sec
Episode 7625, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010634183883666992 sec
Episode 7626, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010586023330688477 sec
Episode 7627, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010563850402832031 sec
Episode 7628, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010586261749267578 sec
Episode 7629, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010528802871704102 sec
Episode 7630, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01055288314819336 sec
Episode 7631, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010527610778808594 sec
Ep

Episode 7716, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010753393173217773 sec
Episode 7717, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010689020156860352 sec
Episode 7718, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010571718215942383 sec
Episode 7719, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010670900344848633 sec
Episode 7720, loss:0.0006, succeed, steps:4, total reward:0.2000, 0.010636568069458008 sec
Episode 7721, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010498046875 sec
Episode 7722, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010573863983154297 sec
Episode 7723, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010524749755859375 sec
Episode 7724, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010506868362426758 sec
Episode 7725, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010549068450927734 sec
Episode 7726, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010522127151489258 sec
Episo

Episode 7811, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010746479034423828 sec
Episode 7812, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010724306106567383 sec
Episode 7813, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01057577133178711 sec
Episode 7814, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010572671890258789 sec
Episode 7815, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010536670684814453 sec
Episode 7816, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010559320449829102 sec
Episode 7817, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010531425476074219 sec
Episode 7818, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010542869567871094 sec
Episode 7819, loss:0.0003, succeed, steps:4, total reward:0.2000, 0.010539531707763672 sec
Episode 7820, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010534286499023438 sec
Episode 7821, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010651111602783203 sec


Episode 7906, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010737895965576172 sec
Episode 7907, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010687828063964844 sec
Episode 7908, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01054525375366211 sec
Episode 7909, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010523796081542969 sec
Episode 7910, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01056361198425293 sec
Episode 7911, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010498285293579102 sec
Episode 7912, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010525703430175781 sec
Episode 7913, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010558605194091797 sec
Episode 7914, loss:0.0003, succeed, steps:4, total reward:0.2000, 0.010476827621459961 sec
Episode 7915, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010544776916503906 sec
Episode 7916, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010508060455322266 sec
E

Checkpoint saved at episode 8000 to /home/mcwave/code/autocode/datasets/rl_sort_transformer_curriculum/list2_transformer4_192_gamma07_step10_v1/ckpt_8000_1.0000_4.00.pth
Learning rate = 0.000056
Episode 8000, loss:0.0003, succeed, steps:4, total reward:0.2000, 0.03741145133972168 sec
Episode 8001, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010777711868286133 sec
Episode 8002, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010599374771118164 sec
Episode 8003, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010587930679321289 sec
Episode 8004, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010517597198486328 sec
Episode 8005, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010596513748168945 sec
Episode 8006, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010571002960205078 sec
Episode 8007, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010706663131713867 sec
Episode 8008, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0105259418

Episode 8094, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01076960563659668 sec
Episode 8095, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010725736618041992 sec
Episode 8096, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010557889938354492 sec
Episode 8097, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010548591613769531 sec
Episode 8098, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010544538497924805 sec
Episode 8099, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010566234588623047 sec
len2 start12 Comparison 12 13 more Swap
Episode 8100, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01053619384765625 sec
Episode 8101, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010553598403930664 sec
Episode 8102, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010549545288085938 sec
Episode 8103, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010555267333984375 sec
Episode 8104, loss:0.0000, succeed, steps:4, total r

Episode 8189, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010770320892333984 sec
Episode 8190, loss:0.0005, succeed, steps:4, total reward:0.2000, 0.010668754577636719 sec
Episode 8191, loss:0.0005, succeed, steps:4, total reward:0.2000, 0.01060175895690918 sec
Episode 8192, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010493993759155273 sec
Episode 8193, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010585308074951172 sec
Episode 8194, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010562896728515625 sec
Episode 8195, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010565042495727539 sec
Episode 8196, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01051640510559082 sec
Episode 8197, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010511636734008789 sec
Episode 8198, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010561227798461914 sec
Episode 8199, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010547637939453125 sec
l

Episode 8284, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010770559310913086 sec
Episode 8285, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010721206665039062 sec
Episode 8286, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010559797286987305 sec
Episode 8287, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010514497756958008 sec
Episode 8288, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01053166389465332 sec
Episode 8289, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010498762130737305 sec
Episode 8290, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01050710678100586 sec
Episode 8291, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01053309440612793 sec
Episode 8292, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010521888732910156 sec
Episode 8293, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010502099990844727 sec
Episode 8294, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01050114631652832 sec
Epi

Episode 8378, loss:0.0003, succeed, steps:4, total reward:0.2000, 0.010809183120727539 sec
Episode 8379, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011010169982910156 sec
Episode 8380, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010803699493408203 sec
Episode 8381, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010822534561157227 sec
Episode 8382, loss:0.0004, succeed, steps:4, total reward:0.2000, 0.010833263397216797 sec
Episode 8383, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010861396789550781 sec
Episode 8384, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010597944259643555 sec
Episode 8385, loss:0.0003, succeed, steps:4, total reward:0.2000, 0.010587692260742188 sec
Episode 8386, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010581493377685547 sec
Episode 8387, loss:0.0004, succeed, steps:4, total reward:0.2000, 0.010601043701171875 sec
Episode 8388, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01065969467163086 sec


Episode 8473, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010953187942504883 sec
Episode 8474, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010657548904418945 sec
Episode 8475, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010630369186401367 sec
Episode 8476, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01062917709350586 sec
Episode 8477, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010604381561279297 sec
Episode 8478, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010657072067260742 sec
Episode 8479, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010590314865112305 sec
Episode 8480, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010642766952514648 sec
Episode 8481, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01059412956237793 sec
Episode 8482, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010596752166748047 sec
Episode 8483, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010584592819213867 sec
E

Episode 8568, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010991334915161133 sec
Episode 8569, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0106658935546875 sec
Episode 8570, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010578632354736328 sec
Episode 8571, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010596752166748047 sec
Episode 8572, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010570287704467773 sec
Episode 8573, loss:0.0004, succeed, steps:4, total reward:0.2000, 0.010695457458496094 sec
Episode 8574, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010653018951416016 sec
Episode 8575, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010605573654174805 sec
Episode 8576, loss:0.0002, succeed, steps:4, total reward:0.2000, 0.010592222213745117 sec
Episode 8577, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010633468627929688 sec
Episode 8578, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010579824447631836 sec
E

Episode 8663, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011062383651733398 sec
Episode 8664, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010623693466186523 sec
Episode 8665, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01060628890991211 sec
Episode 8666, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010677576065063477 sec
Episode 8667, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010587692260742188 sec
Episode 8668, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010582447052001953 sec
Episode 8669, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010637283325195312 sec
Episode 8670, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010633468627929688 sec
Episode 8671, loss:0.0004, succeed, steps:4, total reward:0.2000, 0.01057744026184082 sec
Episode 8672, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010616064071655273 sec
Episode 8673, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010581493377685547 sec
E

Episode 8757, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010972261428833008 sec
Episode 8758, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010749340057373047 sec
Episode 8759, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010639667510986328 sec
Episode 8760, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010752201080322266 sec
Episode 8761, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01055908203125 sec
Episode 8762, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010607719421386719 sec
Episode 8763, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010645866394042969 sec
Episode 8764, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010599374771118164 sec
Episode 8765, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010603904724121094 sec
Episode 8766, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010591745376586914 sec
Episode 8767, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010597467422485352 sec
Epi

Episode 8852, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01089787483215332 sec
Episode 8853, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01071786880493164 sec
Episode 8854, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010601520538330078 sec
Episode 8855, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010655641555786133 sec
Episode 8856, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010620594024658203 sec
Episode 8857, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010614871978759766 sec
Episode 8858, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010559558868408203 sec
Episode 8859, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010604143142700195 sec
Episode 8860, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010667562484741211 sec
Episode 8861, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010587930679321289 sec
Episode 8862, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010625123977661133 sec
E

Episode 8947, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010965824127197266 sec
Episode 8948, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01069498062133789 sec
Episode 8949, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010586261749267578 sec
Episode 8950, loss:0.0002, succeed, steps:4, total reward:0.2000, 0.010583877563476562 sec
Episode 8951, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01061105728149414 sec
Episode 8952, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010600805282592773 sec
Episode 8953, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010595083236694336 sec
Episode 8954, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010547399520874023 sec
Episode 8955, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010629653930664062 sec
Episode 8956, loss:0.0002, succeed, steps:4, total reward:0.2000, 0.01061105728149414 sec
Episode 8957, loss:0.0002, succeed, steps:4, total reward:0.2000, 0.01065683364868164 sec
Epi

Episode 9039, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01098179817199707 sec
Episode 9040, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010735273361206055 sec
Episode 9041, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010601043701171875 sec
Episode 9042, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010589361190795898 sec
Episode 9043, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010579586029052734 sec
Episode 9044, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010762453079223633 sec
Episode 9045, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010570287704467773 sec
Episode 9046, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010617256164550781 sec
Episode 9047, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010570287704467773 sec
Episode 9048, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01059865951538086 sec
Episode 9049, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010633707046508789 sec
E

Episode 9133, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011100530624389648 sec
Episode 9134, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010865926742553711 sec
Episode 9135, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010852336883544922 sec
Episode 9136, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010808944702148438 sec
Episode 9137, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010818958282470703 sec
Episode 9138, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010769128799438477 sec
Episode 9139, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01080012321472168 sec
Episode 9140, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010830879211425781 sec
Episode 9141, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01085805892944336 sec
Episode 9142, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010792970657348633 sec
Episode 9143, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010780096054077148 sec
E

Episode 9228, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010950803756713867 sec
Episode 9229, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010689973831176758 sec
Episode 9230, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010586023330688477 sec
Episode 9231, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010598182678222656 sec
Episode 9232, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010582923889160156 sec
Episode 9233, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010640382766723633 sec
Episode 9234, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010657072067260742 sec
Episode 9235, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010575294494628906 sec
Episode 9236, loss:0.0003, succeed, steps:4, total reward:0.2000, 0.010607719421386719 sec
Episode 9237, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010623931884765625 sec
Episode 9238, loss:0.0003, succeed, steps:4, total reward:0.2000, 0.010581731796264648 sec

Episode 9322, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011071205139160156 sec
Episode 9323, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010848522186279297 sec
Episode 9324, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010782957077026367 sec
Episode 9325, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010814189910888672 sec
Episode 9326, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010809898376464844 sec
Episode 9327, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01079249382019043 sec
Episode 9328, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010803461074829102 sec
Episode 9329, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010770082473754883 sec
Episode 9330, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010792970657348633 sec
Episode 9331, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010805368423461914 sec
Episode 9332, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010854721069335938 sec


Episode 9417, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01100921630859375 sec
Episode 9418, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010738849639892578 sec
Episode 9419, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010675430297851562 sec
Episode 9420, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010601282119750977 sec
Episode 9421, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010614156723022461 sec
Episode 9422, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010626077651977539 sec
Episode 9423, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010598182678222656 sec
Episode 9424, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01059269905090332 sec
Episode 9425, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01060628890991211 sec
Episode 9426, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010592937469482422 sec
Episode 9427, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010605096817016602 sec
Ep

Episode 9512, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010840177536010742 sec
Episode 9513, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010742664337158203 sec
Episode 9514, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010598421096801758 sec
Episode 9515, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010621070861816406 sec
Episode 9516, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01057291030883789 sec
Episode 9517, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010589838027954102 sec
Episode 9518, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01055145263671875 sec
Episode 9519, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010602474212646484 sec
Episode 9520, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010605812072753906 sec
Episode 9521, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010597944259643555 sec
Episode 9522, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010572433471679688 sec
E

Episode 9607, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011064767837524414 sec
Episode 9608, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010733604431152344 sec
Episode 9609, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010627031326293945 sec
Episode 9610, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010579824447631836 sec
Episode 9611, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010616540908813477 sec
Episode 9612, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010582923889160156 sec
Episode 9613, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010579347610473633 sec
Episode 9614, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010562658309936523 sec
Episode 9615, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010581493377685547 sec
Episode 9616, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01060795783996582 sec
Episode 9617, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010567426681518555 sec


Episode 9701, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011147737503051758 sec
Episode 9702, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01089024543762207 sec
Episode 9703, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010834932327270508 sec
Episode 9704, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010793209075927734 sec
Episode 9705, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010866165161132812 sec
Episode 9706, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01080775260925293 sec
Episode 9707, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010825157165527344 sec
Episode 9708, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01079869270324707 sec
Episode 9709, loss:0.0002, succeed, steps:4, total reward:0.2000, 0.01082158088684082 sec
Episode 9710, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010833501815795898 sec
Episode 9711, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010812044143676758 sec
Epi

Episode 9795, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011016130447387695 sec
Episode 9796, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01067662239074707 sec
Episode 9797, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010651588439941406 sec
Episode 9798, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010619163513183594 sec
Episode 9799, loss:0.0003, succeed, steps:4, total reward:0.2000, 0.01062464714050293 sec
len2 start0 Comparison 0 1 more Swap
Episode 9800, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01060938835144043 sec
Episode 9801, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01052713394165039 sec
Episode 9802, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0105743408203125 sec
Episode 9803, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010609149932861328 sec
Episode 9804, loss:0.0003, succeed, steps:4, total reward:0.2000, 0.010532140731811523 sec
Episode 9805, loss:0.0000, succeed, steps:4, total reward:0

Episode 9889, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01119375228881836 sec
Episode 9890, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010677337646484375 sec
Episode 9891, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010601997375488281 sec
Episode 9892, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010598897933959961 sec
Episode 9893, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010575056076049805 sec
Episode 9894, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010578632354736328 sec
Episode 9895, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010607719421386719 sec
Episode 9896, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010606765747070312 sec
Episode 9897, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010648250579833984 sec
Episode 9898, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010573148727416992 sec
Episode 9899, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010632991790771484 sec


Episode 9984, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011017084121704102 sec
Episode 9985, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01072835922241211 sec
Episode 9986, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010564327239990234 sec
Episode 9987, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010633707046508789 sec
Episode 9988, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010583877563476562 sec
Episode 9989, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010663509368896484 sec
Episode 9990, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010571956634521484 sec
Episode 9991, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010609149932861328 sec
Episode 9992, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010561943054199219 sec
Episode 9993, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010598897933959961 sec
Episode 9994, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01059412956237793 sec
E

Episode 10075, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011176347732543945 sec
Episode 10076, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010917186737060547 sec
Episode 10077, loss:0.0001, succeed, steps:4, total reward:0.2000, 0.010831117630004883 sec
Episode 10078, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010785102844238281 sec
Episode 10079, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010841846466064453 sec
Episode 10080, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01079559326171875 sec
Episode 10081, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010803461074829102 sec
Episode 10082, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010796308517456055 sec
Episode 10083, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010833978652954102 sec
Episode 10084, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010790109634399414 sec
Episode 10085, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010776281356

Episode 10169, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0110015869140625 sec
Episode 10170, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010741949081420898 sec
Episode 10171, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01058340072631836 sec
Episode 10172, loss:0.0001, succeed, steps:4, total reward:0.2000, 0.010614156723022461 sec
Episode 10173, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010599374771118164 sec
Episode 10174, loss:0.0002, succeed, steps:4, total reward:0.2000, 0.010580062866210938 sec
Episode 10175, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010596990585327148 sec
Episode 10176, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010540246963500977 sec
Episode 10177, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010623931884765625 sec
Episode 10178, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010561227798461914 sec
Episode 10179, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01058197021484

Episode 10264, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011157512664794922 sec
Episode 10265, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01073455810546875 sec
Episode 10266, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010611534118652344 sec
Episode 10267, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01057577133178711 sec
Episode 10268, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010651826858520508 sec
Episode 10269, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010676145553588867 sec
Episode 10270, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010560989379882812 sec
Episode 10271, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010562419891357422 sec
Episode 10272, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010579586029052734 sec
Episode 10273, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010556697845458984 sec
Episode 10274, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0105493068695

Episode 10359, loss:0.0001, succeed, steps:4, total reward:0.2000, 0.011034488677978516 sec
Episode 10360, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010729551315307617 sec
Episode 10361, loss:0.0002, succeed, steps:4, total reward:0.2000, 0.010640382766723633 sec
Episode 10362, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01061105728149414 sec
Episode 10363, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010580778121948242 sec
Episode 10364, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01058053970336914 sec
Episode 10365, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010617256164550781 sec
Episode 10366, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010561227798461914 sec
Episode 10367, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010582685470581055 sec
Episode 10368, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010569572448730469 sec
Episode 10369, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0105996131896

Episode 10453, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011072158813476562 sec
Episode 10454, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010643243789672852 sec
Episode 10455, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01060628890991211 sec
Episode 10456, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010607004165649414 sec
Episode 10457, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010584354400634766 sec
Episode 10458, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010620832443237305 sec
Episode 10459, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010596275329589844 sec
Episode 10460, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010620832443237305 sec
Episode 10461, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01058816909790039 sec
Episode 10462, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01063680648803711 sec
Episode 10463, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01059627532958

Episode 10548, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010872364044189453 sec
Episode 10549, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010605573654174805 sec
Episode 10550, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010702133178710938 sec
Episode 10551, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010767459869384766 sec
Episode 10552, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010575056076049805 sec
Episode 10553, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010595321655273438 sec
Episode 10554, loss:0.0001, succeed, steps:4, total reward:0.2000, 0.01060795783996582 sec
Episode 10555, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010578155517578125 sec
Episode 10556, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010593891143798828 sec
Episode 10557, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010557413101196289 sec
Episode 10558, loss:0.0001, succeed, steps:4, total reward:0.2000, 0.010625839233

Episode 10642, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01129913330078125 sec
Episode 10643, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010673761367797852 sec
Episode 10644, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010612726211547852 sec
Episode 10645, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010594844818115234 sec
Episode 10646, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010626792907714844 sec
Episode 10647, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010593652725219727 sec
Episode 10648, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01056671142578125 sec
Episode 10649, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010597944259643555 sec
Episode 10650, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010555744171142578 sec
Episode 10651, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010652542114257812 sec
Episode 10652, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.0105476379394

Episode 10737, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010955572128295898 sec
Episode 10738, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010734081268310547 sec
Episode 10739, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010626792907714844 sec
Episode 10740, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010568857192993164 sec
Episode 10741, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01060628890991211 sec
Episode 10742, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01065206527709961 sec
Episode 10743, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010608434677124023 sec
Episode 10744, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010606765747070312 sec
Episode 10745, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010571718215942383 sec
Episode 10746, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010550737380981445 sec
Episode 10747, loss:0.0001, succeed, steps:4, total reward:0.2000, 0.0105810165405

Episode 10830, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.011103153228759766 sec
Episode 10831, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010982036590576172 sec
Episode 10832, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01086878776550293 sec
Episode 10833, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010877847671508789 sec
Episode 10834, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010857105255126953 sec
Episode 10835, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010871648788452148 sec
Episode 10836, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010840892791748047 sec
Episode 10837, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01085519790649414 sec
Episode 10838, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01089930534362793 sec
Episode 10839, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01082754135131836 sec
Episode 10840, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010865211486816

Episode 10923, loss:0.0002, succeed, steps:4, total reward:0.2000, 0.011073112487792969 sec
Episode 10924, loss:0.0001, succeed, steps:4, total reward:0.2000, 0.010753631591796875 sec
Episode 10925, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01056218147277832 sec
Episode 10926, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010596513748168945 sec
Episode 10927, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010565996170043945 sec
Episode 10928, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010594367980957031 sec
Episode 10929, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010612010955810547 sec
Episode 10930, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010653495788574219 sec
Episode 10931, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010580778121948242 sec
Episode 10932, loss:0.0002, succeed, steps:4, total reward:0.2000, 0.010637998580932617 sec
Episode 10933, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010588169097

Episode 11015, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010918617248535156 sec
Episode 11016, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010772943496704102 sec
Episode 11017, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010620355606079102 sec
Episode 11018, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010645151138305664 sec
Episode 11019, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010621786117553711 sec
Episode 11020, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010584592819213867 sec
Episode 11021, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010654211044311523 sec
Episode 11022, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010641336441040039 sec
Episode 11023, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.01064300537109375 sec
Episode 11024, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010573148727416992 sec
Episode 11025, loss:0.0000, succeed, steps:4, total reward:0.2000, 0.010603189468


KeyboardInterrupt

