In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import math
import random

# --- Task and training hyperparameters -------------------------------------
MIN_LIST_LEN = 3       # lower bound (inclusive) for the sampled list length
MAX_LIST_LEN = 3       # upper bound (inclusive) for the sampled list length
MAX_STEPS = 16         # per-episode cap used by the environment and the rollout loop
EPS_START = 0.9        # exploration probability at episode 0
EPS_END = 0.1          # exploration probability the schedule decays towards
EPS_DECAY = 2000       # decay constant (in episodes) of the exponential schedule
GAMMA = 0.9            # discount factor for returns
NUM_EPISODES = 100000  # number of training episodes

# Token vocabulary. Ids are assigned in listing order:
#   0-1   action tokens
#   2-9   list positions '0'..'7'
#   10-12 comparison outcomes
#   13-20 list-length markers 'len1'..'len8'
vocab = {
    token: token_id
    for token_id, token in enumerate(
        ['Comparison', 'Swap']
        + [str(position) for position in range(8)]
        + ['less', 'equal', 'more']
        + [f'len{size}' for size in range(1, 9)]
    )
}
# Reverse lookup: token id -> token string.
inv_vocab = dict(zip(vocab.values(), vocab.keys()))

# Prefer the GPU when one is visible to PyTorch.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Define the environment
class SortingEnv:
    """Episodic environment in which an agent sorts a short list of integers.

    The agent never sees the values directly. It issues ``Comparison i j``
    to learn how two positions relate, and ``Swap`` to exchange the two
    positions named by the most recent comparison. An episode ends when a
    swap leaves the list sorted, when an invalid action is issued, or when
    ``max_steps`` actions have been counted.
    """

    def __init__(self):
        self.max_steps = MAX_STEPS

    def reset(self):
        """Start a new episode.

        Returns:
            ``(token_id, values)`` where ``token_id`` is the vocabulary id of
            the ``len<N>`` marker for the sampled length and ``values`` is a
            copy of the freshly sampled list.
        """
        self.length = random.randint(MIN_LIST_LEN, MAX_LIST_LEN)
        self.list = [random.randint(1, 100) for _ in range(self.length)]
        self.indices = None      # pair chosen by the last Comparison, if any
        self.current_step = 0
        self.done = False
        return vocab[f'len{self.length}'], self.list.copy()

    def get_list(self):
        """Return the environment's internal list (not a copy)."""
        return self.list

    def get_list_len(self):
        """Return the number of elements currently in the list."""
        return len(self.list)

    def _abort(self):
        """End the episode for an invalid action; the step counter is untouched."""
        self.done = True
        return None, -1.0, self.done, self.list.copy()

    def _shaping_reward(self, first, second, before):
        """Reward for a swap that did not finish the sort.

        ``before`` is the list as it was prior to the swap. A swap that puts
        the two positions into ascending order earns ``0.1``, one that puts
        them into descending order costs ``-0.1``, anything else ``-0.01``.
        """
        old_a, old_b = before[first], before[second]
        new_a, new_b = self.list[first], self.list[second]
        repaired = (
            (first < second and old_a > old_b and new_a <= new_b)
            or (first > second and old_a < old_b and new_a >= new_b)
        )
        if repaired:
            return 0.1
        broke = (
            (first < second and old_a < old_b and new_a >= new_b)
            or (first > second and old_a > old_b and new_a <= new_b)
        )
        if broke:
            return -0.1
        return -0.01

    def step(self, action_tokens):
        """Apply one action.

        Args:
            action_tokens: ``[Comparison, index_token, index_token]`` or
                ``[Swap]``, all given as vocabulary ids.

        Returns:
            ``(response_token, reward, done, values)``. ``response_token`` is
            the id of ``less``/``equal``/``more`` after a comparison and
            ``None`` otherwise; ``values`` is a copy of the current list.
        """
        opcode = action_tokens[0]
        reply = None

        if opcode == vocab['Comparison']:
            if len(action_tokens) != 3:
                return self._abort()
            zero_id = vocab['0']
            left = action_tokens[1] - zero_id
            right = action_tokens[2] - zero_id
            if left >= self.length or right >= self.length or left < 0 or right < 0:
                return self._abort()
            self.indices = (left, right)
            lhs, rhs = self.list[left], self.list[right]
            if lhs < rhs:
                reply, reward = vocab['less'], -0.01
            elif lhs == rhs:
                # Comparing equal values costs slightly more than other comparisons.
                reply, reward = vocab['equal'], -0.02
            else:
                reply, reward = vocab['more'], -0.01
        elif opcode == vocab['Swap']:
            if self.indices is None:
                return self._abort()
            left, right = self.indices
            snapshot = self.list.copy()
            self.list[left], self.list[right] = self.list[right], self.list[left]
            if self.list == sorted(self.list):
                reward = 10.0
                self.done = True
            else:
                reward = self._shaping_reward(left, right, snapshot)
            # A swap consumes the remembered comparison pair.
            self.indices = None
        else:
            # Unknown leading token: terminal penalty, but still counted as a step.
            reward = -1.0
            self.done = True

        self.current_step += 1
        if self.current_step >= self.max_steps:
            self.done = True
        return reply, reward, self.done, self.list.copy()

# Positional Encoding for Transformer
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a sequence of embeddings.

    The table is precomputed for ``max_len`` positions and stored as a
    non-trainable buffer of shape ``(max_len, 1, d_model)``, so it broadcasts
    over the batch dimension of a ``(seq_len, batch, d_model)`` input.

    Args:
        d_model: Embedding width. Both even and odd widths are supported.
        max_len: Largest sequence length the module can encode.
        dropout: Dropout probability applied after adding the encoding.
    """

    def __init__(self, d_model, max_len=256, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)  # (max_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                             (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)  # (max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)  # even columns
        # With an odd d_model there is one fewer odd column than even column,
        # so trim the frequency vector to avoid a shape mismatch.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])  # odd columns
        self.register_buffer('pe', pe.unsqueeze(1))  # (max_len, 1, d_model)

    def forward(self, x):
        """Return ``dropout(x + pe[:seq_len])``.

        Args:
            x: Tensor whose first dimension is the sequence position.

        Raises:
            ValueError: If the sequence is longer than ``max_len``.
        """
        seq_len = x.size(0)
        if seq_len > self.pe.size(0):
            raise ValueError(
                f"Sequence length {seq_len} exceeds the positional table "
                f"size {self.pe.size(0)}"
            )
        return self.dropout(x + self.pe[:seq_len])

# Transformer Model
class TransformerModel(nn.Module):
    """Token-level transformer encoder with a linear head over the vocabulary.

    Args:
        vocab_size: Number of distinct token ids (embedding rows and output logits).
        d_model: Embedding / hidden width.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, vocab_size, d_model=128, nhead=8, num_layers=3):
        super().__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model

        # Sub-modules are created in a fixed order so that seeded runs draw
        # their initial weights in the same sequence.
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        single_layer = nn.TransformerEncoderLayer(d_model, nhead)
        self.transformer_encoder = nn.TransformerEncoder(single_layer, num_layers)
        self.decoder = nn.Linear(d_model, vocab_size)

        self.init_weights()

    def init_weights(self):
        """Re-initialise the embedding and output head uniformly in [-0.1, 0.1]; zero the head bias."""
        bound = 0.1
        nn.init.uniform_(self.embedding.weight, -bound, bound)
        nn.init.zeros_(self.decoder.bias)
        nn.init.uniform_(self.decoder.weight, -bound, bound)

    def forward(self, src):
        """Map token ids to per-position vocabulary logits.

        Args:
            src: Long tensor of token ids; the caller in this file passes
                shape ``(seq_len, batch)``.

        Returns:
            Logits with a trailing dimension of size ``vocab_size``.
        """
        # Scale embeddings by sqrt(d_model) before adding positional information.
        scaled = self.embedding(src) * math.sqrt(self.d_model)
        encoded = self.transformer_encoder(self.pos_encoder(scaled))
        return self.decoder(encoded)

def decode(input_tokens, inv_vocab):
    """Render a sequence of token ids as a space-separated string of token names.

    Args:
        input_tokens: Iterable of token ids.
        inv_vocab: Mapping from token id to token string.

    Raises:
        KeyError: If an id is missing from ``inv_vocab``.
    """
    names = map(inv_vocab.__getitem__, input_tokens)
    return ' '.join(names)

def equalize_nonzero_entries(probs):
    """Return a uniform distribution over the non-zero entries of ``probs``.

    Every non-zero entry is replaced by ``1 / k`` (``k`` = number of non-zero
    entries); zero entries stay zero. The input tensor is left untouched and
    the result lives on the same device and has the same dtype as the input.

    The previous implementation wrote into ``probs`` in place whenever the
    tensor was already on the CPU. For a softmax output that overwrites a
    value autograd saved for the backward pass, so a later ``backward()``
    fails. Building a new tensor avoids this and also removes the CPU round
    trip for CUDA inputs.

    Args:
        probs: Tensor of non-negative weights (typically masked softmax output).

    Returns:
        A new tensor shaped like ``probs``.

    Raises:
        ValueError: If ``probs`` has no non-zero entry.
    """
    non_zero_mask = probs != 0
    non_zero_count = int(non_zero_mask.sum().item())

    if non_zero_count == 0:
        raise ValueError("Tensor has no non-zero entries to equalize.")

    # Out-of-place selection: masked positions get the constant, the rest
    # (all zeros) are passed through from the input.
    uniform = torch.full_like(probs, 1.0 / non_zero_count)
    return torch.where(non_zero_mask, uniform, probs)


# Training Loop
def _valid_token_ids(state, list_length):
    """Return the token ids the policy may emit in ``state``.

    Index states are limited to positions ``0 .. list_length - 1``; every
    other state (including unexpected ones) falls back to the action tokens.
    """
    if state in ('expect_index1', 'expect_index2'):
        return [vocab[str(i)] for i in range(list_length)]
    return [vocab['Comparison'], vocab['Swap']]


def _discounted_returns(rewards, gamma):
    """Return the discounted return-to-go for each entry of ``rewards``."""
    returns = []
    running = 0.0
    for reward in reversed(rewards):
        running = reward + gamma * running
        returns.append(running)
    returns.reverse()
    return returns


def train(verbose=False):
    """Train the transformer policy on ``SortingEnv`` with a REINFORCE-style update.

    Each episode the policy emits tokens one at a time under a small state
    machine (action -> index -> index). A ``Comparison`` spans three sampled
    tokens and a ``Swap`` one, while the environment yields exactly one
    reward per completed action. Every sampled token is therefore tagged
    with the index of the action it belongs to, and its log-probability is
    weighted by that action's (normalised) discounted return. Pairing
    log-probs and returns positionally, as a plain ``zip`` would, credits
    the wrong tokens and drops the tail of the episode.

    Args:
        verbose: When true, print the token history, the list and each reward
            during the rollout.
    """
    # NOTE(review): anomaly detection is a debugging aid that slows autograd
    # considerably; kept because the original run relied on it.
    torch.autograd.set_detect_anomaly(True)
    vocab_size = len(vocab)
    model = TransformerModel(vocab_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    for episode in range(NUM_EPISODES):
        model.train()  # Set model to training mode
        env = SortingEnv()
        initial_token_id, current_list = env.reset()
        input_tokens = [initial_token_id]
        log_probs = []     # one entry per sampled token
        reward_slots = []  # index into `rewards` of the action each token belongs to
        rewards = []       # one entry per completed (or rejected) action
        state = 'expect_action'
        index1_id = None
        done = False
        # Exploration probability depends only on the episode number.
        eps_threshold = EPS_END + (EPS_START - EPS_END) * np.exp(-1.0 * episode / EPS_DECAY)

        while not done and len(input_tokens) < env.max_steps:
            if verbose:
                print(decode(input_tokens, inv_vocab))
                print(env.get_list())
            # (seq_len, batch_size=1) token ids -> (seq_len, 1, vocab_size) logits
            input_seq = torch.tensor(input_tokens, dtype=torch.long, device=device).unsqueeze(1)
            logits = model(input_seq)[-1, 0, :]  # logits for the next token

            if torch.isnan(logits).any():
                print(f"Episode {episode}, NaNs in logits before masking.")
                break

            valid_token_ids = _valid_token_ids(state, env.length)
            if any(idx >= vocab_size or idx < 0 for idx in valid_token_ids):
                print(f"Episode {episode}, invalid indices in valid_token_ids: {valid_token_ids}")
                break

            # Large finite penalty instead of -inf keeps the softmax NaN-free.
            mask = torch.full_like(logits, -1e9)
            mask[valid_token_ids] = 0
            masked_logits = logits + mask

            if torch.isnan(masked_logits).any():
                print(f"Episode {episode}, NaNs in masked_logits after masking.")
                break

            probs = F.softmax(masked_logits, dim=0)

            if torch.isnan(probs).any():
                print(f"Episode {episode}, NaNs in probs after softmax.")
                break

            # Epsilon exploration: sample uniformly among the valid tokens.
            if random.random() < eps_threshold:
                probs = equalize_nonzero_entries(probs)
            try:
                m = torch.distributions.Categorical(probs)
                action_token = m.sample()
                log_prob = m.log_prob(action_token)
            except ValueError as e:
                print(f"Episode {episode}, error in sampling action: {e}")
                break

            action = action_token.item()
            log_probs.append(log_prob)
            # The reward for the action this token is part of has not been
            # appended yet, so it will land at the current end of `rewards`.
            reward_slots.append(len(rewards))
            input_tokens.append(action)

            if state == 'expect_action':
                if action == vocab['Comparison']:
                    state = 'expect_index1'
                elif action == vocab['Swap']:
                    if env.indices is None:
                        # Can't perform Swap without a previous Comparison
                        rewards.append(-1.0)
                        done = True
                        continue
                    response_token, reward, done, current_list = env.step([vocab['Swap']])
                    if verbose:
                        print("Reward:", reward)
                    rewards.append(reward)
                else:
                    # Invalid action, end the episode
                    rewards.append(-1.0)
                    done = True
            elif state == 'expect_index1':
                index1_id = action
                state = 'expect_index2'
            elif state == 'expect_index2':
                response_token, reward, done, current_list = env.step(
                    [vocab['Comparison'], index1_id, action]
                )
                if verbose:
                    print("Reward:", reward)
                rewards.append(reward)
                if response_token is not None:
                    # Feed the comparison outcome back to the model as context.
                    input_tokens.append(response_token)
                state = 'expect_action'
            else:
                # Invalid state, end the episode
                rewards.append(-1.0)
                done = True

        print(decode(input_tokens, inv_vocab))
        print(env.get_list())

        # Tokens of an action that never completed (episode cut off in the
        # middle of a Comparison) have no reward and are excluded.
        credited = [
            (lp, slot) for lp, slot in zip(log_probs, reward_slots) if slot < len(rewards)
        ]
        if not credited:
            continue  # Nothing to learn from this episode

        returns = torch.tensor(
            _discounted_returns(rewards, GAMMA), dtype=torch.float32, device=device
        )

        if torch.isnan(returns).any():
            print(f"Episode {episode}, NaNs in returns.")
            continue

        # Standardise only when a meaningful spread exists; a single return
        # (or identical returns) is used as-is.
        if returns.numel() > 1:
            spread = returns.std()
            if spread != 0 and not torch.isnan(spread):
                returns = (returns - returns.mean()) / (spread + 1e-9)

        if torch.isnan(returns).any():
            print(f"Episode {episode}, NaNs in standardized returns.")
            continue

        loss = -torch.stack([lp * returns[slot] for lp, slot in credited]).sum()

        if torch.isnan(loss):
            print(f"Episode {episode}, NaN in loss.")
            continue

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()

        print(f"Episode {episode}, loss: {loss.item():.4f}, total reward: {sum(rewards):.4f}")

# Script entry point: run training with the default (non-verbose) logging.
if __name__ == "__main__":
    train()


Using device: cuda




len3 Comparison 2 2 equal Swap Comparison 0 2 more Swap
[12, 89, 96]
Episode 0, loss: -0.0312, total reward: 9.9600
len3 Swap
[88, 93, 4]
Episode 1, loss: -0.6931, total reward: -1.0000
len3 Swap
[88, 24, 28]
Episode 2, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 1 more Swap Swap
[4, 83, 2]
Episode 3, loss: -0.4682, total reward: -1.1100
len3 Comparison 0 0 equal Comparison 1 0 more Swap Comparison 1 1 equal Comparison 0
[81, 76, 96]
Episode 4, loss: -0.3639, total reward: -0.1500
len3 Comparison 1 2 more Swap
[22, 75, 79]
Episode 5, loss: 0.2867, total reward: 9.9900
len3 Swap
[36, 33, 41]
Episode 6, loss: -0.6931, total reward: -1.0000
len3 Swap
[62, 39, 46]
Episode 7, loss: -0.6931, total reward: -1.0000
len3 Swap
[61, 54, 96]
Episode 8, loss: -0.6931, total reward: -1.0000
len3 Swap
[40, 66, 70]
Episode 9, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 0 less Swap Swap
[37, 33, 60]
Episode 10, loss: -0.3280, total reward: -0.9100
len3 Comparison 1 2 less Swap Com

len3 Comparison 1 0 less Swap
[10, 41, 96]
Episode 82, loss: 0.2867, total reward: 9.9900
len3 Comparison 0 0 equal Swap Swap
[95, 73, 46]
Episode 83, loss: -0.3886, total reward: -1.0300
len3 Comparison 0 0 equal Comparison 1 2 less Comparison 1 2 less Comparison 0 0 equal
[20, 48, 62]
Episode 84, loss: -0.5772, total reward: -0.0600
len3 Comparison 0 1 less Comparison 0 0 equal Comparison 1 1 equal Comparison 0 1 less
[23, 83, 80]
Episode 85, loss: -1.0399, total reward: -0.0600
len3 Comparison 1 1 equal Swap Swap
[54, 69, 42]
Episode 86, loss: -0.3886, total reward: -1.0300
len3 Comparison 2 1 less Swap Comparison 2 0 more Comparison 0 2 less Comparison 2
[59, 20, 63]
Episode 87, loss: 0.0365, total reward: 0.0700
len3 Comparison 1 2 less Swap Swap
[61, 52, 22]
Episode 88, loss: -0.4682, total reward: -1.1100
len3 Swap
[28, 78, 17]
Episode 89, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Swap Swap
[80, 54, 85]
Episode 90, loss: 0.8712, total rew

len3 Comparison 2 2 equal Comparison 1 1 equal Swap Swap
[91, 61, 31]
Episode 157, loss: -0.7048, total reward: -1.0500
len3 Comparison 0 2 more Swap Comparison 0 1 less Swap Swap
[49, 5, 26]
Episode 158, loss: 1.0442, total reward: -1.0200
len3 Comparison 2 1 more Comparison 0 0 equal Swap
[14, 17, 71]
Episode 159, loss: 1.0885, total reward: 9.9700
len3 Comparison 1 2 more Swap Comparison 1 2 less Comparison 2 1 more Swap Swap
[10, 58, 5]
Episode 160, loss: -1.9926, total reward: -1.0300
len3 Comparison 0 2 less Comparison 2 2 equal Swap
[33, 44, 63]
Episode 161, loss: 0.3974, total reward: 9.9700
len3 Comparison 0 2 less Swap Swap
[59, 98, 19]
Episode 162, loss: -1.2679, total reward: -1.1100
len3 Swap
[85, 53, 54]
Episode 163, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 0 equal Comparison 2 0 less Swap
[39, 52, 52]
Episode 164, loss: 1.0804, total reward: 9.9700
len3 Swap
[22, 98, 23]
Episode 165, loss: -0.6931, total reward: -1.0000
len3 Swap
[96, 47, 34]
Episode 166, l

len3 Comparison 2 0 less Comparison 2 0 less Comparison 0 1 less Comparison 1 1 equal
[72, 90, 42]
Episode 229, loss: -0.0897, total reward: -0.0500
len3 Swap
[54, 94, 27]
Episode 230, loss: -0.6931, total reward: -1.0000
len3 Swap
[75, 84, 71]
Episode 231, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 2 0 less Comparison 0 1 less Comparison 0
[84, 93, 58]
Episode 232, loss: 1.0257, total reward: -0.0500
len3 Comparison 1 2 less Comparison 2 0 less Comparison 2 0 less Swap Comparison 2
[94, 48, 95]
Episode 233, loss: -0.8637, total reward: 0.0700
len3 Comparison 0 2 more Comparison 1 0 less Swap Comparison 2 0 less Comparison 2
[79, 86, 38]
Episode 234, loss: 1.0229, total reward: 0.0700
len3 Swap
[60, 60, 25]
Episode 235, loss: -0.6931, total reward: -1.0000
len3 Swap
[48, 22, 26]
Episode 236, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 1 less Comparison 1 0 more Comparison 1 2 more Swap
[31, 61, 96]
Episode 237, loss: -0.8645, total reward:

len3 Comparison 2 0 less Comparison 0 2 more Swap Comparison 0 2 less Comparison 1
[59, 24, 70]
Episode 304, loss: 0.5464, total reward: 0.0700
len3 Swap
[14, 99, 94]
Episode 305, loss: -0.6931, total reward: -1.0000
len3 Swap
[80, 60, 31]
Episode 306, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 0 more Swap Swap
[23, 33, 15]
Episode 307, loss: 0.1646, total reward: -1.1100
len3 Swap
[71, 17, 74]
Episode 308, loss: -0.6931, total reward: -1.0000
len3 Swap
[49, 59, 91]
Episode 309, loss: -0.6931, total reward: -1.0000
len3 Swap
[67, 94, 87]
Episode 310, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 0 less Comparison 0 0 equal Comparison 0 1 less Swap Swap
[61, 40, 15]
Episode 311, loss: -1.0508, total reward: -1.1400
len3 Comparison 0 1 less Comparison 0 0 equal Swap Comparison 1 0 more Comparison 1
[20, 37, 34]
Episode 312, loss: -0.7786, total reward: -0.0500
len3 Comparison 2 0 more Comparison 2 0 more Swap Swap
[85, 82, 68]
Episode 313, loss: -0.1996, total reward

len3 Comparison 2 1 less Comparison 2 0 less Comparison 1 2 more Comparison 1 0 less
[45, 40, 27]
Episode 383, loss: -0.0330, total reward: -0.0400
len3 Comparison 1 0 less Comparison 2 1 more Swap Comparison 2 0 less Comparison 2
[40, 56, 25]
Episode 384, loss: -1.4478, total reward: -0.1300
len3 Comparison 2 0 less Comparison 1 0 more Comparison 0 2 more Comparison 2 2 equal
[47, 69, 11]
Episode 385, loss: -0.8631, total reward: -0.0500
len3 Comparison 1 2 more Comparison 0 2 less Swap Comparison 0 0 equal Swap Comparison
[33, 62, 32]
Episode 386, loss: -1.4686, total reward: -0.1500
len3 Comparison 0 1 less Comparison 0 0 equal Swap Comparison 0 0 equal Comparison 1
[13, 90, 84]
Episode 387, loss: 1.3729, total reward: -0.0600
len3 Comparison 0 0 equal Swap Comparison 2 2 equal Comparison 2 0 more Swap Swap
[90, 15, 28]
Episode 388, loss: 0.7620, total reward: -1.1600
len3 Comparison 1 0 less Comparison 2 1 more Comparison 2 0 more Swap Swap
[84, 10, 67]
Episode 389, loss: -0.1588, 

Episode 457, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 0 equal Swap Swap
[63, 69, 3]
Episode 458, loss: -0.0018, total reward: -1.0300
len3 Comparison 0 1 more Comparison 1 2 less Comparison 1 2 less Comparison 1 0 less
[86, 55, 59]
Episode 459, loss: 0.3830, total reward: -0.0400
len3 Comparison 0 2 less Comparison 2 0 more Comparison 0 0 equal Comparison 1 1 equal
[28, 20, 97]
Episode 460, loss: 0.4352, total reward: -0.0600
len3 Comparison 0 0 equal Swap Comparison 1 0 less Swap Swap
[38, 63, 3]
Episode 461, loss: -1.2687, total reward: -0.9400
len3 Swap
[88, 18, 92]
Episode 462, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 0 equal Comparison 0 1 more Comparison 1 1 equal Swap Comparison 0
[44, 28, 37]
Episode 463, loss: 0.7790, total reward: -0.0600
len3 Comparison 0 0 equal Comparison 0 0 equal Comparison 0 0 equal Swap Comparison 0
[87, 31, 7]
Episode 464, loss: -0.4671, total reward: -0.0700
len3 Comparison 1 1 equal Comparison 0 0 equal Comparison 1 0 mor

len3 Comparison 0 0 equal Comparison 1 2 more Swap Comparison 2 2 equal Comparison 2
[70, 39, 42]
Episode 530, loss: -0.4285, total reward: 0.0500
len3 Comparison 0 0 equal Swap Comparison 1 0 equal Swap Comparison 0 1 equal Comparison
[76, 76, 6]
Episode 531, loss: -0.9033, total reward: -0.0800
len3 Comparison 0 1 less Comparison 0 0 equal Comparison 2 0 more Swap Swap
[97, 46, 12]
Episode 532, loss: 0.6070, total reward: -1.1400
len3 Comparison 1 0 less Comparison 0 0 equal Comparison 0 1 more Swap
[25, 52, 69]
Episode 533, loss: -0.8642, total reward: 9.9600
len3 Comparison 0 1 less Comparison 0 0 equal Swap
[28, 49, 91]
Episode 534, loss: 0.3974, total reward: 9.9700
len3 Comparison 2 0 more Swap Swap
[75, 51, 65]
Episode 535, loss: 0.1631, total reward: -1.1100
len3 Comparison 2 0 more Comparison 0 0 equal Swap Comparison 0 2 less Comparison 1
[13, 99, 66]
Episode 536, loss: 0.6784, total reward: -0.0500
len3 Comparison 1 2 less Comparison 2 2 equal Swap
[33, 51, 67]
Episode 537,

Episode 604, loss: -0.1948, total reward: -0.0600
len3 Comparison 2 2 equal Swap Comparison 0 0 equal Swap Swap
[93, 83, 64]
Episode 605, loss: 0.2985, total reward: -1.0600
len3 Comparison 0 0 equal Comparison 1 0 more Comparison 0 1 less Swap Swap
[94, 83, 61]
Episode 606, loss: 0.6140, total reward: -1.1400
len3 Swap
[95, 51, 88]
Episode 607, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 1 equal Comparison 2 1 more Swap Comparison 0 2 more Comparison 1
[31, 67, 19]
Episode 608, loss: -0.3577, total reward: -0.1400
len3 Comparison 0 1 less Comparison 1 1 equal Swap
[29, 79, 82]
Episode 609, loss: 0.3974, total reward: 9.9700
len3 Swap
[20, 62, 80]
Episode 610, loss: -0.6931, total reward: -1.0000
len3 Swap
[73, 37, 16]
Episode 611, loss: -0.6931, total reward: -1.0000
len3 Swap
[19, 37, 84]
Episode 612, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 2 more Comparison 1 2 more Comparison 0 0 equal Swap Comparison 0
[80, 55, 25]
Episode 613, loss: -0.1819, total reward

Episode 678, loss: -0.6931, total reward: -1.0000
len3 Swap
[41, 63, 42]
Episode 679, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 1 less Swap Comparison 2 1 more Comparison 0 0 equal Comparison 0
[74, 52, 88]
Episode 680, loss: -0.0369, total reward: -0.1400
len3 Comparison 1 0 more Swap Comparison 1 1 equal Comparison 0 0 equal Comparison 1
[73, 60, 35]
Episode 681, loss: -0.2685, total reward: -0.1500
len3 Swap
[25, 9, 11]
Episode 682, loss: -0.6931, total reward: -1.0000
len3 Swap
[30, 41, 77]
Episode 683, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 0 more Comparison 0 2 less Comparison 2 0 more Comparison 0 2 less
[31, 26, 65]
Episode 684, loss: -0.0330, total reward: -0.0400
len3 Swap
[96, 91, 86]
Episode 685, loss: -0.6931, total reward: -1.0000
len3 Swap
[76, 13, 6]
Episode 686, loss: -0.6931, total reward: -1.0000
len3 Swap
[9, 15, 10]
Episode 687, loss: -0.6931, total reward: -1.0000
len3 Swap
[51, 60, 58]
Episode 688, loss: -0.6931, total reward: -1.0000

len3 Comparison 1 2 more Comparison 1 1 equal Comparison 0 1 less Comparison 0 2 more
[41, 46, 13]
Episode 751, loss: 0.4802, total reward: -0.0500
len3 Comparison 0 1 less Comparison 0 1 less Swap Comparison 1 1 equal Comparison 1
[91, 7, 45]
Episode 752, loss: -0.0583, total reward: -0.1400
len3 Swap
[39, 18, 44]
Episode 753, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 1 more Comparison 1 2 less Swap Comparison 1 1 equal Comparison 2
[55, 71, 2]
Episode 754, loss: -0.8628, total reward: -0.1400
len3 Comparison 1 1 equal Comparison 1 2 less Comparison 0 0 equal Comparison 1 2 less
[95, 7, 34]
Episode 755, loss: 0.8423, total reward: -0.0600
len3 Swap
[91, 64, 74]
Episode 756, loss: -0.6931, total reward: -1.0000
len3 Swap
[30, 5, 65]
Episode 757, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 1 less Swap Comparison 1 1 equal Comparison 2 0 less Swap
[23, 23, 100]
Episode 758, loss: -0.5302, total reward: 9.8600
len3 Swap
[43, 13, 8]
Episode 759, loss: -0.6931, total

len3 Comparison 0 0 equal Comparison 2 0 more Comparison 0 2 less Comparison 2 0 more
[2, 26, 99]
Episode 824, loss: 0.3097, total reward: -0.0500
len3 Swap
[99, 43, 8]
Episode 825, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 0 equal Swap Comparison 2 0 less Comparison 0 1 more Comparison 2
[54, 38, 49]
Episode 826, loss: 0.7149, total reward: -0.0500
len3 Swap
[51, 95, 4]
Episode 827, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 0 equal Comparison 0 1 more Swap Comparison 1 0 more Swap Swap
[85, 25, 29]
Episode 828, loss: 0.2667, total reward: -1.0400
len3 Comparison 0 0 equal Swap Comparison 0 0 equal Comparison 0 1 less Comparison 2
[20, 96, 17]
Episode 829, loss: 0.2309, total reward: -0.0600
len3 Comparison 1 1 equal Comparison 2 1 more Comparison 0 2 more Comparison 0 2 more
[83, 28, 43]
Episode 830, loss: 0.1149, total reward: -0.0500
len3 Swap
[8, 52, 49]
Episode 831, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 0 more Comparison 0 1 more Comparis

len3 Comparison 1 1 equal Comparison 1 0 more Comparison 0 0 equal Swap
[33, 42, 55]
Episode 896, loss: 0.7026, total reward: 9.9500
len3 Comparison 0 0 equal Comparison 2 0 more Comparison 0 0 equal Comparison 0 0 equal
[64, 47, 83]
Episode 897, loss: 0.8335, total reward: -0.0700
len3 Swap
[50, 70, 86]
Episode 898, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 0 equal Comparison 0 2 more Swap Comparison 0 0 equal Comparison 2
[24, 81, 75]
Episode 899, loss: -0.9524, total reward: 0.0500
len3 Comparison 1 0 more Comparison 2 0 less Comparison 0 1 less Comparison 2 1 less
[15, 41, 6]
Episode 900, loss: 0.2642, total reward: -0.0400
len3 Comparison 0 0 equal Comparison 0 1 more Swap
[29, 54, 82]
Episode 901, loss: -0.6791, total reward: 9.9700
len3 Comparison 0 0 equal Comparison 0 1 less Comparison 0 2 more Comparison 1 0 more
[57, 75, 54]
Episode 902, loss: -0.3097, total reward: -0.0500
len3 Comparison 2 0 more Comparison 1 0 more Comparison 0 0 equal Comparison 0 0 equal
[2

Episode 972, loss: 0.6582, total reward: -0.0700
len3 Comparison 1 0 less Comparison 1 0 less Comparison 1 0 less Swap Swap
[14, 81, 77]
Episode 973, loss: -1.0152, total reward: -0.9300
len3 Comparison 2 1 less Comparison 0 0 equal Comparison 2 1 less Comparison 2 0 less
[97, 85, 2]
Episode 974, loss: 0.3618, total reward: -0.0500
len3 Swap
[11, 77, 70]
Episode 975, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 1 less Swap Swap
[19, 18, 21]
Episode 976, loss: -0.5589, total reward: -0.9100
len3 Comparison 1 1 equal Swap Comparison 2 0 more Comparison 1 0 less Comparison 0
[53, 3, 61]
Episode 977, loss: 0.5516, total reward: -0.0500
len3 Comparison 1 0 less Swap Comparison 2 0 less Swap Comparison 0 0 equal Comparison
[15, 98, 71]
Episode 978, loss: 1.6517, total reward: 0.1600
len3 Swap
[50, 94, 31]
Episode 979, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 0 less Swap
[27, 72, 85]
Episode 980, loss: 0.6538, total reward: 9.9700
len3 Comparison 0

len3 Comparison 0 2 more Comparison 0 0 equal Comparison 0 0 equal Swap Comparison 0
[30, 71, 21]
Episode 1052, loss: -1.0398, total reward: -0.0600
len3 Comparison 0 2 less Comparison 0 0 equal Comparison 0 0 equal Swap
[5, 35, 89]
Episode 1053, loss: 0.1021, total reward: 9.9500
len3 Comparison 0 1 less Comparison 0 0 equal Comparison 0 2 more Swap Comparison 0
[3, 16, 5]
Episode 1054, loss: 0.2360, total reward: 0.0600
len3 Comparison 0 0 equal Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more
[97, 99, 52]
Episode 1055, loss: -0.3482, total reward: -0.0600
len3 Comparison 0 2 more Comparison 0 0 equal Swap Swap
[43, 91, 21]
Episode 1056, loss: 0.0564, total reward: -1.0400
len3 Swap
[22, 83, 1]
Episode 1057, loss: -1.2341, total reward: -1.0000
len3 Comparison 0 0 equal Swap Swap
[4, 58, 29]
Episode 1058, loss: -0.4729, total reward: -1.0300
len3 Comparison 1 2 more Comparison 0 0 equal Swap Comparison 2 1 less Comparison 0
[47, 91, 64]
Episode 1059, loss: 0.2188, total r

Episode 1130, loss: -0.6931, total reward: -1.0000
len3 Swap
[11, 94, 87]
Episode 1131, loss: -0.4844, total reward: -1.0000
len3 Swap
[65, 54, 54]
Episode 1132, loss: -0.5714, total reward: -1.0000
len3 Swap
[94, 14, 46]
Episode 1133, loss: -0.6931, total reward: -1.0000
len3 Swap
[61, 65, 75]
Episode 1134, loss: -0.6708, total reward: -1.0000
len3 Comparison 0 1 more Comparison 2 1 more Comparison 1 0 less Swap Comparison 0
[7, 86, 13]
Episode 1135, loss: -0.3463, total reward: 0.0700
len3 Comparison 0 2 more Comparison 0 0 equal Comparison 0 2 more Swap
[15, 51, 81]
Episode 1136, loss: -0.3580, total reward: 9.9600
len3 Comparison 2 1 more Comparison 1 1 equal Comparison 1 1 equal Swap
[6, 34, 44]
Episode 1137, loss: -1.0899, total reward: 9.9500
len3 Comparison 1 2 less Comparison 2 1 more Comparison 1 1 equal Comparison 0 2 less
[31, 18, 76]
Episode 1138, loss: -1.1102, total reward: -0.0500
len3 Swap
[99, 89, 69]
Episode 1139, loss: -0.6931, total reward: -1.0000
len3 Comparison 

Episode 1200, loss: 1.2895, total reward: -1.1300
len3 Comparison 0 0 equal Swap Comparison 0 1 less Swap Comparison 2 1 less Comparison
[98, 78, 2]
Episode 1201, loss: -1.2767, total reward: -0.1500
len3 Comparison 0 1 less Comparison 1 1 equal Comparison 0 1 less Comparison 1 0 more
[15, 52, 77]
Episode 1202, loss: -0.3486, total reward: -0.0500
len3 Comparison 0 1 less Comparison 1 1 equal Comparison 1 1 equal Comparison 1 1 equal
[8, 91, 25]
Episode 1203, loss: -0.9240, total reward: -0.0700
len3 Comparison 1 0 less Swap Comparison 0 1 less Comparison 1 1 equal Swap Comparison
[15, 33, 13]
Episode 1204, loss: 0.1209, total reward: 0.0500
len3 Swap
[88, 60, 77]
Episode 1205, loss: -0.6931, total reward: -1.0000
len3 Swap
[88, 2, 19]
Episode 1206, loss: -0.1484, total reward: -1.0000
len3 Comparison 0 1 less Swap Swap
[63, 15, 27]
Episode 1207, loss: 0.1638, total reward: -1.1100
len3 Comparison 1 0 more Swap Comparison 2 1 more Comparison 1 0 less Swap Comparison
[17, 88, 47]
Episod

len3 Comparison 2 2 equal Comparison 0 0 equal Comparison 2 2 equal Swap Comparison 0
[60, 2, 67]
Episode 1279, loss: -0.1927, total reward: -0.0700
len3 Swap
[59, 22, 51]
Episode 1280, loss: -0.8406, total reward: -1.0000
len3 Swap
[85, 55, 28]
Episode 1281, loss: -0.8374, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 0 equal Comparison 2 0 more Comparison 0 0 equal
[1, 61, 58]
Episode 1282, loss: -0.8449, total reward: -0.0700
len3 Comparison 2 1 more Comparison 0 2 less Comparison 2 2 equal Comparison 1 2 less
[25, 66, 72]
Episode 1283, loss: -1.1355, total reward: -0.0500
len3 Swap
[95, 88, 96]
Episode 1284, loss: -0.6931, total reward: -1.0000
len3 Swap
[23, 32, 25]
Episode 1285, loss: -0.6931, total reward: -1.0000
len3 Swap
[23, 63, 11]
Episode 1286, loss: -0.8319, total reward: -1.0000
len3 Swap
[60, 12, 39]
Episode 1287, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 0 more Comparison 2 2 equal Comparison 2 1 more Swap Comparison 2
[21, 91, 43]
Episode 1

len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 0 1 more Comparison 0
[39, 20, 27]
Episode 1361, loss: 0.4728, total reward: -0.0600
len3 Swap
[87, 15, 39]
Episode 1362, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[1, 80, 30]
Episode 1363, loss: -0.6173, total reward: -0.0700
len3 Comparison 0 2 less Comparison 2 1 more Comparison 2 2 equal Comparison 2 0 more
[37, 38, 72]
Episode 1364, loss: -1.1659, total reward: -0.0500
len3 Comparison 2 1 less Comparison 2 2 equal Swap Comparison 2 1 less Comparison 2
[64, 71, 13]
Episode 1365, loss: -0.1223, total reward: -0.0500
len3 Swap
[71, 4, 3]
Episode 1366, loss: -0.9524, total reward: -1.0000
len3 Comparison 0 1 more Comparison 1 0 less Comparison 0 2 more Comparison 1 2 more
[80, 50, 40]
Episode 1367, loss: -0.8628, total reward: -0.0400
len3 Comparison 2 2 equal Comparison 0 0 equal Comparison 2 0 less Comparison 1 2 more
[51, 91, 44]
Episode 

len3 Comparison 0 1 more Comparison 2 2 equal Swap Comparison 2 0 less Comparison 2
[86, 72, 81]
Episode 1436, loss: -0.8096, total reward: -0.0500
len3 Swap
[11, 59, 7]
Episode 1437, loss: -0.6749, total reward: -1.0000
len3 Swap
[87, 54, 40]
Episode 1438, loss: -0.7366, total reward: -1.0000
len3 Swap
[15, 64, 12]
Episode 1439, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2 0 less Swap Swap
[64, 52, 66]
Episode 1440, loss: 0.2613, total reward: -0.9600
len3 Comparison 2 1 more Comparison 2 2 equal Comparison 2 2 equal Swap
[24, 40, 80]
Episode 1441, loss: -0.3910, total reward: 9.9500
len3 Swap
[62, 34, 60]
Episode 1442, loss: -0.8710, total reward: -1.0000
len3 Swap
[83, 54, 98]
Episode 1443, loss: -0.6931, total reward: -1.0000
len3 Swap
[94, 9, 57]
Episode 1444, loss: -0.6931, total reward: -1.0000
len3 Swap
[75, 72, 2]
Episode 1445, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison

Episode 1513, loss: -0.5569, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 more
[15, 68, 2]
Episode 1514, loss: -0.3352, total reward: -0.0600
len3 Swap
[31, 3, 98]
Episode 1515, loss: -0.4811, total reward: -1.0000
len3 Swap
[82, 40, 45]
Episode 1516, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 2 more Swap Comparison 2 2 equal Swap Comparison 2 2 equal Comparison
[11, 3, 24]
Episode 1517, loss: 0.4166, total reward: 0.0400
len3 Comparison 2 0 more Comparison 2 2 equal Comparison 0 2 less Swap Comparison 2
[59, 25, 31]
Episode 1518, loss: 0.8476, total reward: -0.1400
len3 Swap
[44, 42, 8]
Episode 1519, loss: -0.5158, total reward: -1.0000
len3 Comparison 1 2 less Swap Comparison 2 2 equal Comparison 0 2 less Comparison 1
[15, 68, 37]
Episode 1520, loss: -1.0422, total reward: -0.1400
len3 Swap
[46, 93, 39]
Episode 1521, loss: -0.4493, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 0 2 more Comparison

len3 Comparison 1 2 more Comparison 2 0 more Comparison 0 0 equal Comparison 2 2 equal
[7, 98, 73]
Episode 1590, loss: -0.9967, total reward: -0.0600
len3 Swap
[33, 55, 100]
Episode 1591, loss: -0.6931, total reward: -1.0000
len3 Swap
[1, 1, 21]
Episode 1592, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Swap Comparison 2 2 equal Comparison
[58, 76, 5]
Episode 1593, loss: -0.3690, total reward: -0.0800
len3 Swap
[23, 30, 48]
Episode 1594, loss: -0.6931, total reward: -1.0000
len3 Swap
[24, 33, 22]
Episode 1595, loss: -0.6931, total reward: -1.0000
len3 Swap
[78, 29, 80]
Episode 1596, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2
[37, 39, 10]
Episode 1597, loss: 0.1946, total reward: -0.0700
len3 Swap
[80, 95, 48]
Episode 1598, loss: -0.9134, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 0 less Comparison 2 2 equal Comparison 2 0 less
[89, 13, 30]
Episod

len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 2
[99, 62, 2]
Episode 1663, loss: 0.2856, total reward: -0.0700
len3 Swap
[53, 21, 25]
Episode 1664, loss: -0.8799, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2
[98, 89, 26]
Episode 1665, loss: -0.1306, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2
[85, 71, 95]
Episode 1666, loss: -0.7752, total reward: -0.0700
len3 Comparison 2 0 more Comparison 2 2 equal Comparison 2 1 less Comparison 2 1 less
[8, 46, 31]
Episode 1667, loss: 0.1306, total reward: -0.0500
len3 Swap
[82, 92, 67]
Episode 1668, loss: -0.6931, total reward: -1.0000
len3 Swap
[50, 54, 54]
Episode 1669, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 0 more Comparison 0 0 equal Comparison 2 0 more
[57, 75, 89]
Episode 1670, loss: 0.0616, total reward: -0.0600
len3 Comparison 2 1 more Compari

Episode 1736, loss: -0.3751, total reward: -0.0800
len3 Comparison 0 0 equal Comparison 2 0 less Comparison 1 2 less Swap Comparison 0
[59, 42, 33]
Episode 1737, loss: 0.4986, total reward: -0.1400
len3 Swap
[92, 53, 54]
Episode 1738, loss: -1.0627, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal
[10, 53, 1]
Episode 1739, loss: -0.8347, total reward: -0.0600
len3 Comparison 2 2 equal Swap Swap
[25, 64, 48]
Episode 1740, loss: 0.3104, total reward: -1.0300
len3 Comparison 0 1 less Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 0
[86, 88, 38]
Episode 1741, loss: 0.4735, total reward: -0.0600
len3 Comparison 2 1 less Comparison 2 1 less Swap Comparison 2 2 equal Comparison 2
[76, 71, 89]
Episode 1742, loss: 1.4926, total reward: 0.0600
len3 Swap
[23, 60, 73]
Episode 1743, loss: -1.1234, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 1 more Swap Comparison 2 0 less Comparison 1
[98, 54, 2]
Episode 1744, lo

len3 Comparison 1 2 more Comparison 1 2 more Swap Comparison 2 2 equal Comparison 2
[49, 31, 91]
Episode 1810, loss: 0.5537, total reward: 0.0600
len3 Comparison 1 0 more Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[11, 50, 43]
Episode 1811, loss: -0.0345, total reward: -0.0600
len3 Swap
[69, 100, 44]
Episode 1812, loss: -1.0475, total reward: -1.0000
len3 Comparison 2 2 equal Swap
[13, 13, 51]
Episode 1813, loss: -0.3158, total reward: 9.9800
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 1 2 less Swap Comparison
[43, 69, 26]
Episode 1814, loss: -0.2722, total reward: -0.1600
len3 Swap
[64, 60, 69]
Episode 1815, loss: -1.0089, total reward: -1.0000
len3 Swap
[9, 100, 35]
Episode 1816, loss: -0.9476, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 0
[32, 52, 4]
Episode 1817, loss: -0.4654, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 less Comparison 2 0 les

len3 Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal Comparison 2 0 more
[33, 35, 46]
Episode 1883, loss: -0.8145, total reward: -0.0500
len3 Comparison 2 1 less Comparison 0 0 equal Comparison 1 0 more Swap Comparison 2
[90, 20, 36]
Episode 1884, loss: 1.0099, total reward: -0.1400
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2
[98, 34, 22]
Episode 1885, loss: -0.7861, total reward: -0.0700
len3 Comparison 2 2 equal Swap Comparison 2 1 more Swap Comparison 2 0 less Comparison
[70, 94, 49]
Episode 1886, loss: -0.5009, total reward: -0.1500
len3 Comparison 1 1 equal Comparison 2 0 less Comparison 2 0 less Comparison 2 2 equal
[83, 77, 30]
Episode 1887, loss: -0.0935, total reward: -0.0600
len3 Comparison 2 2 equal Swap Comparison 0 1 more Comparison 2 2 equal Swap Comparison
[28, 10, 79]
Episode 1888, loss: -0.3517, total reward: -0.0700
len3 Swap
[50, 28, 16]
Episode 1889, loss: -1.0478, total reward: -1.0000
len3 Comparison 2 2 equal

len3 Comparison 2 1 less Comparison 2 1 less Comparison 2 2 equal Comparison 0 0 equal
[99, 92, 20]
Episode 1957, loss: -1.1715, total reward: -0.0600
len3 Comparison 1 2 less Swap Comparison 0 2 more Swap Comparison 1 2 more Comparison
[66, 76, 68]
Episode 1958, loss: -0.6784, total reward: -0.0300
len3 Swap
[71, 91, 27]
Episode 1959, loss: -0.6625, total reward: -1.0000
len3 Comparison 2 1 more Comparison 1 0 less Comparison 0 0 equal Comparison 2 2 equal
[79, 2, 78]
Episode 1960, loss: -0.6325, total reward: -0.0600
len3 Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal Comparison 1 2 less
[61, 2, 88]
Episode 1961, loss: -0.2129, total reward: -0.0500
len3 Swap
[37, 43, 9]
Episode 1962, loss: -0.6931, total reward: -1.0000
len3 Swap
[71, 64, 68]
Episode 1963, loss: -0.7025, total reward: -1.0000
len3 Comparison 2 0 less Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 1
[72, 34, 48]
Episode 1964, loss: -0.1008, total reward: -0.0600
len3 Swap
[92, 1, 1]
Episode 1

Episode 2030, loss: -1.1439, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 2 1 more Comparison 2 0 less Comparison 2 2 equal
[100, 11, 97]
Episode 2031, loss: -0.2168, total reward: -0.0600
len3 Comparison 2 2 equal Swap Comparison 0 0 equal Comparison 2 2 equal Swap Comparison
[35, 57, 47]
Episode 2032, loss: 0.8852, total reward: -0.0800
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[48, 59, 56]
Episode 2033, loss: -0.0619, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 0 more Comparison 1 2 more Swap
[33, 59, 73]
Episode 2034, loss: -0.1302, total reward: 9.9600
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 1 0 more Comparison 2
[83, 87, 43]
Episode 2035, loss: 0.2564, total reward: -0.0600
len3 Swap
[37, 71, 4]
Episode 2036, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 2
[95, 45, 13]
Episode 2037, loss: 0.0799, total rewar

Episode 2104, loss: -0.6931, total reward: -1.0000
len3 Swap
[99, 72, 93]
Episode 2105, loss: -0.6931, total reward: -1.0000
len3 Swap
[38, 52, 26]
Episode 2106, loss: -0.8514, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 less Comparison 2 2 equal
[77, 88, 10]
Episode 2107, loss: 0.0251, total reward: -0.0700
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2 2 equal Swap Swap
[8, 66, 29]
Episode 2108, loss: -1.5193, total reward: -1.0800
len3 Swap
[70, 55, 84]
Episode 2109, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 1 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[97, 94, 64]
Episode 2110, loss: -0.8611, total reward: -0.0800
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 0 2 more Comparison 2
[79, 61, 32]
Episode 2111, loss: 0.2317, total reward: -0.0600
len3 Comparison 0 2 less Comparison 1 2 less Comparison 1 0 less Comparison 2 2 equal
[58, 48, 59]
Episode 2112, loss: -1.0753, t

Episode 2177, loss: -0.4244, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal Swap
[26, 48, 61]
Episode 2178, loss: -0.0583, total reward: 9.9500
len3 Comparison 1 2 more Swap Swap
[97, 46, 89]
Episode 2179, loss: 0.6152, total reward: -0.9100
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Swap Comparison 2
[5, 88, 69]
Episode 2180, loss: 0.3782, total reward: -0.0600
len3 Comparison 1 0 less Swap Comparison 2 2 equal Comparison 1 2 more Comparison 2
[78, 94, 65]
Episode 2181, loss: -0.0065, total reward: 0.0600
len3 Comparison 1 2 less Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2
[73, 44, 82]
Episode 2182, loss: -0.2957, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less Swap Comparison 2
[59, 15, 30]
Episode 2183, loss: -0.8195, total reward: 0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[97, 62, 44]
Episode 2184, loss: 0.48

len3 Comparison 2 1 more Comparison 2 2 equal Comparison 0 1 less Comparison 2 2 equal
[27, 40, 72]
Episode 2250, loss: -0.0997, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 1 more Comparison 2 0 more Comparison 2 2 equal
[33, 13, 62]
Episode 2251, loss: 0.0825, total reward: -0.0600
len3 Comparison 1 2 more Comparison 2 2 equal Swap Swap
[48, 61, 40]
Episode 2252, loss: 0.9399, total reward: -1.0400
len3 Swap
[8, 9, 43]
Episode 2253, loss: -0.9423, total reward: -1.0000
len3 Comparison 1 2 less Comparison 1 0 less Comparison 1 2 less Comparison 1 2 less
[86, 20, 90]
Episode 2254, loss: -0.4113, total reward: -0.0400
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less Comparison 2 1 more
[33, 5, 70]
Episode 2255, loss: -0.9112, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[39, 72, 52]
Episode 2256, loss: 0.0510, total reward: -0.0700
len3 Comparison 0 0 equal Swap
[21, 53, 55]
Episode 2257, l

len3 Comparison 2 2 equal Swap Comparison 2 1 more Comparison 0 2 more Swap Comparison
[54, 33, 78]
Episode 2319, loss: -0.4828, total reward: 0.0500
len3 Swap
[82, 76, 42]
Episode 2320, loss: -0.6897, total reward: -1.0000
len3 Swap
[91, 47, 34]
Episode 2321, loss: -0.6931, total reward: -1.0000
len3 Swap
[28, 39, 4]
Episode 2322, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 0 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 more
[16, 51, 80]
Episode 2323, loss: -1.0714, total reward: -0.0600
len3 Swap
[1, 5, 20]
Episode 2324, loss: -0.6839, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal
[20, 67, 34]
Episode 2325, loss: -0.2354, total reward: -0.0700
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[10, 6, 20]
Episode 2326, loss: -1.3407, total reward: -0.0600
len3 Swap
[27, 13, 48]
Episode 2327, loss: -0.6993, total reward: -1.0000
len3 Comparison 2 2 equal Swap

Episode 2394, loss: -0.1578, total reward: -0.0500
len3 Comparison 2 1 less Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 2
[56, 18, 96]
Episode 2395, loss: -1.0977, total reward: 0.0500
len3 Swap
[18, 55, 30]
Episode 2396, loss: -0.6968, total reward: -1.0000
len3 Swap
[74, 80, 55]
Episode 2397, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 1 more Comparison 2 1 more Comparison 2 2 equal Comparison 1 2 less
[57, 36, 73]
Episode 2398, loss: -1.0660, total reward: -0.0500
len3 Swap
[5, 54, 26]
Episode 2399, loss: -0.6931, total reward: -1.0000
len3 Swap
[75, 98, 59]
Episode 2400, loss: -0.8073, total reward: -1.0000
len3 Swap
[91, 36, 58]
Episode 2401, loss: -0.7389, total reward: -1.0000
len3 Comparison 1 0 less Comparison 2 1 more Comparison 2 2 equal Comparison 2 2 equal
[26, 7, 21]
Episode 2402, loss: -1.1586, total reward: -0.0600
len3 Swap
[48, 86, 87]
Episode 2403, loss: -0.7886, total reward: -1.0000
len3 Swap
[18, 75, 17]
Episode 2404, loss: -0.6931, total 

len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[24, 69, 41]
Episode 2469, loss: -1.6603, total reward: -0.0700
len3 Swap
[48, 23, 27]
Episode 2470, loss: -0.4320, total reward: -1.0000
len3 Swap
[80, 94, 50]
Episode 2471, loss: -0.4936, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 1 2 more Comparison 2
[67, 20, 10]
Episode 2472, loss: -0.1302, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[10, 81, 40]
Episode 2473, loss: -0.8462, total reward: -0.0700
len3 Swap
[78, 51, 53]
Episode 2474, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 0 less Swap
[20, 73, 99]
Episode 2475, loss: -0.7172, total reward: 9.9800
len3 Swap
[100, 14, 49]
Episode 2476, loss: -0.5262, total reward: -1.0000
len3 Swap
[12, 56, 43]
Episode 2477, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 l

Episode 2545, loss: 6.0059, total reward: -0.0400
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2
[16, 68, 20]
Episode 2546, loss: -1.0398, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[53, 14, 32]
Episode 2547, loss: -0.6453, total reward: -0.0700
len3 Swap
[37, 24, 95]
Episode 2548, loss: -0.7733, total reward: -1.0000
len3 Swap
[66, 37, 83]
Episode 2549, loss: -0.6931, total reward: -1.0000
len3 Swap
[47, 32, 100]
Episode 2550, loss: -0.8557, total reward: -1.0000
len3 Swap
[9, 54, 94]
Episode 2551, loss: -0.6931, total reward: -1.0000
len3 Swap
[77, 26, 1]
Episode 2552, loss: -0.8568, total reward: -1.0000
len3 Swap
[97, 82, 17]
Episode 2553, loss: -0.8741, total reward: -1.0000
len3 Swap
[40, 48, 96]
Episode 2554, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 0 less Comparison 2 2 equal Comparison 2 2 equal
[51, 31, 25]
Episode 2555, loss: -0.5840, to

Episode 2621, loss: -0.1435, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more
[34, 83, 98]
Episode 2622, loss: -0.0121, total reward: -0.0700
len3 Comparison 2 0 more Swap Comparison 2 2 equal Comparison 2 1 less Comparison 2
[39, 40, 37]
Episode 2623, loss: 0.0696, total reward: -0.1400
len3 Comparison 2 2 equal Comparison 2 0 less Swap
[25, 55, 73]
Episode 2624, loss: -0.3790, total reward: 9.9700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[89, 38, 22]
Episode 2625, loss: -0.6941, total reward: -0.0700
len3 Comparison 1 1 equal Comparison 2 2 equal Swap Comparison 2 0 less Comparison 2
[5, 42, 2]
Episode 2626, loss: -0.4481, total reward: -0.0600
len3 Swap
[84, 71, 33]
Episode 2627, loss: -0.7463, total reward: -1.0000
len3 Swap
[51, 95, 5]
Episode 2628, loss: -0.8118, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[

len3 Comparison 0 2 more Comparison 0 0 equal Comparison 2 1 less Comparison 2 2 equal
[85, 90, 36]
Episode 2696, loss: -0.8889, total reward: -0.0600
len3 Comparison 0 2 less Comparison 0 0 equal Comparison 0 2 less Comparison 0 0 equal
[72, 69, 79]
Episode 2697, loss: -0.8542, total reward: -0.0600
len3 Swap
[89, 82, 19]
Episode 2698, loss: -0.8991, total reward: -1.0000
len3 Swap
[49, 84, 46]
Episode 2699, loss: -0.8872, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 0 more Comparison 0 2 less Comparison 0 2 less
[27, 96, 31]
Episode 2700, loss: -1.2323, total reward: -0.0400
len3 Comparison 0 2 more Comparison 2 0 less Comparison 2 2 equal Comparison 2 1 less
[30, 89, 9]
Episode 2701, loss: -0.9173, total reward: -0.0500
len3 Comparison 1 1 equal Comparison 2 2 equal Comparison 1 0 less Comparison 0 2 more
[96, 76, 47]
Episode 2702, loss: -0.4028, total reward: -0.0600
len3 Comparison 1 2 more Comparison 2 1 less Comparison 1 0 more Swap Comparison 2
[30, 22, 13]
Episo

len3 Comparison 1 1 equal Comparison 2 2 equal Comparison 1 1 equal Comparison 2 2 equal
[57, 91, 44]
Episode 2764, loss: -0.9795, total reward: -0.0800
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[43, 28, 82]
Episode 2765, loss: -1.3244, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 1 less Comparison 2 0 more Comparison 1 0 more
[40, 92, 58]
Episode 2766, loss: -1.3938, total reward: -0.0400
len3 Swap
[55, 56, 41]
Episode 2767, loss: -0.3513, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[95, 86, 43]
Episode 2768, loss: -0.9734, total reward: -0.0600
len3 Swap
[66, 15, 90]
Episode 2769, loss: -0.6931, total reward: -1.0000
len3 Swap
[7, 80, 51]
Episode 2770, loss: -0.6931, total reward: -1.0000
len3 Swap
[71, 41, 33]
Episode 2771, loss: -0.6931, total reward: -1.0000
len3 Swap
[93, 75, 95]
Episode 2772, loss: -0.3702, total reward: -1.0000
len3 Swap
[90, 79, 27]
Epi

len3 Comparison 0 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 equal
[90, 24, 90]
Episode 2843, loss: -0.6499, total reward: -0.0800
len3 Comparison 1 2 more Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more
[52, 55, 13]
Episode 2844, loss: -0.5425, total reward: -0.0500
len3 Swap
[64, 82, 36]
Episode 2845, loss: -0.6931, total reward: -1.0000
len3 Swap
[53, 99, 91]
Episode 2846, loss: -1.2808, total reward: -1.0000
len3 Comparison 0 2 less Comparison 1 2 less Comparison 1 2 less Swap Comparison 1
[67, 87, 86]
Episode 2847, loss: -0.1204, total reward: -0.1300
len3 Swap
[12, 12, 83]
Episode 2848, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 0 less Comparison 2 2 equal Comparison 0 2 more Comparison 2 1 more
[80, 8, 28]
Episode 2849, loss: -0.4336, total reward: -0.0500
len3 Comparison 1 2 more Comparison 0 2 more Comparison 0 2 more Swap
[12, 16, 37]
Episode 2850, loss: -0.5324, total reward: 9.9700
len3 Comparison 1 2 more Comparison 2 2 equal 

len3 Comparison 0 2 less Comparison 2 2 equal Comparison 0 2 less Comparison 2 0 more
[21, 17, 65]
Episode 2911, loss: -1.8417, total reward: -0.0500
len3 Comparison 1 2 more Comparison 2 0 less Comparison 0 2 more Comparison 2 2 equal
[59, 75, 32]
Episode 2912, loss: -1.2241, total reward: -0.0500
len3 Swap
[12, 11, 81]
Episode 2913, loss: -0.5548, total reward: -1.0000
len3 Swap
[85, 75, 94]
Episode 2914, loss: -0.6025, total reward: -1.0000
len3 Swap
[61, 75, 13]
Episode 2915, loss: -0.4731, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[24, 14, 82]
Episode 2916, loss: -1.0037, total reward: -0.0600
len3 Comparison 2 0 less Swap Swap
[50, 37, 96]
Episode 2917, loss: -0.3117, total reward: -0.9100
len3 Swap
[9, 73, 84]
Episode 2918, loss: -0.5747, total reward: -1.0000
len3 Swap
[11, 65, 15]
Episode 2919, loss: -0.6931, total reward: -1.0000
len3 Swap
[70, 77, 53]
Episode 2920, loss: -0.5967, total reward: -1.0000
len3 Sw

len3 Comparison 0 2 less Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2
[25, 53, 31]
Episode 2989, loss: -0.9725, total reward: -0.0600
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 less
[98, 31, 53]
Episode 2990, loss: -0.4553, total reward: -0.0600
len3 Comparison 1 2 less Comparison 2 2 equal Swap Comparison 1 2 less Comparison 0
[57, 18, 46]
Episode 2991, loss: 0.0128, total reward: -0.0500
len3 Comparison 1 2 less Comparison 2 1 more Comparison 0 2 less Comparison 2 2 equal
[15, 8, 16]
Episode 2992, loss: -0.5648, total reward: -0.0500
len3 Comparison 1 2 less Swap Swap
[72, 86, 11]
Episode 2993, loss: -0.0839, total reward: -1.1100
len3 Swap
[21, 44, 49]
Episode 2994, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Swap
[8, 23, 60]
Episode 2995, loss: -0.3283, total reward: 9.9700
len3 Comparison 1 2 more Swap
[7, 19, 30]
Episode 2996, loss: 0.2644, total reward: 9.9900
len3 Comparison 1 2 more Co

len3 Comparison 1 1 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[64, 47, 11]
Episode 3060, loss: -0.3973, total reward: -0.0700
len3 Swap
[50, 89, 95]
Episode 3061, loss: -0.6931, total reward: -1.0000
len3 Swap
[35, 95, 8]
Episode 3062, loss: -0.6931, total reward: -1.0000
len3 Swap
[92, 32, 25]
Episode 3063, loss: -0.6931, total reward: -1.0000
len3 Swap
[88, 74, 12]
Episode 3064, loss: -0.7591, total reward: -1.0000
len3 Swap
[56, 33, 78]
Episode 3065, loss: -0.9122, total reward: -1.0000
len3 Swap
[96, 83, 7]
Episode 3066, loss: -0.9114, total reward: -1.0000
len3 Swap
[62, 9, 45]
Episode 3067, loss: -0.9537, total reward: -1.0000
len3 Comparison 1 2 less Swap Comparison 2 2 equal Comparison 0 2 more Comparison 2
[56, 77, 37]
Episode 3068, loss: -0.9295, total reward: -0.1400
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[19, 55, 96]
Episode 3069, loss: -0.4501, total reward: -0.0700
len3 Comparison 0 2 less Comparis

len3 Comparison 0 2 more Comparison 0 1 more Comparison 2 2 equal Comparison 2 2 equal
[49, 13, 21]
Episode 3136, loss: -0.5904, total reward: -0.0600
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 0 more Comparison 1 2 less
[90, 43, 96]
Episode 3137, loss: -0.6227, total reward: -0.0500
len3 Swap
[21, 35, 83]
Episode 3138, loss: -1.0831, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 1 more Comparison 2 2 equal Comparison 2 2 equal
[42, 11, 71]
Episode 3139, loss: -1.0163, total reward: -0.0600
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 0
[78, 17, 60]
Episode 3140, loss: -0.8706, total reward: -0.0600
len3 Swap
[6, 56, 72]
Episode 3141, loss: -1.0607, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[78, 60, 17]
Episode 3142, loss: -0.9581, total reward: -0.0700
len3 Comparison 1 2 more Comparison 0 1 less Comparison 0 1 less Comparison 1 2 more
[30, 68, 64]
Ep

len3 Comparison 0 2 equal Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2
[28, 1, 28]
Episode 3211, loss: -1.2497, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Swap Comparison 2
[44, 67, 27]
Episode 3212, loss: -1.2603, total reward: -0.0600
len3 Comparison 1 2 more Swap
[10, 16, 45]
Episode 3213, loss: 0.4625, total reward: 9.9900
len3 Swap
[83, 80, 10]
Episode 3214, loss: -1.0221, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal
[67, 92, 41]
Episode 3215, loss: -0.8387, total reward: -0.0600
len3 Swap
[30, 100, 60]
Episode 3216, loss: -0.9292, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 0 less Comparison 2 2 equal Comparison 2 2 equal
[70, 43, 42]
Episode 3217, loss: -0.7445, total reward: -0.0600
len3 Swap
[55, 51, 42]
Episode 3218, loss: -1.1035, total reward: -1.0000
len3 Swap
[17, 93, 80]
Episode 3219, loss: -0.9686, total reward: -1.0000
len3 Swap
[

len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 1 more Comparison 2 2 equal
[19, 18, 26]
Episode 3284, loss: -1.1183, total reward: -0.0600
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal
[7, 11, 48]
Episode 3285, loss: 0.0594, total reward: -0.0600
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 1 more Comparison 1 2 less
[91, 5, 65]
Episode 3286, loss: -0.7057, total reward: -0.0500
len3 Swap
[67, 48, 70]
Episode 3287, loss: -0.9627, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal
[29, 34, 63]
Episode 3288, loss: -0.6977, total reward: -0.0600
len3 Comparison 0 1 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[43, 85, 80]
Episode 3289, loss: 0.4585, total reward: -0.0700
len3 Swap
[87, 88, 19]
Episode 3290, loss: -0.8314, total reward: -1.0000
len3 Swap
[6, 18, 53]
Episode 3291, loss: -0.6931, total reward: -1.0000
len3 Swap
[86, 98, 15]
Episo

len3 Comparison 1 2 more Comparison 2 1 less Comparison 2 0 less Comparison 2 2 equal
[19, 41, 1]
Episode 3363, loss: -1.2402, total reward: -0.0500
len3 Comparison 0 2 less Swap Swap
[75, 65, 60]
Episode 3364, loss: -0.5323, total reward: -1.1100
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[77, 99, 49]
Episode 3365, loss: -0.9156, total reward: -0.0800
len3 Comparison 0 0 equal Comparison 2 2 equal Comparison 2 1 more Comparison 2 2 equal
[43, 2, 23]
Episode 3366, loss: -0.3203, total reward: -0.0700
len3 Swap
[20, 50, 79]
Episode 3367, loss: -0.9592, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[4, 11, 70]
Episode 3368, loss: -0.8837, total reward: -0.0700
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[21, 67, 16]
Episode 3369, loss: -0.4485, total reward: -0.0700
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comp

Episode 3434, loss: -0.7203, total reward: -0.1500
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[82, 76, 91]
Episode 3435, loss: -0.7330, total reward: -0.0700
len3 Swap
[94, 51, 96]
Episode 3436, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[60, 8, 51]
Episode 3437, loss: -0.7754, total reward: -0.0700
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[71, 4, 89]
Episode 3438, loss: -0.7224, total reward: -0.0600
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[36, 61, 4]
Episode 3439, loss: -1.1029, total reward: -0.0600
len3 Comparison 1 2 more Swap
[19, 30, 89]
Episode 3440, loss: 0.0201, total reward: 9.9900
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[25, 74, 20]
Episode 3441, loss: -0.5182, total reward: -0.0600
len3 Swap
[32, 61, 55]
Episod

Episode 3512, loss: -0.6738, total reward: -1.0000
len3 Swap
[65, 33, 4]
Episode 3513, loss: -0.6931, total reward: -1.0000
len3 Swap
[8, 65, 98]
Episode 3514, loss: -0.7141, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 0 less Comparison 2 0 less
[36, 89, 20]
Episode 3515, loss: -1.0102, total reward: -0.0500
len3 Swap
[100, 97, 56]
Episode 3516, loss: -0.7816, total reward: -1.0000
len3 Swap
[41, 48, 24]
Episode 3517, loss: -0.7636, total reward: -1.0000
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 0 0 equal Comparison 0 2 less
[3, 51, 29]
Episode 3518, loss: -1.0396, total reward: -0.0600
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Swap
[28, 38, 53]
Episode 3519, loss: -0.8775, total reward: 9.9500
len3 Comparison 1 2 more Comparison 2 0 less Comparison 2 2 equal Comparison 2 2 equal
[23, 52, 15]
Episode 3520, loss: -1.0644, total reward: -0.0600
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 0 2 e

Episode 3584, loss: -0.9394, total reward: -0.0500
len3 Swap
[60, 12, 4]
Episode 3585, loss: -0.7732, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Swap
[59, 63, 65]
Episode 3586, loss: -1.1373, total reward: 9.9500
len3 Swap
[49, 52, 21]
Episode 3587, loss: -0.7073, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal
[98, 42, 38]
Episode 3588, loss: -1.0577, total reward: -0.0600
len3 Swap
[17, 56, 48]
Episode 3589, loss: -0.6931, total reward: -1.0000
len3 Swap
[89, 39, 76]
Episode 3590, loss: -0.6712, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 1 2 less Comparison 0 2 more
[93, 14, 27]
Episode 3591, loss: -0.5722, total reward: -0.0500
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less
[21, 67, 27]
Episode 3592, loss: -1.2265, total reward: -0.0500
len3 Swap
[41, 29, 95]
Episode 3593, loss: -0.6921, total reward: -1

Episode 3656, loss: -0.5375, total reward: -0.0600
len3 Swap
[2, 25, 9]
Episode 3657, loss: -0.8016, total reward: -1.0000
len3 Swap
[76, 15, 75]
Episode 3658, loss: -0.7017, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 1 more
[16, 65, 69]
Episode 3659, loss: -1.0721, total reward: -0.0600
len3 Swap
[21, 74, 39]
Episode 3660, loss: -0.6969, total reward: -1.0000
len3 Swap
[30, 68, 36]
Episode 3661, loss: -0.8069, total reward: -1.0000
len3 Swap
[40, 7, 50]
Episode 3662, loss: -0.7211, total reward: -1.0000
len3 Comparison 0 0 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[21, 9, 57]
Episode 3663, loss: -0.8926, total reward: -0.0700
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[72, 53, 71]
Episode 3664, loss: -1.1297, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 0 equal Comparison 0 0 equal
[86, 81, 79]
Episode 3665, loss: -1.7584

Episode 3731, loss: -0.8406, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[82, 53, 96]
Episode 3732, loss: 0.3826, total reward: -0.0700
len3 Swap
[47, 64, 22]
Episode 3733, loss: -0.9715, total reward: -1.0000
len3 Swap
[8, 54, 77]
Episode 3734, loss: -0.9843, total reward: -1.0000
len3 Swap
[9, 14, 58]
Episode 3735, loss: -0.8978, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[87, 29, 70]
Episode 3736, loss: -1.1998, total reward: -0.0600
len3 Swap
[85, 16, 81]
Episode 3737, loss: -0.9725, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[23, 10, 83]
Episode 3738, loss: -1.1962, total reward: -0.0600
len3 Comparison 1 2 more Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more
[91, 68, 17]
Episode 3739, loss: -1.0277, total reward: -0.0500
len3 Comparison 1 2 less Swap Comparison 0 2 more Co

Episode 3802, loss: -0.4817, total reward: -0.1400
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 0
[42, 81, 19]
Episode 3803, loss: -0.9579, total reward: -0.0700
len3 Comparison 1 2 more Swap Comparison 2 2 equal Swap Comparison 0 2 less Comparison
[62, 6, 78]
Episode 3804, loss: 1.1761, total reward: 0.0500
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 1 more Comparison 2 0 less
[99, 23, 32]
Episode 3805, loss: -1.1035, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal
[82, 94, 59]
Episode 3806, loss: -1.0256, total reward: -0.0700
len3 Swap
[30, 68, 45]
Episode 3807, loss: -0.9318, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[7, 54, 32]
Episode 3808, loss: -1.1667, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 0 less Comparison 2 2 equal Comparison 2 2 equal
[100, 59, 47]
Episode 3809, loss: -1

Episode 3870, loss: 0.5052, total reward: 9.9800
len3 Swap
[61, 13, 81]
Episode 3871, loss: -0.9128, total reward: -1.0000
len3 Comparison 1 2 more Comparison 0 2 less Comparison 0 2 less Swap Comparison 1
[83, 86, 21]
Episode 3872, loss: -0.2458, total reward: -0.1300
len3 Swap
[83, 30, 4]
Episode 3873, loss: -0.6931, total reward: -1.0000
len3 Swap
[17, 74, 67]
Episode 3874, loss: -0.8499, total reward: -1.0000
len3 Swap
[90, 49, 64]
Episode 3875, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 0 equal Comparison 0 0 equal Comparison 0 2 more Comparison 0 2 more
[94, 33, 6]
Episode 3876, loss: -0.3285, total reward: -0.0600
len3 Comparison 1 2 more Comparison 0 1 more Comparison 1 2 more Comparison 1 2 more
[65, 55, 43]
Episode 3877, loss: -1.0558, total reward: -0.0400
len3 Comparison 2 2 equal Swap Comparison 0 2 more Comparison 1 2 less Comparison 2
[92, 6, 24]
Episode 3878, loss: -0.0609, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 mo

Episode 3942, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less
[52, 56, 37]
Episode 3943, loss: -1.3260, total reward: -0.0600
len3 Swap
[56, 26, 22]
Episode 3944, loss: -0.7459, total reward: -1.0000
len3 Comparison 1 0 more Comparison 0 2 less Comparison 0 0 equal Comparison 0 2 less
[39, 47, 53]
Episode 3945, loss: -1.1511, total reward: -0.0500
len3 Comparison 1 2 more Comparison 0 0 equal Comparison 0 2 less Comparison 2 2 equal
[47, 77, 56]
Episode 3946, loss: -1.3478, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 1 less Comparison 0 2 less Comparison 0 2 less
[11, 58, 86]
Episode 3947, loss: -0.5983, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Swap Comparison 0
[84, 11, 59]
Episode 3948, loss: -1.0605, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 0 2 more Comparison 1
[71, 100, 18]
Episode 3949, loss: -1.25

Episode 4016, loss: -0.9044, total reward: -0.0500
len3 Comparison 2 1 more Comparison 0 2 less Comparison 2 2 equal Comparison 0 0 equal
[60, 15, 71]
Episode 4017, loss: -1.1918, total reward: -0.0600
len3 Comparison 0 2 less Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal
[52, 83, 67]
Episode 4018, loss: -1.0255, total reward: -0.0500
len3 Comparison 2 1 more Comparison 0 2 more Comparison 0 2 more Comparison 0 2 more
[97, 74, 94]
Episode 4019, loss: -0.8918, total reward: -0.0400
len3 Swap
[57, 4, 78]
Episode 4020, loss: -0.6931, total reward: -1.0000
len3 Swap
[100, 78, 86]
Episode 4021, loss: -0.6740, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 0 more Comparison 2 2 equal Comparison 1 2 less
[32, 45, 82]
Episode 4022, loss: -1.0455, total reward: -0.0500
len3 Swap
[78, 1, 60]
Episode 4023, loss: -0.5742, total reward: -1.0000
len3 Swap
[8, 3, 42]
Episode 4024, loss: -0.6313, total reward: -1.0000
len3 Comparison 0 2 less Comparison 0 2 less Comparison 

Episode 4090, loss: -1.0824, total reward: -0.0600
len3 Comparison 2 0 less Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2
[97, 76, 47]
Episode 4091, loss: -0.5834, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[76, 23, 41]
Episode 4092, loss: -1.1974, total reward: -0.0700
len3 Swap
[99, 61, 52]
Episode 4093, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 more
[95, 13, 76]
Episode 4094, loss: -0.6693, total reward: -0.0700
len3 Swap
[75, 46, 6]
Episode 4095, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[33, 62, 27]
Episode 4096, loss: -1.2007, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more
[8, 90, 55]
Episode 4097, loss: -1.3160, total reward: -0.0600
len3 Swap
[50, 2, 26]
Episode 4098, loss: -0.70

len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less
[29, 37, 32]
Episode 4163, loss: -0.6620, total reward: -0.0600
len3 Comparison 2 0 more Comparison 2 2 equal Swap Comparison 2 1 less Comparison 0
[19, 92, 67]
Episode 4164, loss: -0.7755, total reward: -0.0500
len3 Swap
[86, 33, 53]
Episode 4165, loss: -0.7055, total reward: -1.0000
len3 Swap
[25, 5, 17]
Episode 4166, loss: -0.6194, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal
[45, 12, 6]
Episode 4167, loss: -1.4526, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal
[23, 87, 5]
Episode 4168, loss: -1.1342, total reward: -0.0700
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[22, 5, 14]
Episode 4169, loss: -0.4991, total reward: -0.0600
len3 Swap
[6, 73, 5]
Episode 4170, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 2 less Compar

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[79, 60, 76]
Episode 4238, loss: -1.0923, total reward: -0.0800
len3 Comparison 2 1 more Comparison 1 2 less Comparison 0 2 less Comparison 2 2 equal
[21, 15, 37]
Episode 4239, loss: 0.0933, total reward: -0.0500
len3 Comparison 0 2 more Comparison 1 2 more Comparison 0 2 more Comparison 2 2 equal
[90, 93, 85]
Episode 4240, loss: -0.1616, total reward: -0.0500
len3 Comparison 0 2 more Comparison 0 1 more Comparison 0 2 more Comparison 0 2 more
[48, 36, 11]
Episode 4241, loss: -1.2411, total reward: -0.0400
len3 Comparison 0 2 more Comparison 1 2 less Comparison 2 2 equal Swap Comparison 2
[92, 29, 37]
Episode 4242, loss: -1.0027, total reward: -0.0500
len3 Comparison 0 2 more Comparison 0 0 equal Comparison 1 2 less Swap Comparison 0
[89, 37, 35]
Episode 4243, loss: -1.3782, total reward: -0.1400
len3 Comparison 1 2 less Comparison 1 2 less Comparison 0 2 more Swap Swap
[60, 37, 80]
Episode 4244, l

Episode 4308, loss: -0.8051, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[53, 83, 76]
Episode 4309, loss: -1.2629, total reward: -0.0700
len3 Comparison 0 0 equal Comparison 0 2 more Comparison 2 1 less Comparison 0 1 less
[45, 84, 41]
Episode 4310, loss: -0.6664, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[12, 65, 54]
Episode 4311, loss: -0.7346, total reward: -0.0700
len3 Swap
[22, 67, 76]
Episode 4312, loss: -0.8385, total reward: -1.0000
len3 Swap
[92, 6, 69]
Episode 4313, loss: -0.7254, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 1 2 more
[5, 16, 9]
Episode 4314, loss: -0.9312, total reward: -0.0600
len3 Swap
[58, 85, 33]
Episode 4315, loss: -0.7832, total reward: -1.0000
len3 Swap
[40, 78, 51]
Episode 4316, loss: -0.8404, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Compa

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Swap Comparison 0
[20, 82, 51]
Episode 4379, loss: -1.0900, total reward: -0.1500
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 less Comparison 2 2 equal
[31, 9, 30]
Episode 4380, loss: -0.4011, total reward: -0.0600
len3 Comparison 1 2 less Comparison 1 0 less Comparison 0 2 more Comparison 0 1 more
[40, 16, 20]
Episode 4381, loss: -0.6832, total reward: -0.0400
len3 Swap
[76, 3, 19]
Episode 4382, loss: -0.6926, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 more
[64, 86, 20]
Episode 4383, loss: -1.1308, total reward: -0.0600
len3 Swap
[97, 12, 50]
Episode 4384, loss: -0.6798, total reward: -1.0000
len3 Swap
[48, 80, 25]
Episode 4385, loss: -0.6460, total reward: -1.0000
len3 Swap
[56, 51, 44]
Episode 4386, loss: -0.6931, total reward: -1.0000
len3 Swap
[17, 21, 49]
Episode 4387, loss: -0.6291, total reward: -1.0000
len3 Swap
[60, 91, 48]
Episode 4

Episode 4455, loss: -1.0360, total reward: -0.0700
len3 Swap
[26, 79, 12]
Episode 4456, loss: -0.8636, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 1 0 less
[55, 1, 13]
Episode 4457, loss: -1.4723, total reward: -0.0500
len3 Comparison 0 0 equal Comparison 2 2 equal Swap
[6, 25, 32]
Episode 4458, loss: 0.5972, total reward: 9.9600
len3 Swap
[92, 66, 3]
Episode 4459, loss: -0.9401, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[19, 4, 67]
Episode 4460, loss: -0.3717, total reward: -0.0700
len3 Swap
[52, 8, 20]
Episode 4461, loss: -1.0050, total reward: -1.0000
len3 Swap
[33, 38, 38]
Episode 4462, loss: -0.9547, total reward: -1.0000
len3 Swap
[11, 23, 70]
Episode 4463, loss: -0.9401, total reward: -1.0000
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[34, 96, 36]
Episode 4464, loss: 0.5595, total reward: -0.0600
len3 Comparison 2 

Episode 4527, loss: -0.4034, total reward: -0.0600
len3 Swap
[58, 57, 28]
Episode 4528, loss: -0.8028, total reward: -1.0000
len3 Comparison 0 2 less Swap Comparison 0 2 more Comparison 2 1 less Comparison 2
[96, 79, 53]
Episode 4529, loss: -0.9892, total reward: -0.1300
len3 Swap
[98, 69, 79]
Episode 4530, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[25, 66, 75]
Episode 4531, loss: -0.6988, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[70, 76, 82]
Episode 4532, loss: -0.7113, total reward: -0.0800
len3 Swap
[69, 100, 52]
Episode 4533, loss: -0.8720, total reward: -1.0000
len3 Swap
[95, 46, 79]
Episode 4534, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[65, 57, 69]
Episode 4535, loss: -1.0245, total reward: -0.0700
len3 Comparison 2 2 equal Swap
[27, 31, 90]
Epi

len3 Comparison 2 2 equal Comparison 2 0 more Comparison 0 2 less Comparison 2 2 equal
[32, 24, 95]
Episode 4602, loss: -0.5640, total reward: -0.0600
len3 Comparison 1 2 more Swap Comparison 2 0 less Comparison 2 2 equal Comparison 0
[83, 1, 36]
Episode 4603, loss: 1.9505, total reward: 0.0600
len3 Comparison 0 1 less Comparison 2 2 equal Swap Comparison 0 2 more Comparison 2
[18, 29, 14]
Episode 4604, loss: -0.8103, total reward: -0.0500
len3 Comparison 0 2 less Comparison 0 2 less Comparison 0 2 less Comparison 0 2 less
[66, 55, 96]
Episode 4605, loss: -1.3768, total reward: -0.0400
len3 Swap
[95, 61, 83]
Episode 4606, loss: -0.7081, total reward: -1.0000
len3 Swap
[29, 12, 16]
Episode 4607, loss: -0.6931, total reward: -1.0000
len3 Swap
[14, 12, 34]
Episode 4608, loss: -0.8110, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 0
[31, 10, 88]
Episode 4609, loss: -1.4688, total reward: -0.0600
len3 Swap
[74, 76, 59]
Episode 4610,

Episode 4673, loss: -1.4021, total reward: -0.0600
len3 Comparison 1 2 more Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[62, 39, 9]
Episode 4674, loss: -1.5311, total reward: -0.0600
len3 Swap
[53, 12, 2]
Episode 4675, loss: -0.6897, total reward: -1.0000
len3 Swap
[32, 47, 52]
Episode 4676, loss: -0.6745, total reward: -1.0000
len3 Swap
[96, 40, 40]
Episode 4677, loss: -0.6701, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 1 less Comparison 0 2 more Comparison 2 2 equal
[78, 99, 61]
Episode 4678, loss: -1.3672, total reward: -0.0500
len3 Swap
[64, 37, 38]
Episode 4679, loss: -0.6036, total reward: -1.0000
len3 Comparison 2 0 more Comparison 2 2 equal Swap Swap
[19, 58, 44]
Episode 4680, loss: 0.6550, total reward: -1.0400
len3 Swap
[10, 43, 98]
Episode 4681, loss: -0.6463, total reward: -1.0000
len3 Swap
[28, 53, 86]
Episode 4682, loss: -0.6988, total reward: -1.0000
len3 Swap
[37, 9, 75]
Episode 4683, loss: -0.6697, total reward: -1.0000
len3 Swap
[54,

Episode 4747, loss: -0.7976, total reward: -0.0800
len3 Swap
[94, 82, 84]
Episode 4748, loss: -0.6931, total reward: -1.0000
len3 Swap
[55, 37, 22]
Episode 4749, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[40, 38, 66]
Episode 4750, loss: -2.1337, total reward: -0.0600
len3 Swap
[34, 91, 76]
Episode 4751, loss: -0.8445, total reward: -1.0000
len3 Swap
[28, 21, 53]
Episode 4752, loss: -0.8720, total reward: -1.0000
len3 Swap
[45, 100, 81]
Episode 4753, loss: -0.9311, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[96, 26, 27]
Episode 4754, loss: 0.0678, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 less Swap Comparison 2
[27, 35, 34]
Episode 4755, loss: -0.9697, total reward: 0.0500
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[12, 89, 81]
Episode 4756, loss: -0.650

Episode 4817, loss: -1.4920, total reward: -0.0700
len3 Swap
[16, 40, 40]
Episode 4818, loss: -0.8045, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 1 equal
[47, 7, 86]
Episode 4819, loss: -0.1368, total reward: -0.0800
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[38, 67, 37]
Episode 4820, loss: -1.2986, total reward: -0.0700
len3 Swap
[70, 15, 39]
Episode 4821, loss: -0.8652, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 0 2 less Comparison 1 0 more
[24, 48, 68]
Episode 4822, loss: -1.4846, total reward: -0.0500
len3 Swap
[69, 74, 45]
Episode 4823, loss: -0.6931, total reward: -1.0000
len3 Swap
[37, 23, 29]
Episode 4824, loss: -0.8687, total reward: -1.0000
len3 Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[32, 43, 36]
Episode 4825, loss: -1.0266, total reward: -0.0600
len3 Swap
[11, 15, 87]
Episode 4826, loss: -0.77

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[95, 17, 53]
Episode 4890, loss: -1.0288, total reward: -0.0800
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[99, 31, 91]
Episode 4891, loss: -1.8599, total reward: -0.0700
len3 Swap
[34, 95, 100]
Episode 4892, loss: -0.6940, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal
[17, 36, 61]
Episode 4893, loss: -1.0217, total reward: -0.0600
len3 Swap
[19, 86, 86]
Episode 4894, loss: -0.6062, total reward: -1.0000
len3 Swap
[51, 9, 15]
Episode 4895, loss: -0.5598, total reward: -1.0000
len3 Swap
[91, 45, 58]
Episode 4896, loss: -0.6114, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 1 less Swap
[8, 31, 90]
Episode 4897, loss: -1.5210, total reward: 9.9600
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[90, 98, 38]
Episode 4898, l

Episode 4966, loss: -0.7138, total reward: -0.0700
len3 Swap
[60, 95, 70]
Episode 4967, loss: -0.6931, total reward: -1.0000
len3 Swap
[78, 46, 90]
Episode 4968, loss: -1.1160, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[19, 41, 72]
Episode 4969, loss: -2.2036, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[48, 17, 13]
Episode 4970, loss: -2.0551, total reward: -0.0600
len3 Swap
[41, 11, 31]
Episode 4971, loss: -1.0599, total reward: -1.0000
len3 Swap
[39, 32, 89]
Episode 4972, loss: -1.0345, total reward: -1.0000
len3 Comparison 0 0 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[2, 89, 78]
Episode 4973, loss: -0.5043, total reward: -0.0800
len3 Swap
[42, 10, 63]
Episode 4974, loss: -1.1339, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 less
[92, 85, 51]
Episode 4975, loss: -0.

len3 Comparison 2 2 equal Comparison 0 1 more Swap Comparison 2 2 equal Comparison 2
[79, 89, 86]
Episode 5040, loss: 0.1982, total reward: 0.0500
len3 Swap
[91, 31, 20]
Episode 5041, loss: -0.6931, total reward: -1.0000
len3 Swap
[6, 25, 52]
Episode 5042, loss: -1.0892, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal
[11, 73, 14]
Episode 5043, loss: -1.4125, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal Swap Comparison 2
[41, 68, 65]
Episode 5044, loss: -0.5484, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[34, 60, 46]
Episode 5045, loss: -0.5822, total reward: -0.0800
len3 Swap
[84, 32, 44]
Episode 5046, loss: -1.2809, total reward: -1.0000
len3 Swap
[79, 23, 80]
Episode 5047, loss: -1.1735, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[100, 45, 79]
Epi

len3 Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less Swap Comparison 0
[29, 11, 22]
Episode 5110, loss: -0.2551, total reward: -0.1400
len3 Swap
[33, 3, 72]
Episode 5111, loss: -0.8355, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 0
[57, 72, 45]
Episode 5112, loss: -0.9335, total reward: -0.0700
len3 Swap
[59, 20, 30]
Episode 5113, loss: -0.9247, total reward: -1.0000
len3 Swap
[16, 35, 4]
Episode 5114, loss: -0.8468, total reward: -1.0000
len3 Swap
[42, 54, 96]
Episode 5115, loss: -0.6931, total reward: -1.0000
len3 Swap
[30, 57, 29]
Episode 5116, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 more
[73, 58, 30]
Episode 5117, loss: -0.8212, total reward: -0.0600
len3 Comparison 0 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[3, 49, 3]
Episode 5118, loss: -1.1657, total reward: -0.0800
len3 Swap
[6, 69, 28]
Episode 51

len3 Comparison 2 2 equal Comparison 0 2 less Comparison 1 2 more Swap
[17, 43, 91]
Episode 5190, loss: -0.5796, total reward: 9.9600
len3 Comparison 0 1 less Comparison 2 1 more Comparison 2 2 equal Comparison 0 2 less
[30, 65, 76]
Episode 5191, loss: -0.7284, total reward: -0.0500
len3 Swap
[60, 7, 28]
Episode 5192, loss: -1.3033, total reward: -1.0000
len3 Comparison 2 2 equal Swap
[10, 25, 34]
Episode 5193, loss: 0.5664, total reward: 9.9800
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 1 0 more Comparison 0 2 more
[31, 77, 10]
Episode 5194, loss: -0.8562, total reward: -0.0500
len3 Swap
[2, 81, 75]
Episode 5195, loss: -1.3215, total reward: -1.0000
len3 Swap
[94, 8, 83]
Episode 5196, loss: -1.4415, total reward: -1.0000
len3 Comparison 1 2 less Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[71, 1, 7]
Episode 5197, loss: -0.7863, total reward: -0.0600
len3 Swap
[76, 29, 55]
Episode 5198, loss: -1.3421, total reward: -1.0000
len3 Comparison 0 2 more Compar

len3 Comparison 2 2 equal Comparison 0 1 more Comparison 2 2 equal Swap Comparison 0
[61, 44, 19]
Episode 5260, loss: -0.6972, total reward: -0.0600
len3 Swap
[68, 100, 36]
Episode 5261, loss: -0.8132, total reward: -1.0000
len3 Swap
[47, 42, 78]
Episode 5262, loss: -0.7788, total reward: -1.0000
len3 Swap
[70, 79, 75]
Episode 5263, loss: -0.8175, total reward: -1.0000
len3 Comparison 2 2 equal Swap
[29, 84, 86]
Episode 5264, loss: -0.2144, total reward: 9.9800
len3 Comparison 2 2 equal Comparison 0 2 less Swap Comparison 1 1 equal Comparison 2
[61, 57, 42]
Episode 5265, loss: -0.5964, total reward: -0.1500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[19, 44, 48]
Episode 5266, loss: -0.7144, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[53, 100, 85]
Episode 5267, loss: -0.3819, total reward: -0.0700
len3 Swap
[59, 85, 93]
Episode 5268, loss: -0.6931, total reward: -1.0000
len3 

Episode 5336, loss: -0.7092, total reward: -0.0700
len3 Swap
[98, 99, 80]
Episode 5337, loss: -1.0554, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[37, 20, 21]
Episode 5338, loss: -0.9696, total reward: -0.0700
len3 Swap
[35, 96, 16]
Episode 5339, loss: -1.0528, total reward: -1.0000
len3 Swap
[74, 86, 84]
Episode 5340, loss: -1.0105, total reward: -1.0000
len3 Comparison 0 2 less Comparison 0 2 less Comparison 0 2 less Comparison 0 2 less
[87, 23, 95]
Episode 5341, loss: -1.1474, total reward: -0.0400
len3 Swap
[10, 88, 86]
Episode 5342, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 0 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[61, 99, 70]
Episode 5343, loss: 0.0751, total reward: -0.0700
len3 Swap
[51, 44, 94]
Episode 5344, loss: -1.1507, total reward: -1.0000
len3 Swap
[65, 4, 68]
Episode 5345, loss: -1.0316, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Compari

Episode 5406, loss: 0.0628, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[12, 14, 33]
Episode 5407, loss: -0.6409, total reward: -0.0800
len3 Comparison 2 0 more Comparison 2 2 equal Swap
[29, 47, 59]
Episode 5408, loss: 0.5914, total reward: 9.9700
len3 Swap
[75, 81, 18]
Episode 5409, loss: -0.7982, total reward: -1.0000
len3 Swap
[96, 95, 32]
Episode 5410, loss: -0.8205, total reward: -1.0000
len3 Comparison 1 1 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[69, 33, 11]
Episode 5411, loss: -0.7492, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[88, 4, 14]
Episode 5412, loss: -0.7355, total reward: -0.0700
len3 Swap
[45, 1, 63]
Episode 5413, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[28, 77, 63]
Episode 5414, loss: -0.8014, total reward: -0.0700
len3 Swa

len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 0 more
[42, 66, 92]
Episode 5483, loss: -0.8122, total reward: -0.0600
len3 Swap
[11, 70, 72]
Episode 5484, loss: -0.6931, total reward: -1.0000
len3 Swap
[63, 93, 3]
Episode 5485, loss: -0.7692, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 0 equal Comparison 2 2 equal
[38, 17, 76]
Episode 5486, loss: -0.8585, total reward: -0.0800
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal
[44, 67, 91]
Episode 5487, loss: -0.6440, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 less
[83, 19, 33]
Episode 5488, loss: -0.8430, total reward: -0.0700
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more
[96, 37, 97]
Episode 5489, loss: -1.5737, total reward: -0.0600
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[57, 36

Episode 5555, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[15, 98, 44]
Episode 5556, loss: -0.8726, total reward: -0.0700
len3 Swap
[10, 32, 60]
Episode 5557, loss: -0.8213, total reward: -1.0000
len3 Swap
[69, 39, 16]
Episode 5558, loss: -0.8364, total reward: -1.0000
len3 Swap
[66, 92, 86]
Episode 5559, loss: -0.6931, total reward: -1.0000
len3 Swap
[96, 11, 15]
Episode 5560, loss: -0.8575, total reward: -1.0000
len3 Comparison 0 2 more Comparison 0 1 less Comparison 0 0 equal Comparison 2 2 equal
[63, 88, 49]
Episode 5561, loss: -1.1208, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal Comparison 2 2 equal
[13, 2, 20]
Episode 5562, loss: -0.7219, total reward: -0.0700
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 0 2 less Comparison 1 1 equal
[6, 11, 100]
Episode 5563, loss: -1.3382, total reward: -0.0600
len3 Swap
[46, 31, 56]
Episode 5564, loss: -0.69

len3 Comparison 2 1 more Comparison 0 2 less Comparison 2 2 equal Comparison 2 0 more
[28, 29, 38]
Episode 5629, loss: -0.8356, total reward: -0.0500
len3 Swap
[51, 1, 79]
Episode 5630, loss: -0.7694, total reward: -1.0000
len3 Comparison 0 2 more Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[49, 39, 4]
Episode 5631, loss: -1.3841, total reward: -0.0600
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[73, 75, 74]
Episode 5632, loss: -1.5900, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 1 2 less Comparison 2 2 equal
[6, 10, 49]
Episode 5633, loss: -0.9601, total reward: -0.0600
len3 Swap
[63, 57, 6]
Episode 5634, loss: -0.7594, total reward: -1.0000
len3 Comparison 0 2 less Comparison 0 2 less Swap Swap
[95, 39, 49]
Episode 5635, loss: 1.1670, total reward: -1.1200
len3 Swap
[80, 26, 94]
Episode 5636, loss: -0.7218, total reward: -1.0000
len3 Comparison 2 0 less Comparison 1 2 less Comparison 2 2 e

Episode 5699, loss: -0.0283, total reward: -0.0600
len3 Comparison 2 0 more Comparison 1 2 less Comparison 2 2 equal Comparison 1 1 equal
[3, 13, 79]
Episode 5700, loss: -0.6401, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 more
[87, 32, 6]
Episode 5701, loss: -0.8650, total reward: -0.0600
len3 Swap
[56, 12, 6]
Episode 5702, loss: -0.7727, total reward: -1.0000
len3 Swap
[52, 62, 85]
Episode 5703, loss: -0.6933, total reward: -1.0000
len3 Swap
[31, 62, 6]
Episode 5704, loss: -0.7225, total reward: -1.0000
len3 Swap
[40, 98, 14]
Episode 5705, loss: -0.7055, total reward: -1.0000
len3 Swap
[61, 43, 30]
Episode 5706, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 0 1 less
[64, 91, 27]
Episode 5707, loss: -1.4533, total reward: -0.0600
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 1 less Comparison 1 2 more
[60, 81, 21]
Episode 5708, loss: -1.5110, to

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal
[65, 76, 70]
Episode 5773, loss: -0.9306, total reward: -0.0700
len3 Swap
[16, 36, 61]
Episode 5774, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Swap
[21, 54, 66]
Episode 5775, loss: -0.9973, total reward: 9.9500
len3 Swap
[55, 67, 83]
Episode 5776, loss: -0.7426, total reward: -1.0000
len3 Swap
[13, 21, 100]
Episode 5777, loss: -0.8812, total reward: -1.0000
len3 Comparison 2 1 less Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[57, 55, 32]
Episode 5778, loss: -0.9192, total reward: -0.0600
len3 Comparison 1 1 equal Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[94, 48, 8]
Episode 5779, loss: -0.9798, total reward: -0.0700
len3 Swap
[60, 80, 79]
Episode 5780, loss: -0.7857, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[36, 56, 5]
Episode 5781, 

Episode 5840, loss: -1.1108, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 less Swap Comparison 2 2 equal Comparison 2
[10, 3, 7]
Episode 5841, loss: -0.7383, total reward: -0.1500
len3 Swap
[46, 69, 11]
Episode 5842, loss: -0.5559, total reward: -1.0000
len3 Swap
[73, 34, 33]
Episode 5843, loss: -0.5900, total reward: -1.0000
len3 Swap
[60, 86, 44]
Episode 5844, loss: -0.5754, total reward: -1.0000
len3 Swap
[53, 23, 62]
Episode 5845, loss: -0.5871, total reward: -1.0000
len3 Swap
[72, 4, 99]
Episode 5846, loss: -0.7170, total reward: -1.0000
len3 Comparison 0 2 more Comparison 0 1 less Comparison 0 2 more Comparison 0 2 more
[38, 46, 3]
Episode 5847, loss: -1.2083, total reward: -0.0400
len3 Swap
[19, 72, 80]
Episode 5848, loss: -0.6931, total reward: -1.0000
len3 Swap
[69, 18, 80]
Episode 5849, loss: -0.6061, total reward: -1.0000
len3 Swap
[40, 46, 29]
Episode 5850, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 mo

Episode 5915, loss: -0.7980, total reward: -1.0000
len3 Comparison 0 2 more Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[50, 83, 34]
Episode 5916, loss: -1.0677, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[72, 95, 85]
Episode 5917, loss: -0.9311, total reward: -0.0800
len3 Comparison 0 1 more Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[58, 29, 8]
Episode 5918, loss: -0.8086, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 1 2 more Comparison 0 2 more
[100, 83, 70]
Episode 5919, loss: -0.8792, total reward: -0.0500
len3 Swap
[44, 68, 12]
Episode 5920, loss: -0.7580, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less Comparison 0 2 more
[37, 7, 6]
Episode 5921, loss: -1.0512, total reward: -0.0600
len3 Comparison 0 2 more Comparison 0 0 equal Comparison 2 2 equal Comparison 2 2 equal
[36, 92, 13]
Episode 5922, loss: 

Episode 5987, loss: -1.6179, total reward: -0.0400
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[51, 84, 65]
Episode 5988, loss: -0.9032, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 0 less Comparison 2 2 equal Comparison 0 2 more
[74, 25, 5]
Episode 5989, loss: -0.8412, total reward: -0.0600
len3 Swap
[81, 94, 32]
Episode 5990, loss: -0.7733, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[7, 78, 88]
Episode 5991, loss: -0.9334, total reward: -0.0800
len3 Swap
[1, 100, 96]
Episode 5992, loss: -0.7999, total reward: -1.0000
len3 Swap
[89, 1, 31]
Episode 5993, loss: -0.7876, total reward: -1.0000
len3 Swap
[93, 45, 59]
Episode 5994, loss: -0.8249, total reward: -1.0000
len3 Swap
[55, 67, 78]
Episode 5995, loss: -0.7742, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 more Comparison 0 2 more
[17, 67, 15]
Episode 5996, loss: -0.9755

len3 Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal
[14, 62, 88]
Episode 6060, loss: -0.8368, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 0 equal
[43, 63, 18]
Episode 6061, loss: -0.9010, total reward: -0.0800
len3 Comparison 0 2 less Comparison 1 2 more Comparison 2 2 equal Comparison 2 0 more
[5, 91, 39]
Episode 6062, loss: -1.1560, total reward: -0.0500
len3 Swap
[68, 76, 35]
Episode 6063, loss: -0.7370, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[58, 88, 18]
Episode 6064, loss: -0.9155, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[18, 68, 97]
Episode 6065, loss: -0.8350, total reward: -0.0700
len3 Swap
[95, 44, 1]
Episode 6066, loss: -0.7257, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[2, 35,

len3 Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[32, 49, 63]
Episode 6136, loss: -0.1983, total reward: -0.0600
len3 Swap
[5, 23, 62]
Episode 6137, loss: -0.8453, total reward: -1.0000
len3 Swap
[42, 70, 58]
Episode 6138, loss: -0.7703, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[95, 25, 10]
Episode 6139, loss: -0.9559, total reward: -0.0700
len3 Swap
[27, 99, 37]
Episode 6140, loss: -0.8648, total reward: -1.0000
len3 Swap
[66, 64, 22]
Episode 6141, loss: -0.9033, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Swap Comparison 2 2 equal Comparison 2
[39, 25, 6]
Episode 6142, loss: -0.5155, total reward: -0.1500
len3 Swap
[27, 62, 24]
Episode 6143, loss: -0.8374, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[40, 99, 13]
Episode 6144, loss: 0.0576, total reward: -0.0700
len3 Comparison 2 2 equal Com

len3 Comparison 2 1 more Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal
[5, 24, 91]
Episode 6209, loss: -0.4278, total reward: -0.0500
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 more
[99, 81, 35]
Episode 6210, loss: -0.3205, total reward: -0.0500
len3 Comparison 1 2 less Comparison 0 2 more Comparison 0 2 more Comparison 0 2 more
[72, 14, 43]
Episode 6211, loss: -1.2436, total reward: -0.0400
len3 Swap
[41, 100, 77]
Episode 6212, loss: -0.6931, total reward: -1.0000
len3 Swap
[73, 27, 14]
Episode 6213, loss: -0.8386, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 more Comparison 2 2 equal
[91, 81, 4]
Episode 6214, loss: -0.8655, total reward: -0.0600
len3 Swap
[4, 59, 36]
Episode 6215, loss: -0.7654, total reward: -1.0000
len3 Swap
[5, 80, 75]
Episode 6216, loss: -0.7750, total reward: -1.0000
len3 Swap
[46, 29, 30]
Episode 6217, loss: -0.8376, total reward: -1.0000
len3 Comparison 2 2 equal Compari

Episode 6287, loss: 0.2727, total reward: 0.0500
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[82, 38, 7]
Episode 6288, loss: -0.8667, total reward: -0.0700
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 0 1 more Comparison 0 2 less
[58, 28, 99]
Episode 6289, loss: -1.7214, total reward: -0.0500
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[23, 57, 13]
Episode 6290, loss: -1.1352, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal
[15, 46, 67]
Episode 6291, loss: -0.9774, total reward: -0.0600
len3 Comparison 0 2 more Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more
[53, 37, 27]
Episode 6292, loss: -0.8910, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 1 2 less Comparison 2 2 equal
[18, 49, 90]
Episode 6293, loss: -0.0602, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 2 more 

Episode 6356, loss: -0.7739, total reward: -0.0700
len3 Swap
[52, 8, 26]
Episode 6357, loss: -0.8960, total reward: -1.0000
len3 Swap
[22, 44, 1]
Episode 6358, loss: -0.8382, total reward: -1.0000
len3 Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[52, 65, 71]
Episode 6359, loss: -1.1650, total reward: -0.0600
len3 Comparison 0 0 equal Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal
[41, 57, 58]
Episode 6360, loss: -0.4534, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[82, 70, 64]
Episode 6361, loss: -0.8225, total reward: -0.0700
len3 Comparison 0 2 more Comparison 0 2 more Comparison 0 1 more Comparison 2 2 equal
[99, 4, 81]
Episode 6362, loss: -1.1321, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap Swap
[79, 55, 36]
Episode 6363, loss: 0.5723, total reward: -1.0700
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 

len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[33, 60, 38]
Episode 6426, loss: -0.8753, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[22, 22, 52]
Episode 6427, loss: -0.8707, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 more Comparison 2 2 equal
[54, 96, 25]
Episode 6428, loss: -1.0077, total reward: -0.0600
len3 Swap
[6, 86, 65]
Episode 6429, loss: -0.6948, total reward: -1.0000
len3 Swap
[4, 9, 19]
Episode 6430, loss: -0.7383, total reward: -1.0000
len3 Swap
[89, 81, 31]
Episode 6431, loss: -0.7925, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[30, 24, 88]
Episode 6432, loss: -0.8951, total reward: -0.0800
len3 Swap
[91, 85, 51]
Episode 6433, loss: -0.6931, total reward: -1.0000
len3 Swap
[80, 64, 46]
Episode 6434, loss: -0.6931, total reward: -1.0000
len3 Swap
[39, 30, 84]
Ep

len3 Comparison 2 1 more Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[61, 13, 92]
Episode 6501, loss: -0.0884, total reward: -0.0600
len3 Swap
[95, 91, 72]
Episode 6502, loss: -0.8527, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[37, 34, 32]
Episode 6503, loss: -1.7974, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[84, 54, 3]
Episode 6504, loss: 0.0922, total reward: -0.0700
len3 Swap
[16, 22, 19]
Episode 6505, loss: -0.7959, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[16, 68, 64]
Episode 6506, loss: -0.6937, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[59, 2, 10]
Episode 6507, loss: -0.7508, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 more Comparison 0 2 more
[89, 36, 

len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2
[34, 32, 39]
Episode 6574, loss: -2.1019, total reward: -0.0600
len3 Comparison 2 1 equal Comparison 0 1 more Swap Comparison 2 1 less Comparison 2
[48, 66, 48]
Episode 6575, loss: 1.0483, total reward: 0.0600
len3 Swap
[25, 30, 80]
Episode 6576, loss: -0.5025, total reward: -1.0000
len3 Swap
[50, 24, 25]
Episode 6577, loss: -0.5315, total reward: -1.0000
len3 Swap
[42, 66, 84]
Episode 6578, loss: -0.5163, total reward: -1.0000
len3 Swap
[63, 55, 69]
Episode 6579, loss: -0.5649, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 2
[67, 76, 75]
Episode 6580, loss: -0.4175, total reward: -0.0700
len3 Swap
[79, 9, 1]
Episode 6581, loss: -0.6931, total reward: -1.0000
len3 Swap
[22, 74, 25]
Episode 6582, loss: -0.5703, total reward: -1.0000
len3 Swap
[90, 12, 98]
Episode 6583, loss: -0.5163, total reward: -1.0000
len3 Swap
[91, 56, 73]
Episode 6584, lo

Episode 6652, loss: -0.7054, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2 2 equal Swap Comparison
[50, 61, 6]
Episode 6653, loss: -0.4808, total reward: -0.0800
len3 Swap
[33, 79, 69]
Episode 6654, loss: -0.8191, total reward: -1.0000
len3 Swap
[62, 54, 91]
Episode 6655, loss: -0.8633, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[22, 29, 55]
Episode 6656, loss: -0.7452, total reward: -0.0800
len3 Swap
[61, 87, 42]
Episode 6657, loss: -0.8624, total reward: -1.0000
len3 Swap
[27, 73, 91]
Episode 6658, loss: -0.8692, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[56, 91, 89]
Episode 6659, loss: -1.9262, total reward: -0.0700
len3 Swap
[21, 67, 100]
Episode 6660, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 1 less Comparison 0 2 more Swap Comparison 2 2 equal Comparison 2
[15, 69, 59]
Episode 6661, loss: 1.09

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[51, 83, 13]
Episode 6721, loss: -0.9966, total reward: -0.0800
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[94, 61, 54]
Episode 6722, loss: -0.5953, total reward: -0.0600
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 2
[78, 31, 40]
Episode 6723, loss: -0.2845, total reward: -0.0700
len3 Swap
[84, 73, 11]
Episode 6724, loss: -0.5172, total reward: -1.0000
len3 Swap
[58, 11, 25]
Episode 6725, loss: -0.5624, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 1 more
[42, 60, 87]
Episode 6726, loss: -1.0915, total reward: -0.0600
len3 Swap
[20, 82, 38]
Episode 6727, loss: -0.4815, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[96, 44, 95]
Episode 6728, loss: -0.9959, total reward: -0.0700
len3 Comparison 2 2 e

len3 Comparison 2 0 more Comparison 2 2 equal Comparison 2 2 equal Swap
[15, 48, 78]
Episode 6800, loss: 0.5598, total reward: 9.9500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[96, 5, 61]
Episode 6801, loss: 0.1497, total reward: -0.0800
len3 Swap
[17, 11, 36]
Episode 6802, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 less Swap Comparison 0 2 more Comparison 2
[44, 73, 8]
Episode 6803, loss: -0.4162, total reward: -0.1400
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal
[34, 87, 79]
Episode 6804, loss: -1.8742, total reward: -0.0600
len3 Comparison 2 2 equal Swap Swap
[16, 51, 14]
Episode 6805, loss: 0.5115, total reward: -1.0300
len3 Comparison 2 2 equal Swap Comparison 0 2 less Comparison 2 2 equal Comparison 2
[57, 13, 85]
Episode 6806, loss: -0.0204, total reward: -0.0600
len3 Swap
[80, 21, 18]
Episode 6807, loss: -0.9210, total reward: -1.0000
len3 Comparison 2 1 l

Episode 6869, loss: -1.5371, total reward: -0.0600
len3 Swap
[96, 97, 81]
Episode 6870, loss: -0.6343, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less
[55, 84, 38]
Episode 6871, loss: -1.0649, total reward: -0.0700
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 more
[83, 67, 8]
Episode 6872, loss: -1.9908, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 0 0 equal Comparison 0 2 less Comparison 2 2 equal
[42, 57, 65]
Episode 6873, loss: -0.9416, total reward: -0.0700
len3 Swap
[46, 8, 71]
Episode 6874, loss: -0.6053, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[92, 65, 7]
Episode 6875, loss: -1.0491, total reward: -0.0700
len3 Swap
[99, 31, 53]
Episode 6876, loss: -0.5830, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 0 2 less
[51, 64, 96]
Episode 6877, loss: -1.2

len3 Comparison 2 2 equal Swap
[29, 76, 87]
Episode 6944, loss: -0.1416, total reward: 9.9800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[18, 72, 63]
Episode 6945, loss: -1.0078, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[25, 69, 64]
Episode 6946, loss: -0.5626, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 0 less
[87, 7, 31]
Episode 6947, loss: -0.9617, total reward: -0.0600
len3 Comparison 1 2 more Comparison 0 2 more Comparison 1 2 more Comparison 2 1 less
[45, 89, 25]
Episode 6948, loss: -1.6349, total reward: -0.0400
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[89, 59, 6]
Episode 6949, loss: -1.0863, total reward: -0.0800
len3 Swap
[63, 21, 85]
Episode 6950, loss: -0.6445, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Swap Comparison 2 2 equal Comp

Episode 7015, loss: -0.8111, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 1 2 more Comparison 2 2 equal
[39, 46, 21]
Episode 7016, loss: -0.8376, total reward: -0.0600
len3 Swap
[98, 41, 49]
Episode 7017, loss: -0.7838, total reward: -1.0000
len3 Swap
[62, 52, 35]
Episode 7018, loss: -0.8123, total reward: -1.0000
len3 Swap
[62, 47, 27]
Episode 7019, loss: -0.7176, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[3, 2, 7]
Episode 7020, loss: -0.9064, total reward: -0.0700
len3 Comparison 2 2 equal Swap
[5, 37, 80]
Episode 7021, loss: -0.1221, total reward: 9.9800
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 0 2 less Swap Comparison 0
[74, 32, 12]
Episode 7022, loss: -0.8521, total reward: -0.1400
len3 Swap
[41, 23, 36]
Episode 7023, loss: -0.7934, total reward: -1.0000
len3 Swap
[13, 77, 91]
Episode 7024, loss: -0.7530, total reward: -1.0000
len3 Swap
[90, 47, 36]
Episode 7025, l

Episode 7090, loss: -0.6931, total reward: -1.0000
len3 Swap
[40, 76, 75]
Episode 7091, loss: -0.8043, total reward: -1.0000
len3 Swap
[64, 99, 20]
Episode 7092, loss: -0.7583, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 0 more Comparison 2 2 equal
[60, 39, 77]
Episode 7093, loss: -0.8757, total reward: -0.0600
len3 Swap
[84, 76, 82]
Episode 7094, loss: -0.7978, total reward: -1.0000
len3 Swap
[61, 37, 52]
Episode 7095, loss: -0.7455, total reward: -1.0000
len3 Swap
[11, 14, 95]
Episode 7096, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 equal Comparison 2 2 equal Comparison 2 2 equal
[94, 82, 82]
Episode 7097, loss: -0.8781, total reward: -0.0800
len3 Swap
[77, 67, 47]
Episode 7098, loss: -0.7325, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 0 less
[55, 49, 7]
Episode 7099, loss: -0.6557, total reward: -0.0600
len3 Swap
[44, 82, 10]
Episode 7100, loss: -0.7239,

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[21, 13, 35]
Episode 7164, loss: -0.9749, total reward: -0.0700
len3 Swap
[70, 73, 13]
Episode 7165, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[45, 8, 71]
Episode 7166, loss: -0.9797, total reward: -0.0800
len3 Swap
[29, 35, 82]
Episode 7167, loss: -0.6794, total reward: -1.0000
len3 Swap
[82, 41, 18]
Episode 7168, loss: -0.6633, total reward: -1.0000
len3 Comparison 2 0 less Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[30, 89, 13]
Episode 7169, loss: -0.7668, total reward: -0.0600
len3 Swap
[76, 64, 38]
Episode 7170, loss: -0.6562, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 1 0 more Comparison 1 2 more
[87, 100, 47]
Episode 7171, loss: -1.7170, total reward: -0.0500
len3 Swap
[17, 4, 73]
Episode 7172, loss: -0.6800, total reward: -1.0000
len3 Swap
[37, 3, 21]
Epi

Episode 7242, loss: -0.8494, total reward: -1.0000
len3 Swap
[95, 71, 45]
Episode 7243, loss: -1.0238, total reward: -1.0000
len3 Swap
[98, 93, 73]
Episode 7244, loss: -0.8954, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[40, 20, 61]
Episode 7245, loss: -0.9065, total reward: -0.0700
len3 Swap
[65, 86, 91]
Episode 7246, loss: -0.9019, total reward: -1.0000
len3 Swap
[44, 38, 71]
Episode 7247, loss: -0.9381, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[80, 59, 79]
Episode 7248, loss: -0.6166, total reward: -0.0700
len3 Swap
[66, 99, 23]
Episode 7249, loss: -1.0095, total reward: -1.0000
len3 Swap
[45, 12, 3]
Episode 7250, loss: -1.1181, total reward: -1.0000
len3 Swap
[74, 76, 71]
Episode 7251, loss: -1.0073, total reward: -1.0000
len3 Swap
[31, 72, 60]
Episode 7252, loss: -1.0473, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparis

Episode 7311, loss: -0.5902, total reward: -0.0500
len3 Comparison 1 2 more Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[17, 84, 18]
Episode 7312, loss: -1.0013, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 0 less Comparison 2 2 equal Comparison 1 2 less
[54, 19, 36]
Episode 7313, loss: -0.6806, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[72, 95, 13]
Episode 7314, loss: -0.7602, total reward: -0.0700
len3 Swap
[68, 53, 49]
Episode 7315, loss: -0.9338, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[64, 47, 67]
Episode 7316, loss: -0.9512, total reward: -0.0700
len3 Comparison 2 0 less Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more
[71, 66, 20]
Episode 7317, loss: -0.7123, total reward: -0.0500
len3 Swap
[47, 21, 82]
Episode 7318, loss: -0.9359, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal

len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[75, 57, 42]
Episode 7388, loss: -1.2533, total reward: -0.0600
len3 Swap
[27, 81, 55]
Episode 7389, loss: -0.6931, total reward: -1.0000
len3 Swap
[63, 74, 57]
Episode 7390, loss: -0.8131, total reward: -1.0000
len3 Swap
[37, 94, 35]
Episode 7391, loss: -0.8041, total reward: -1.0000
len3 Swap
[7, 80, 86]
Episode 7392, loss: -0.7917, total reward: -1.0000
len3 Swap
[94, 17, 22]
Episode 7393, loss: -0.9217, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 1 more Comparison 2 2 equal Comparison 2 2 equal
[69, 17, 50]
Episode 7394, loss: -0.7787, total reward: -0.0700
len3 Comparison 2 1 less Comparison 1 2 more Comparison 0 2 more Comparison 1 2 more
[92, 92, 83]
Episode 7395, loss: -0.5068, total reward: -0.0400
len3 Comparison 0 2 more Comparison 1 0 more Comparison 2 2 equal Comparison 2 2 equal
[57, 99, 2]
Episode 7396, loss: -1.1343, total reward: -0.0600
len3 Comparison 2 2 equal Com

len3 Comparison 0 2 less Comparison 2 2 equal Comparison 1 2 less Comparison 1 2 less
[28, 27, 89]
Episode 7459, loss: -1.7916, total reward: -0.0500
len3 Comparison 0 2 equal Comparison 0 2 equal Comparison 0 2 equal Swap Comparison 2
[89, 25, 89]
Episode 7460, loss: -0.8717, total reward: -0.0700
len3 Swap
[66, 79, 40]
Episode 7461, loss: -0.6965, total reward: -1.0000
len3 Swap
[65, 81, 29]
Episode 7462, loss: -0.6822, total reward: -1.0000
len3 Swap
[40, 50, 95]
Episode 7463, loss: -0.6808, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 1 2 less Comparison 0 0 equal
[3, 2, 94]
Episode 7464, loss: -1.1989, total reward: -0.0600
len3 Swap
[79, 55, 28]
Episode 7465, loss: -0.7295, total reward: -1.0000
len3 Swap
[94, 85, 50]
Episode 7466, loss: -0.6931, total reward: -1.0000
len3 Swap
[95, 27, 44]
Episode 7467, loss: -0.7477, total reward: -1.0000
len3 Swap
[58, 36, 15]
Episode 7468, loss: -0.7049, total reward: -1.0000
len3 Comparison 0 2 less Comparis

len3 Comparison 1 2 more Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 less
[14, 83, 45]
Episode 7538, loss: -0.9932, total reward: -0.0500
len3 Comparison 2 2 equal Swap Comparison 0 1 more Comparison 2 2 equal Comparison 2
[23, 2, 12]
Episode 7539, loss: -0.1119, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[38, 59, 99]
Episode 7540, loss: -0.9180, total reward: -0.0800
len3 Swap
[3, 38, 86]
Episode 7541, loss: -0.9298, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[19, 51, 36]
Episode 7542, loss: -0.8601, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[11, 43, 84]
Episode 7543, loss: -1.0896, total reward: -0.0800
len3 Swap
[32, 15, 2]
Episode 7544, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Swap Swap
[87, 82, 18]
Episode 7545, loss: 0.5193, total reward: -1.0300

len3 Comparison 1 2 more Comparison 1 2 more Swap
[55, 81, 89]
Episode 7608, loss: -0.5694, total reward: 9.9800
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 1 2 less Comparison 1 2 less
[71, 11, 58]
Episode 7609, loss: -1.6629, total reward: -0.0500
len3 Swap
[2, 92, 83]
Episode 7610, loss: -0.7912, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 more Comparison 2 2 equal
[89, 27, 55]
Episode 7611, loss: -0.9039, total reward: -0.0600
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[44, 96, 32]
Episode 7612, loss: -1.1961, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 more Swap
[13, 56, 74]
Episode 7613, loss: -0.9962, total reward: 9.9600
len3 Comparison 0 2 more Comparison 1 2 more Comparison 0 2 more Comparison 2 2 equal
[90, 35, 24]
Episode 7614, loss: -1.3588, total reward: -0.0500
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 

Episode 7680, loss: -0.9981, total reward: -0.0700
len3 Swap
[85, 19, 27]
Episode 7681, loss: -0.7395, total reward: -1.0000
len3 Swap
[45, 21, 97]
Episode 7682, loss: -0.7896, total reward: -1.0000
len3 Swap
[8, 56, 26]
Episode 7683, loss: -0.7608, total reward: -1.0000
len3 Swap
[34, 66, 79]
Episode 7684, loss: -0.7416, total reward: -1.0000
len3 Swap
[75, 81, 14]
Episode 7685, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[79, 47, 99]
Episode 7686, loss: -1.2437, total reward: -0.0800
len3 Swap
[78, 61, 64]
Episode 7687, loss: -0.8044, total reward: -1.0000
len3 Comparison 1 2 more Comparison 0 2 less Comparison 0 2 less Comparison 1 2 more
[33, 92, 41]
Episode 7688, loss: -1.1722, total reward: -0.0400
len3 Swap
[89, 24, 28]
Episode 7689, loss: -0.8043, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[96, 96, 26]
Episode 7690, loss: -0.7987, 

Episode 7754, loss: -1.7359, total reward: -0.0500
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[76, 9, 4]
Episode 7755, loss: -2.1969, total reward: -0.0700
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[99, 99, 93]
Episode 7756, loss: -0.2609, total reward: -0.0600
len3 Swap
[79, 65, 75]
Episode 7757, loss: -0.6931, total reward: -1.0000
len3 Swap
[30, 66, 73]
Episode 7758, loss: -0.7499, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 more Comparison 2 2 equal
[75, 55, 23]
Episode 7759, loss: -0.8605, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 equal Comparison 2 2 equal
[40, 48, 48]
Episode 7760, loss: -0.8652, total reward: -0.0800
len3 Comparison 2 0 less Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal
[93, 75, 2]
Episode 7761, loss: -0.7635, total reward: -0.0600
len3 Swap
[43, 38, 97]
Episode 7762, loss: -0.

len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 1 less Comparison 1 2 more
[38, 33, 32]
Episode 7826, loss: -1.0622, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 less Comparison 2 2 equal
[66, 94, 82]
Episode 7827, loss: -1.0020, total reward: -0.0600
len3 Swap
[41, 18, 61]
Episode 7828, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[21, 41, 43]
Episode 7829, loss: -0.9512, total reward: -0.0800
len3 Swap
[26, 76, 25]
Episode 7830, loss: -0.6047, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 1 more Comparison 2 2 equal Comparison 2 2 equal
[89, 3, 55]
Episode 7831, loss: -0.9248, total reward: -0.0700
len3 Comparison 0 2 less Comparison 1 2 less Comparison 1 2 less Comparison 2 2 equal
[78, 58, 98]
Episode 7832, loss: -1.2758, total reward: -0.0500
len3 Swap
[42, 78, 8]
Episode 7833, loss: -0.6402, total reward: -1.0000
len3 Swap
[71, 16, 75]
E

Episode 7899, loss: -0.9308, total reward: -0.0700
len3 Swap
[12, 21, 83]
Episode 7900, loss: -0.7172, total reward: -1.0000
len3 Swap
[55, 91, 27]
Episode 7901, loss: -0.7278, total reward: -1.0000
len3 Swap
[31, 50, 83]
Episode 7902, loss: -0.7229, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 1 2 less
[20, 12, 93]
Episode 7903, loss: -0.9362, total reward: -0.0600
len3 Swap
[63, 28, 52]
Episode 7904, loss: -0.6678, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 less
[90, 68, 88]
Episode 7905, loss: -0.8372, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[88, 38, 69]
Episode 7906, loss: -0.8567, total reward: -0.0700
len3 Comparison 0 2 less Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[43, 20, 88]
Episode 7907, loss: -1.7694, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal 

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 1 2 more
[15, 89, 76]
Episode 7973, loss: -0.8471, total reward: -0.0600
len3 Swap
[54, 44, 58]
Episode 7974, loss: -0.7533, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[44, 10, 96]
Episode 7975, loss: -1.3427, total reward: -0.0600
len3 Swap
[32, 4, 24]
Episode 7976, loss: -0.6931, total reward: -1.0000
len3 Swap
[62, 46, 90]
Episode 7977, loss: -0.7399, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 1 2 less Comparison 1 2 less
[91, 66, 76]
Episode 7978, loss: -1.3162, total reward: -0.0500
len3 Swap
[46, 96, 44]
Episode 7979, loss: -0.8142, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more
[54, 100, 71]
Episode 7980, loss: -0.8214, total reward: -0.0700
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[92, 69, 85]

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[45, 66, 67]
Episode 8048, loss: -0.7932, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[9, 3, 72]
Episode 8049, loss: -0.7675, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[35, 90, 37]
Episode 8050, loss: -1.2690, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[38, 1, 6]
Episode 8051, loss: -0.7824, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Swap
[39, 94, 94]
Episode 8052, loss: -0.6047, total reward: 9.9600
len3 Comparison 2 2 equal Comparison 0 2 less Swap Comparison 2 2 equal Comparison 2
[93, 97, 28]
Episode 8053, loss: -0.4832, total reward: -0.1500
len3 Swap
[10, 4, 38]
Episode 8054, loss: -0.8095, total reward: -1.0000
len3 Swap
[94, 85, 79]
Episode 8055, loss: -0.7939, total r

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 more Comparison 2 2 equal
[80, 3, 37]
Episode 8119, loss: -0.9392, total reward: -0.0700
len3 Swap
[86, 47, 47]
Episode 8120, loss: -0.6424, total reward: -1.0000
len3 Swap
[41, 63, 80]
Episode 8121, loss: -0.6931, total reward: -1.0000
len3 Swap
[11, 18, 67]
Episode 8122, loss: -0.6301, total reward: -1.0000
len3 Swap
[92, 32, 15]
Episode 8123, loss: -0.6077, total reward: -1.0000
len3 Swap
[27, 55, 41]
Episode 8124, loss: -0.6560, total reward: -1.0000
len3 Comparison 0 2 more Comparison 1 0 more Comparison 2 2 equal Comparison 2 2 equal
[50, 55, 29]
Episode 8125, loss: -0.1921, total reward: -0.0600
len3 Swap
[54, 8, 48]
Episode 8126, loss: -0.6493, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[18, 63, 10]
Episode 8127, loss: -0.8703, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[36, 13, 9]
Epi

Episode 8194, loss: -0.6931, total reward: -1.0000
len3 Swap
[42, 91, 70]
Episode 8195, loss: -0.7594, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal
[20, 78, 23]
Episode 8196, loss: -0.9155, total reward: -0.0700
len3 Swap
[28, 21, 79]
Episode 8197, loss: -0.8282, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[29, 53, 51]
Episode 8198, loss: -0.7223, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[91, 92, 32]
Episode 8199, loss: -0.7160, total reward: -0.0800
len3 Swap
[47, 59, 84]
Episode 8200, loss: -0.8048, total reward: -1.0000
len3 Swap
[20, 97, 61]
Episode 8201, loss: -0.9048, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 more Comparison 2 2 equal
[73, 66, 6]
Episode 8202, loss: -0.7894, total reward: -0.0600
len3 Swap
[23, 64, 16]
Episode 8203, loss: -0

Episode 8265, loss: -0.7623, total reward: -0.0700
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[99, 27, 31]
Episode 8266, loss: -1.6913, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[30, 29, 4]
Episode 8267, loss: -1.2136, total reward: -0.0700
len3 Swap
[98, 34, 87]
Episode 8268, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more Swap Comparison 2
[61, 5, 18]
Episode 8269, loss: -1.1770, total reward: 0.0600
len3 Swap
[51, 9, 90]
Episode 8270, loss: -0.8607, total reward: -1.0000
len3 Swap
[54, 80, 5]
Episode 8271, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[43, 72, 52]
Episode 8272, loss: -0.6995, total reward: -0.0700
len3 Swap
[36, 16, 34]
Episode 8273, loss: -0.8213, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Compari

Episode 8338, loss: -0.9384, total reward: -0.0700
len3 Swap
[25, 43, 3]
Episode 8339, loss: -0.6190, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Swap
[6, 38, 77]
Episode 8340, loss: -0.8345, total reward: 9.9700
len3 Swap
[83, 99, 11]
Episode 8341, loss: -0.6931, total reward: -1.0000
len3 Swap
[61, 28, 90]
Episode 8342, loss: -0.6823, total reward: -1.0000
len3 Swap
[6, 53, 61]
Episode 8343, loss: -0.5990, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[79, 45, 92]
Episode 8344, loss: -0.9611, total reward: -0.0800
len3 Swap
[18, 70, 65]
Episode 8345, loss: -0.6338, total reward: -1.0000
len3 Swap
[36, 4, 30]
Episode 8346, loss: -0.5536, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 less Swap Comparison 2 2 equal Comparison 2
[49, 71, 15]
Episode 8347, loss: -0.5674, total reward: -0.1500
len3 Comparison 2 2 equal Comparison 1 1 equal Comparison 2 2 equal Comparison 2 2 equal
[99,

len3 Comparison 2 2 equal Comparison 0 2 less Comparison 1 0 more Comparison 2 2 equal
[21, 31, 38]
Episode 8416, loss: -0.8103, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[64, 15, 92]
Episode 8417, loss: -0.7364, total reward: -0.0800
len3 Swap
[76, 27, 45]
Episode 8418, loss: -0.8790, total reward: -1.0000
len3 Swap
[1, 47, 92]
Episode 8419, loss: -0.8463, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[54, 62, 85]
Episode 8420, loss: -0.6986, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[20, 56, 51]
Episode 8421, loss: -0.7284, total reward: -0.0800
len3 Swap
[48, 99, 49]
Episode 8422, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[19, 53, 62]
Episode 8423, loss: -0.7598, total reward: -0.0700
len3 Swap
[93, 44

Episode 8484, loss: -0.6379, total reward: -1.0000
len3 Swap
[76, 54, 72]
Episode 8485, loss: -0.6931, total reward: -1.0000
len3 Swap
[17, 41, 22]
Episode 8486, loss: -0.5178, total reward: -1.0000
len3 Swap
[13, 71, 47]
Episode 8487, loss: -0.5726, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[7, 48, 42]
Episode 8488, loss: -1.0188, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[20, 29, 56]
Episode 8489, loss: -0.8580, total reward: -0.0700
len3 Swap
[44, 71, 9]
Episode 8490, loss: -0.6401, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[14, 43, 24]
Episode 8491, loss: -0.8855, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[50, 84, 2]
Episode 8492, loss: -1.0979, total reward: -0.0700
len3 Swap
[51, 25, 94]
Episode 8493, loss: -0.

len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 1
[15, 54, 41]
Episode 8564, loss: 0.0622, total reward: -0.0700
len3 Swap
[39, 95, 2]
Episode 8565, loss: -0.8102, total reward: -1.0000
len3 Swap
[46, 17, 59]
Episode 8566, loss: -0.8162, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[92, 98, 46]
Episode 8567, loss: -0.6240, total reward: -0.0800
len3 Swap
[47, 17, 77]
Episode 8568, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[93, 38, 68]
Episode 8569, loss: 0.1697, total reward: -0.0700
len3 Swap
[2, 82, 3]
Episode 8570, loss: -0.7963, total reward: -1.0000
len3 Swap
[61, 37, 9]
Episode 8571, loss: -0.9103, total reward: -1.0000
len3 Swap
[20, 70, 30]
Episode 8572, loss: -0.8719, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[90, 20, 94]
Episo

Episode 8632, loss: -0.8834, total reward: -0.0700
len3 Comparison 2 1 less Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[34, 69, 53]
Episode 8633, loss: -0.7198, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 1
[76, 34, 76]
Episode 8634, loss: -1.0027, total reward: -0.0700
len3 Swap
[69, 94, 11]
Episode 8635, loss: -0.6861, total reward: -1.0000
len3 Swap
[8, 92, 70]
Episode 8636, loss: -0.6638, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[19, 20, 19]
Episode 8637, loss: -0.9107, total reward: -0.0800
len3 Swap
[56, 63, 95]
Episode 8638, loss: -0.6332, total reward: -1.0000
len3 Swap
[92, 19, 74]
Episode 8639, loss: -0.7091, total reward: -1.0000
len3 Swap
[54, 18, 53]
Episode 8640, loss: -0.6569, total reward: -1.0000
len3 Swap
[47, 51, 54]
Episode 8641, loss: -0.6062, total reward: -1.0000
len3 Swap
[77, 10, 64]
Episode 8642, loss: -0.6073, 

len3 Comparison 2 2 equal Comparison 1 2 less Swap Comparison 2 2 equal Comparison 2
[72, 26, 11]
Episode 8705, loss: -0.4874, total reward: -0.1500
len3 Swap
[6, 17, 86]
Episode 8706, loss: -0.6337, total reward: -1.0000
len3 Swap
[57, 35, 15]
Episode 8707, loss: -0.6158, total reward: -1.0000
len3 Swap
[66, 55, 4]
Episode 8708, loss: -0.6346, total reward: -1.0000
len3 Swap
[13, 33, 33]
Episode 8709, loss: -0.5843, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Swap Comparison 2 2 equal Comparison 2
[21, 12, 48]
Episode 8710, loss: 0.5047, total reward: 0.0500
len3 Swap
[39, 19, 25]
Episode 8711, loss: -0.5995, total reward: -1.0000
len3 Swap
[34, 23, 72]
Episode 8712, loss: -0.5914, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[24, 97, 56]
Episode 8713, loss: -0.9391, total reward: -0.0700
len3 Swap
[87, 12, 29]
Episode 8714, loss: -0.6481, total reward: -1.0000
len3 Swap
[23, 27, 59]
Episode 8715,

Episode 8778, loss: -0.7750, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 1 2 less
[90, 28, 100]
Episode 8779, loss: -1.2097, total reward: -0.0600
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[93, 76, 98]
Episode 8780, loss: -1.3664, total reward: -0.0600
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[34, 88, 50]
Episode 8781, loss: -2.3644, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[11, 23, 25]
Episode 8782, loss: -0.8662, total reward: -0.0800
len3 Comparison 0 0 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[6, 11, 78]
Episode 8783, loss: -1.5220, total reward: -0.0800
len3 Swap
[98, 43, 30]
Episode 8784, loss: -0.6508, total reward: -1.0000
len3 Swap
[42, 32, 28]
Episode 8785, loss: -0.6991, total reward: -1.0000
len3 Comparison 1 2 less Comparison 1 2 l

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[73, 45, 78]
Episode 8851, loss: -1.2434, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal Swap Comparison 2
[10, 89, 28]
Episode 8852, loss: -0.7625, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more
[39, 51, 80]
Episode 8853, loss: -0.8717, total reward: -0.0700
len3 Swap
[75, 78, 40]
Episode 8854, loss: -0.6982, total reward: -1.0000
len3 Swap
[58, 35, 44]
Episode 8855, loss: -0.7062, total reward: -1.0000
len3 Swap
[66, 8, 1]
Episode 8856, loss: -0.6177, total reward: -1.0000
len3 Swap
[15, 43, 98]
Episode 8857, loss: -0.6931, total reward: -1.0000
len3 Swap
[30, 15, 3]
Episode 8858, loss: -0.7051, total reward: -1.0000
len3 Swap
[98, 12, 100]
Episode 8859, loss: -0.6171, total reward: -1.0000
len3 Swap
[91, 78, 74]
Episode 8860, loss: -0.6825, total reward: -1.0000
len3 Comparison 2 2 equal Comp

len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 0 more Swap Swap
[51, 99, 24]
Episode 8928, loss: -0.2854, total reward: -1.1400
len3 Swap
[94, 19, 86]
Episode 8929, loss: -0.7767, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Swap Comparison 2 2 equal Comparison 2
[59, 73, 58]
Episode 8930, loss: -0.4449, total reward: -0.1500
len3 Swap
[47, 20, 9]
Episode 8931, loss: -0.8313, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2 2 equal Swap Comparison
[100, 11, 48]
Episode 8932, loss: -0.4508, total reward: -0.0800
len3 Swap
[83, 55, 82]
Episode 8933, loss: -0.8212, total reward: -1.0000
len3 Comparison 1 2 less Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 0
[66, 90, 26]
Episode 8934, loss: -1.3945, total reward: -0.1500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[69, 46, 99]
Episode 8935, loss: -0.7806, total reward: -0.0700
len3 Comparison 2 2 equal Compariso

Episode 8999, loss: -1.2356, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[66, 18, 46]
Episode 9000, loss: -0.9274, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[50, 51, 69]
Episode 9001, loss: -0.8578, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[70, 95, 29]
Episode 9002, loss: -1.5191, total reward: -0.0600
len3 Swap
[20, 84, 39]
Episode 9003, loss: -0.8059, total reward: -1.0000
len3 Swap
[56, 1, 33]
Episode 9004, loss: -0.7646, total reward: -1.0000
len3 Swap
[81, 54, 59]
Episode 9005, loss: -0.6962, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 1 2 more Comparison 2 2 equal
[85, 43, 9]
Episode 9006, loss: -0.9461, total reward: -0.0600
len3 Swap
[73, 26, 70]
Episode 9007, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Com

Episode 9073, loss: -0.8337, total reward: -0.0600
len3 Swap
[49, 18, 37]
Episode 9074, loss: -0.8542, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 1 1 equal Comparison 2 2 equal
[93, 64, 73]
Episode 9075, loss: -1.3924, total reward: -0.0700
len3 Comparison 0 2 more Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal
[55, 98, 49]
Episode 9076, loss: -1.6513, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[33, 2, 35]
Episode 9077, loss: -0.7489, total reward: -0.0700
len3 Swap
[83, 58, 61]
Episode 9078, loss: -0.7701, total reward: -1.0000
len3 Swap
[31, 75, 39]
Episode 9079, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 0 2 less Comparison 2 0 more
[50, 65, 66]
Episode 9080, loss: -1.4018, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[74, 48, 69]
Episode 9081, loss: -0

Episode 9148, loss: -0.7713, total reward: -1.0000
len3 Swap
[84, 32, 87]
Episode 9149, loss: -0.8807, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[28, 93, 6]
Episode 9150, loss: -1.5567, total reward: -0.0700
len3 Comparison 2 1 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[29, 65, 91]
Episode 9151, loss: 0.4723, total reward: -0.0700
len3 Swap
[56, 25, 22]
Episode 9152, loss: -0.8293, total reward: -1.0000
len3 Swap
[85, 31, 51]
Episode 9153, loss: -0.7658, total reward: -1.0000
len3 Swap
[1, 21, 68]
Episode 9154, loss: -0.8906, total reward: -1.0000
len3 Swap
[56, 32, 48]
Episode 9155, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 1 more Comparison 0 2 more Comparison 0 2 more
[74, 2, 52]
Episode 9156, loss: -1.4209, total reward: -0.0400
len3 Swap
[68, 69, 93]
Episode 9157, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Compariso

Episode 9219, loss: -0.1716, total reward: -0.0600
len3 Swap
[46, 35, 25]
Episode 9220, loss: -0.8055, total reward: -1.0000
len3 Swap
[53, 87, 91]
Episode 9221, loss: -0.8124, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 0 2 less
[69, 71, 79]
Episode 9222, loss: -0.7024, total reward: -0.0600
len3 Swap
[5, 11, 32]
Episode 9223, loss: -0.8492, total reward: -1.0000
len3 Swap
[36, 62, 28]
Episode 9224, loss: -0.8703, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more Comparison 1 2 less
[13, 28, 76]
Episode 9225, loss: -0.8567, total reward: -0.0600
len3 Swap
[15, 91, 42]
Episode 9226, loss: -0.8481, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 0 2 less
[5, 85, 41]
Episode 9227, loss: -0.7769, total reward: -0.0600
len3 Comparison 1 2 less Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[64, 15, 50]
Episode 9228, loss: -1.0425,

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 more
[15, 9, 6]
Episode 9293, loss: -0.6681, total reward: -0.0600
len3 Comparison 1 2 more Comparison 0 2 equal Comparison 1 2 more Comparison 2 2 equal
[47, 83, 47]
Episode 9294, loss: -0.6352, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[42, 94, 53]
Episode 9295, loss: -0.6492, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[52, 17, 49]
Episode 9296, loss: -0.4245, total reward: -0.0700
len3 Swap
[40, 59, 91]
Episode 9297, loss: -1.0074, total reward: -1.0000
len3 Swap
[84, 72, 87]
Episode 9298, loss: -0.9371, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[94, 81, 98]
Episode 9299, loss: -0.7337, total reward: -0.0700
len3 Swap
[76, 94, 74]
Episode 9300, loss: -0.9401, total reward: -1.0000
len3 Comparison 2 2 eq

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[60, 57, 92]
Episode 9366, loss: -0.8618, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 0 more Comparison 2 0 more Comparison 0 2 less
[49, 50, 51]
Episode 9367, loss: -0.8154, total reward: -0.0500
len3 Comparison 1 2 less Comparison 1 2 less Comparison 1 2 less Comparison 2 2 equal
[52, 6, 81]
Episode 9368, loss: -0.5122, total reward: -0.0500
len3 Comparison 0 0 equal Comparison 1 2 more Comparison 0 2 more Comparison 2 2 equal
[64, 15, 5]
Episode 9369, loss: -0.7989, total reward: -0.0600
len3 Swap
[6, 22, 2]
Episode 9370, loss: -0.7143, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[46, 6, 51]
Episode 9371, loss: -0.8611, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[64, 57, 40]
Episode 9372, loss: -0.8771, total reward: -0.0700
len3 Comparison 1 2 mor

Episode 9437, loss: -1.3728, total reward: -0.0500
len3 Comparison 0 0 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[52, 92, 82]
Episode 9438, loss: -0.2968, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 1 2 more Comparison 1 2 more
[75, 89, 83]
Episode 9439, loss: -0.8617, total reward: -0.0500
len3 Swap
[16, 85, 14]
Episode 9440, loss: -0.7990, total reward: -1.0000
len3 Swap
[69, 94, 95]
Episode 9441, loss: -0.8033, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[94, 40, 57]
Episode 9442, loss: -1.2557, total reward: -0.0600
len3 Swap
[39, 13, 26]
Episode 9443, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 1 less Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more
[82, 86, 48]
Episode 9444, loss: -0.7455, total reward: -0.0500
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[11, 98, 80]
Episode 9445, loss: -1.

Episode 9509, loss: -0.9709, total reward: -0.0600
len3 Swap
[87, 98, 74]
Episode 9510, loss: -0.7213, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[19, 99, 68]
Episode 9511, loss: -0.0160, total reward: -0.0800
len3 Swap
[18, 86, 90]
Episode 9512, loss: -0.7898, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[38, 9, 22]
Episode 9513, loss: -1.0049, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[87, 32, 84]
Episode 9514, loss: -0.9369, total reward: -0.0800
len3 Swap
[19, 45, 52]
Episode 9515, loss: -0.6778, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 1 more Comparison 2 2 equal
[4, 20, 65]
Episode 9516, loss: -1.0156, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Swap Comparison 2
[77, 67, 75]
Episode 9517, loss: -1

Episode 9580, loss: -0.6502, total reward: 0.0400
len3 Swap
[22, 20, 19]
Episode 9581, loss: -0.6292, total reward: -1.0000
len3 Comparison 1 0 less Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[96, 71, 87]
Episode 9582, loss: -1.2206, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 less Comparison 0 2 less
[1, 32, 22]
Episode 9583, loss: -0.9609, total reward: -0.0500
len3 Swap
[32, 31, 42]
Episode 9584, loss: -0.6939, total reward: -1.0000
len3 Comparison 2 0 less Comparison 2 0 less Comparison 1 2 more Comparison 2 2 equal
[49, 72, 38]
Episode 9585, loss: -0.6232, total reward: -0.0500
len3 Swap
[81, 11, 23]
Episode 9586, loss: -0.6957, total reward: -1.0000
len3 Swap
[28, 34, 72]
Episode 9587, loss: -0.6539, total reward: -1.0000
len3 Swap
[23, 52, 64]
Episode 9588, loss: -0.6034, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[24, 3, 29]
Episode 9589, loss: -0.9585, 

len3 Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[77, 59, 55]
Episode 9654, loss: -1.6823, total reward: -0.0600
len3 Swap
[85, 50, 82]
Episode 9655, loss: -0.6547, total reward: -1.0000
len3 Swap
[51, 2, 37]
Episode 9656, loss: -0.6832, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 less Comparison 2 2 equal
[14, 93, 39]
Episode 9657, loss: -0.9602, total reward: -0.0600
len3 Swap
[17, 90, 50]
Episode 9658, loss: -0.7354, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 less Comparison 1 2 less
[66, 21, 38]
Episode 9659, loss: -0.9496, total reward: -0.0500
len3 Swap
[82, 30, 78]
Episode 9660, loss: -0.7656, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[80, 15, 10]
Episode 9661, loss: -1.6405, total reward: -0.0600
len3 Swap
[22, 14, 53]
Episode 9662, loss: -0.7294, total reward: -1.0000
len3 Comparison 2 2 equal Co

len3 Comparison 2 2 equal Comparison 1 2 less Comparison 0 2 more Comparison 2 2 equal
[96, 5, 83]
Episode 9724, loss: -1.0076, total reward: -0.0600
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 more Swap Comparison 1
[33, 72, 46]
Episode 9725, loss: -0.8341, total reward: 0.0600
len3 Swap
[81, 100, 77]
Episode 9726, loss: -0.6548, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[88, 58, 68]
Episode 9727, loss: -0.2089, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[33, 6, 54]
Episode 9728, loss: -1.0181, total reward: -0.0700
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[24, 92, 3]
Episode 9729, loss: -0.6800, total reward: -0.0700
len3 Swap
[19, 37, 10]
Episode 9730, loss: -0.6037, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 0 2 less
[47, 76, 86]

Episode 9797, loss: -0.5027, total reward: -0.0600
len3 Comparison 1 2 less Comparison 0 2 less Comparison 1 2 less Comparison 2 0 more
[62, 80, 87]
Episode 9798, loss: -1.4285, total reward: -0.0400
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[23, 53, 29]
Episode 9799, loss: -1.1468, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[35, 9, 35]
Episode 9800, loss: -0.2834, total reward: -0.0700
len3 Swap
[74, 72, 14]
Episode 9801, loss: -0.5443, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2
[35, 24, 93]
Episode 9802, loss: -0.9524, total reward: -0.0700
len3 Swap
[86, 55, 28]
Episode 9803, loss: -0.6931, total reward: -1.0000
len3 Swap
[81, 15, 45]
Episode 9804, loss: -0.5427, total reward: -1.0000
len3 Swap
[1, 84, 44]
Episode 9805, loss: -0.5515, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 2 0 less C

Episode 9873, loss: -1.0577, total reward: -0.0700
len3 Comparison 2 1 less Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[83, 76, 65]
Episode 9874, loss: -0.9201, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 1 1 equal Comparison 0 2 more
[24, 15, 6]
Episode 9875, loss: -0.9320, total reward: -0.0600
len3 Swap
[93, 33, 11]
Episode 9876, loss: -0.7161, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 0 2 less
[57, 20, 92]
Episode 9877, loss: -0.9811, total reward: -0.0600
len3 Swap
[97, 64, 29]
Episode 9878, loss: -0.6008, total reward: -1.0000
len3 Swap
[45, 77, 72]
Episode 9879, loss: -0.6266, total reward: -1.0000
len3 Swap
[55, 63, 23]
Episode 9880, loss: -0.6944, total reward: -1.0000
len3 Swap
[73, 86, 99]
Episode 9881, loss: -0.6359, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[50, 63, 99]
Episode 9882, loss: -1.048

Episode 9945, loss: -1.0164, total reward: -0.0700
len3 Swap
[82, 16, 43]
Episode 9946, loss: -0.6931, total reward: -1.0000
len3 Swap
[59, 8, 43]
Episode 9947, loss: -0.6902, total reward: -1.0000
len3 Swap
[22, 7, 91]
Episode 9948, loss: -0.7195, total reward: -1.0000
len3 Swap
[95, 37, 86]
Episode 9949, loss: -0.6308, total reward: -1.0000
len3 Comparison 0 2 more Comparison 1 0 more Comparison 2 2 equal Comparison 1 2 more
[12, 45, 10]
Episode 9950, loss: -1.1895, total reward: -0.0500
len3 Swap
[99, 49, 94]
Episode 9951, loss: -0.6475, total reward: -1.0000
len3 Swap
[44, 52, 88]
Episode 9952, loss: -0.6927, total reward: -1.0000
len3 Swap
[78, 73, 29]
Episode 9953, loss: -0.7121, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 more Comparison 2 2 equal
[80, 96, 75]
Episode 9954, loss: -0.9076, total reward: -0.0600
len3 Swap
[28, 81, 18]
Episode 9955, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 1

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[58, 11, 60]
Episode 10020, loss: -0.8669, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 less
[100, 8, 98]
Episode 10021, loss: -0.8539, total reward: -0.0700
len3 Swap
[67, 62, 45]
Episode 10022, loss: -0.8043, total reward: -1.0000
len3 Comparison 1 2 more Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[16, 76, 43]
Episode 10023, loss: -1.2804, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 1 more Comparison 2 2 equal Comparison 2 2 equal
[90, 29, 99]
Episode 10024, loss: -0.8712, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[1, 38, 23]
Episode 10025, loss: -0.7393, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[46, 69, 23]
Episode 10026, loss: -0.8327, total reward: -0.0800
len3 Swa

len3 Comparison 2 2 equal Swap Comparison 0 1 less Comparison 1 2 more Comparison 1
[4, 75, 57]
Episode 10091, loss: -0.3717, total reward: -0.0500
len3 Comparison 1 2 less Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[58, 25, 75]
Episode 10092, loss: -1.4978, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[58, 89, 86]
Episode 10093, loss: -0.9306, total reward: -0.0800
len3 Swap
[15, 62, 57]
Episode 10094, loss: -0.6735, total reward: -1.0000
len3 Swap
[71, 96, 96]
Episode 10095, loss: -0.6324, total reward: -1.0000
len3 Swap
[90, 18, 86]
Episode 10096, loss: -0.6667, total reward: -1.0000
len3 Swap
[69, 64, 15]
Episode 10097, loss: -0.6103, total reward: -1.0000
len3 Comparison 1 2 more Comparison 1 2 more Comparison 1 2 more Comparison 1 2 more
[11, 74, 31]
Episode 10098, loss: -1.5922, total reward: -0.0400
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more
[32, 86, 

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 more
[39, 44, 38]
Episode 10164, loss: -1.1308, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 0 2 more Comparison 2 2 equal
[20, 2, 13]
Episode 10165, loss: -1.0659, total reward: -0.0600
len3 Swap
[56, 63, 10]
Episode 10166, loss: -0.5326, total reward: -1.0000
len3 Swap
[65, 85, 74]
Episode 10167, loss: -0.5649, total reward: -1.0000
len3 Swap
[97, 59, 87]
Episode 10168, loss: -0.6002, total reward: -1.0000
len3 Swap
[10, 45, 3]
Episode 10169, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[99, 20, 6]
Episode 10170, loss: -1.0318, total reward: -0.0700
len3 Swap
[14, 82, 86]
Episode 10171, loss: -0.6484, total reward: -1.0000
len3 Comparison 1 2 less Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[48, 36, 64]
Episode 10172, loss: -1.4285, total reward: -0.0600
len3 Comparison 2 2

len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 0 less Comparison 2 2 equal
[42, 6, 8]
Episode 10241, loss: -0.9004, total reward: -0.0600
len3 Swap
[48, 63, 37]
Episode 10242, loss: -0.7622, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[61, 32, 96]
Episode 10243, loss: -0.8077, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 more
[29, 6, 1]
Episode 10244, loss: -0.8914, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 1 2 less
[10, 18, 74]
Episode 10245, loss: -0.7874, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 1 less
[75, 95, 3]
Episode 10246, loss: -1.1457, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[57, 18, 67]
Episode 10247, loss: -0.8284, total reward: -0.0800
len3 Comparison

Episode 10312, loss: -0.9679, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[15, 49, 52]
Episode 10313, loss: -0.9835, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[28, 51, 93]
Episode 10314, loss: -0.9693, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[12, 94, 12]
Episode 10315, loss: -1.3745, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[39, 41, 35]
Episode 10316, loss: -1.0955, total reward: -0.0700
len3 Swap
[26, 41, 64]
Episode 10317, loss: -0.6191, total reward: -1.0000
len3 Swap
[89, 25, 68]
Episode 10318, loss: -0.6062, total reward: -1.0000
len3 Comparison 2 0 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[40, 38, 38]
Episode 10319, loss: -0.7442, total reward: -0.0700
len3 Swap
[84, 69, 94]
Episode 

len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Swap Comparison 2
[100, 49, 35]
Episode 10387, loss: -0.9158, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[41, 47, 7]
Episode 10388, loss: -0.8067, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal
[58, 4, 94]
Episode 10389, loss: -0.9470, total reward: -0.0700
len3 Swap
[30, 49, 28]
Episode 10390, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 1 2 less
[57, 76, 98]
Episode 10391, loss: -0.8956, total reward: -0.0600
len3 Comparison 1 2 more Comparison 2 2 equal Swap Comparison 1 2 more Comparison 2
[47, 33, 10]
Episode 10392, loss: -1.1012, total reward: -0.0500
len3 Swap
[72, 98, 27]
Episode 10393, loss: -0.6817, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[75, 

len3 Comparison 2 1 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[22, 75, 100]
Episode 10457, loss: -0.5296, total reward: -0.0700
len3 Swap
[68, 58, 1]
Episode 10458, loss: -0.5382, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[13, 37, 78]
Episode 10459, loss: -0.5468, total reward: -0.0700
len3 Swap
[52, 74, 36]
Episode 10460, loss: -0.5703, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal
[39, 86, 40]
Episode 10461, loss: -1.1556, total reward: -0.0700
len3 Swap
[14, 79, 29]
Episode 10462, loss: -0.5185, total reward: -1.0000
len3 Swap
[58, 44, 36]
Episode 10463, loss: -0.4198, total reward: -1.0000
len3 Comparison 1 2 less Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[89, 38, 85]
Episode 10464, loss: -1.6042, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal


Episode 10533, loss: -2.1719, total reward: -0.0500
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[36, 97, 18]
Episode 10534, loss: -1.7086, total reward: -0.0600
len3 Swap
[9, 73, 13]
Episode 10535, loss: -0.8052, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[8, 51, 3]
Episode 10536, loss: -0.3603, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 1 0 more Comparison 2 2 equal Comparison 2 2 equal
[53, 84, 65]
Episode 10537, loss: -0.7458, total reward: -0.0700
len3 Comparison 2 2 equal Swap Comparison 0 2 less Comparison 2 2 equal Comparison 0
[74, 26, 96]
Episode 10538, loss: -0.0910, total reward: -0.0600
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[40, 16, 99]
Episode 10539, loss: -1.7574, total reward: -0.0600
len3 Swap
[36, 65, 82]
Episode 10540, loss: -0.7810, total reward: -1.0000
len3 Swap
[24, 40, 85]
Episode 10541, lo

Episode 10605, loss: -0.8462, total reward: -0.0800
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[4, 12, 71]
Episode 10606, loss: -2.0720, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal Comparison 2 2 equal
[13, 6, 76]
Episode 10607, loss: -0.8521, total reward: -0.0700
len3 Comparison 2 2 equal Swap
[65, 82, 82]
Episode 10608, loss: -0.2728, total reward: 9.9800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[59, 36, 8]
Episode 10609, loss: -0.8447, total reward: -0.0700
len3 Swap
[59, 88, 43]
Episode 10610, loss: -0.7212, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 more Comparison 2 2 equal
[84, 53, 28]
Episode 10611, loss: -0.9493, total reward: -0.0600
len3 Swap
[69, 33, 98]
Episode 10612, loss: -0.7624, total reward: -1.0000
len3 Swap
[68, 14, 45]
Episode 10613, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 

len3 Comparison 2 0 less Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[34, 39, 17]
Episode 10680, loss: -0.1626, total reward: -0.0600
len3 Swap
[58, 99, 38]
Episode 10681, loss: -0.7787, total reward: -1.0000
len3 Swap
[82, 14, 64]
Episode 10682, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 2 less Comparison 1 2 less Comparison 1 2 less Comparison 2 2 equal
[24, 23, 69]
Episode 10683, loss: -0.7999, total reward: -0.0500
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[69, 18, 82]
Episode 10684, loss: -2.1908, total reward: -0.0700
len3 Swap
[51, 30, 28]
Episode 10685, loss: -0.7705, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more Swap
[27, 28, 34]
Episode 10686, loss: -1.8578, total reward: 9.9600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[21, 73, 26]
Episode 10687, loss: -0.8505, total reward: -0.0800
len3 Comparison 2 2 equal Comp

len3 Comparison 2 2 equal Comparison 0 2 less Comparison 1 2 more Comparison 2 2 equal
[12, 55, 52]
Episode 10753, loss: -0.9540, total reward: -0.0600
len3 Swap
[40, 65, 51]
Episode 10754, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[60, 95, 94]
Episode 10755, loss: -1.2938, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 more Comparison 2 2 equal
[63, 24, 56]
Episode 10756, loss: -0.8449, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Swap
[31, 66, 71]
Episode 10757, loss: -0.6644, total reward: 9.9600
len3 Swap
[53, 83, 53]
Episode 10758, loss: -0.7939, total reward: -1.0000
len3 Swap
[22, 59, 85]
Episode 10759, loss: -0.7756, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[25, 61, 50]
Episode 10760, loss: -0.7983, total reward: -0.0700
len3 Swap
[19, 73, 27]
Episode 10761, loss: -0.7

len3 Comparison 2 2 equal Comparison 1 0 less Comparison 2 1 more Comparison 1 2 less
[32, 14, 76]
Episode 10826, loss: -0.3515, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 more
[84, 25, 66]
Episode 10827, loss: -0.7981, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[58, 25, 55]
Episode 10828, loss: -0.7438, total reward: -0.0700
len3 Swap
[22, 22, 20]
Episode 10829, loss: -0.8241, total reward: -1.0000
len3 Swap
[52, 82, 20]
Episode 10830, loss: -0.7816, total reward: -1.0000
len3 Swap
[4, 58, 8]
Episode 10831, loss: -0.7769, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[4, 59, 85]
Episode 10832, loss: -0.2933, total reward: -0.0700
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 0 more Comparison 0 2 less
[28, 12, 71]
Episode 10833, loss: -1.3746, total reward: -0.0500
len3 Swap
[97, 4, 

len3 Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal
[92, 27, 12]
Episode 10897, loss: -0.9188, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[96, 63, 20]
Episode 10898, loss: -0.0844, total reward: -0.0800
len3 Swap
[57, 39, 57]
Episode 10899, loss: -0.6693, total reward: -1.0000
len3 Swap
[92, 79, 62]
Episode 10900, loss: -0.6443, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 1 2 less
[14, 36, 41]
Episode 10901, loss: -1.0977, total reward: -0.0600
len3 Swap
[89, 72, 87]
Episode 10902, loss: -0.6559, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 equal
[31, 50, 31]
Episode 10903, loss: -0.8655, total reward: -0.0800
len3 Swap
[71, 2, 37]
Episode 10904, loss: -0.7050, total reward: -1.0000
len3 Swap
[33, 30, 69]
Episode 10905, loss: -0.7002, total reward: -1.0000
len3 Swap
[90

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[59, 30, 77]
Episode 10975, loss: -0.7781, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[5, 53, 92]
Episode 10976, loss: -0.6943, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Swap Comparison 1
[99, 5, 30]
Episode 10977, loss: -0.7517, total reward: -0.1500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[74, 56, 64]
Episode 10978, loss: -0.9134, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[53, 7, 5]
Episode 10979, loss: -0.7016, total reward: -0.0800
len3 Swap
[54, 26, 18]
Episode 10980, loss: -0.9766, total reward: -1.0000
len3 Swap
[92, 50, 61]
Episode 10981, loss: -0.8951, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[6

Episode 11043, loss: -1.8136, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more
[7, 78, 29]
Episode 11044, loss: -0.7153, total reward: -0.0600
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 1
[95, 3, 52]
Episode 11045, loss: 0.1763, total reward: -0.0700
len3 Swap
[45, 6, 67]
Episode 11046, loss: -0.8509, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2
[44, 61, 51]
Episode 11047, loss: -0.2041, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 less Comparison 2 2 equal
[42, 84, 76]
Episode 11048, loss: -0.7430, total reward: -0.0600
len3 Swap
[22, 41, 48]
Episode 11049, loss: -0.8618, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 0 less
[66, 59, 50]
Episode 11050, loss: -1.1317, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equ

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[24, 46, 38]
Episode 11117, loss: -0.7996, total reward: -0.0700
len3 Swap
[70, 33, 44]
Episode 11118, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 0 less Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[74, 23, 18]
Episode 11119, loss: -0.9123, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[99, 31, 50]
Episode 11120, loss: -0.6997, total reward: -0.0800
len3 Swap
[10, 15, 81]
Episode 11121, loss: -0.8815, total reward: -1.0000
len3 Swap
[64, 74, 17]
Episode 11122, loss: -0.8343, total reward: -1.0000
len3 Swap
[71, 97, 70]
Episode 11123, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 less Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[85, 5, 12]
Episode 11124, loss: -1.5477, total reward: -0.0600
len3 Swap
[92, 18, 14]
Episode 11125, loss: -0.9039, total reward: -1.0000
len3 Swap
[74, 

Episode 11187, loss: -0.5675, total reward: -0.0700
len3 Swap
[72, 93, 47]
Episode 11188, loss: -0.7380, total reward: -1.0000
len3 Swap
[25, 95, 67]
Episode 11189, loss: -0.6686, total reward: -1.0000
len3 Swap
[20, 75, 53]
Episode 11190, loss: -0.6901, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[73, 12, 75]
Episode 11191, loss: -0.8370, total reward: -0.0700
len3 Swap
[76, 25, 21]
Episode 11192, loss: -0.7341, total reward: -1.0000
len3 Swap
[65, 86, 65]
Episode 11193, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[70, 68, 56]
Episode 11194, loss: -0.9033, total reward: -0.0800
len3 Comparison 2 0 less Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[58, 9, 4]
Episode 11195, loss: -0.2819, total reward: -0.0600
len3 Comparison 2 1 less Swap Comparison 1 2 less Comparison 2 2 equal Comparison 2
[71, 12, 17]
Episode 11196, loss

Episode 11260, loss: -0.7502, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[18, 4, 91]
Episode 11261, loss: -0.7916, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[45, 71, 59]
Episode 11262, loss: -0.7379, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[1, 60, 99]
Episode 11263, loss: -1.1434, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 0 2 more Comparison 2 2 equal
[85, 8, 47]
Episode 11264, loss: -0.7908, total reward: -0.0600
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 2
[99, 78, 18]
Episode 11265, loss: 0.1267, total reward: -0.0700
len3 Comparison 2 0 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[68, 3, 97]
Episode 11266, loss: -0.4020, total reward: -0.0700
len3 Swap
[14, 68, 5]
Episode 11267

len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[78, 77, 20]
Episode 11330, loss: -1.9602, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[83, 14, 36]
Episode 11331, loss: -0.9950, total reward: -0.0700
len3 Swap
[80, 14, 77]
Episode 11332, loss: -0.6115, total reward: -1.0000
len3 Swap
[10, 69, 81]
Episode 11333, loss: -0.6042, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[50, 48, 27]
Episode 11334, loss: -0.8535, total reward: -0.0800
len3 Swap
[90, 40, 26]
Episode 11335, loss: -0.6615, total reward: -1.0000
len3 Swap
[11, 45, 73]
Episode 11336, loss: -0.6453, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[47, 11, 18]
Episode 11337, loss: -0.9293, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 mor

Episode 11404, loss: -0.1662, total reward: -0.0700
len3 Swap
[51, 30, 87]
Episode 11405, loss: -0.7288, total reward: -1.0000
len3 Swap
[53, 91, 31]
Episode 11406, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[84, 60, 54]
Episode 11407, loss: -0.7799, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Swap Comparison 2
[22, 17, 60]
Episode 11408, loss: -0.8051, total reward: 0.0500
len3 Swap
[15, 57, 45]
Episode 11409, loss: -0.7198, total reward: -1.0000
len3 Swap
[77, 94, 31]
Episode 11410, loss: -0.7478, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[48, 48, 27]
Episode 11411, loss: -2.1471, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[7, 57, 14]
Episode 11412, loss: -0.8822, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 1 

len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[72, 73, 31]
Episode 11480, loss: -1.0944, total reward: -0.0700
len3 Swap
[23, 82, 37]
Episode 11481, loss: -0.6429, total reward: -1.0000
len3 Comparison 0 2 more Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[52, 42, 30]
Episode 11482, loss: -0.9323, total reward: -0.0600
len3 Swap
[71, 2, 63]
Episode 11483, loss: -0.6330, total reward: -1.0000
len3 Swap
[87, 94, 95]
Episode 11484, loss: -0.6931, total reward: -1.0000
len3 Swap
[66, 95, 8]
Episode 11485, loss: -0.6931, total reward: -1.0000
len3 Swap
[74, 74, 2]
Episode 11486, loss: -0.6931, total reward: -1.0000
len3 Swap
[99, 68, 6]
Episode 11487, loss: -0.6266, total reward: -1.0000
len3 Swap
[15, 29, 32]
Episode 11488, loss: -0.6852, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[9, 66, 40]
Episode 11489, loss: -0.8809, total reward: -0.0800
len3 Swap
[52, 96, 84

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[60, 94, 81]
Episode 11557, loss: -0.7246, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[43, 21, 75]
Episode 11558, loss: -0.6955, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 1 less
[96, 90, 58]
Episode 11559, loss: -0.9325, total reward: -0.0600
len3 Swap
[31, 89, 80]
Episode 11560, loss: -0.8580, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less
[31, 76, 69]
Episode 11561, loss: -0.7215, total reward: -0.0700
len3 Swap
[83, 85, 3]
Episode 11562, loss: -0.8643, total reward: -1.0000
len3 Comparison 0 2 more Comparison 0 2 more Comparison 2 2 equal Comparison 1 2 more
[82, 42, 23]
Episode 11563, loss: -1.6868, total reward: -0.0500
len3 Swap
[97, 85, 17]
Episode 11564, loss: -0.8585, total reward: -1.0000
len3 Swap
[33,

Episode 11629, loss: -1.5748, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[9, 37, 82]
Episode 11630, loss: -0.6306, total reward: -0.0700
len3 Comparison 0 2 less Comparison 1 2 less Comparison 1 2 less Comparison 2 2 equal
[71, 74, 92]
Episode 11631, loss: -1.5988, total reward: -0.0500
len3 Comparison 2 0 more Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[26, 81, 61]
Episode 11632, loss: -0.2930, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 0 equal
[6, 82, 8]
Episode 11633, loss: -0.6372, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more
[46, 71, 41]
Episode 11634, loss: -0.6453, total reward: -0.0600
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[16, 54, 20]
Episode 11635, loss: -1.8852, total reward: -0.0600
len3 Comparison 1 2 less Comparison 2

len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[2, 80, 19]
Episode 11699, loss: -1.8723, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[99, 26, 64]
Episode 11700, loss: -0.7400, total reward: -0.0700
len3 Swap
[26, 74, 32]
Episode 11701, loss: -0.7376, total reward: -1.0000
len3 Swap
[56, 82, 89]
Episode 11702, loss: -0.8326, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 more
[61, 49, 95]
Episode 11703, loss: -0.7195, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Swap
[29, 76, 81]
Episode 11704, loss: -0.5939, total reward: 9.9600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[88, 46, 57]
Episode 11705, loss: -0.7869, total reward: -0.0800
len3 Swap
[22, 55, 8]
Episode 11706, loss: -0.8169, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Co

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[89, 46, 27]
Episode 11769, loss: -0.9161, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[31, 10, 68]
Episode 11770, loss: -0.8611, total reward: -0.0700
len3 Swap
[83, 19, 41]
Episode 11771, loss: -0.7078, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[67, 87, 16]
Episode 11772, loss: -0.9433, total reward: -0.0700
len3 Swap
[19, 8, 59]
Episode 11773, loss: -0.6694, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 less
[69, 96, 89]
Episode 11774, loss: -0.8886, total reward: -0.0600
len3 Swap
[53, 5, 54]
Episode 11775, loss: -0.6931, total reward: -1.0000
len3 Swap
[44, 85, 83]
Episode 11776, loss: -0.7007, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[4

len3 Swap
[89, 16, 97]
Episode 11845, loss: -0.8450, total reward: -1.0000
len3 Swap
[45, 70, 36]
Episode 11846, loss: -0.8605, total reward: -1.0000
len3 Swap
[85, 87, 23]
Episode 11847, loss: -0.8317, total reward: -1.0000
len3 Swap
[18, 98, 29]
Episode 11848, loss: -0.9116, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[34, 88, 9]
Episode 11849, loss: -0.6998, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[2, 43, 100]
Episode 11850, loss: -0.7800, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[4, 65, 14]
Episode 11851, loss: -0.7140, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal
[10, 56, 17]
Episode 11852, loss: -0.6824, total reward: -0.0700
len3 Swap
[18, 7, 56]
Episode 11853, loss: -0.8978, total reward: -1.0000
len3 Comparison

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[10, 3, 74]
Episode 11922, loss: -0.7330, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2
[58, 36, 49]
Episode 11923, loss: -0.7307, total reward: -0.0700
len3 Comparison 2 0 more Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[12, 76, 95]
Episode 11924, loss: -0.6027, total reward: -0.0600
len3 Swap
[80, 11, 11]
Episode 11925, loss: -0.8444, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less Swap Comparison 1
[43, 17, 93]
Episode 11926, loss: -0.7599, total reward: 0.0500
len3 Swap
[81, 74, 24]
Episode 11927, loss: -0.9142, total reward: -1.0000
len3 Swap
[62, 12, 75]
Episode 11928, loss: -0.9152, total reward: -1.0000
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[52, 53, 52]
Episode 11929, loss: 0.3422, total reward: -0.0600
len3 Swap
[34, 73, 5

Episode 11988, loss: -0.9438, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[18, 38, 52]
Episode 11989, loss: -0.9523, total reward: -0.0800
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[25, 26, 19]
Episode 11990, loss: -1.3917, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[91, 45, 61]
Episode 11991, loss: -0.9839, total reward: -0.0800
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[73, 63, 2]
Episode 11992, loss: -1.6743, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 less
[73, 53, 51]
Episode 11993, loss: -1.8108, total reward: -0.0600
len3 Swap
[48, 69, 58]
Episode 11994, loss: -0.7087, total reward: -1.0000
len3 Swap
[2, 80, 39]
Episode 11995, loss: -0.6218, total reward: -1.0000
len3 Swap
[79, 52, 49]
Episode 119

len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[84, 1, 60]
Episode 12065, loss: -0.8027, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 1 equal Comparison 0 2 more Comparison 2 2 equal
[47, 95, 46]
Episode 12066, loss: -0.8800, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[67, 97, 14]
Episode 12067, loss: -0.9751, total reward: -0.0800
len3 Swap
[14, 49, 77]
Episode 12068, loss: -0.7691, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[57, 29, 52]
Episode 12069, loss: -0.8316, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[41, 41, 9]
Episode 12070, loss: -0.8903, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[17, 69, 17]
Episode 12071, loss: -0.9701, total reward: -0.0800
len3 Sw

Episode 12136, loss: -0.7600, total reward: -0.0600
len3 Comparison 0 2 less Comparison 1 2 less Comparison 2 2 equal Comparison 0 2 less
[41, 34, 84]
Episode 12137, loss: -1.5598, total reward: -0.0500
len3 Comparison 1 2 more Swap
[28, 50, 84]
Episode 12138, loss: 0.5631, total reward: 9.9900
len3 Swap
[60, 80, 29]
Episode 12139, loss: -0.8085, total reward: -1.0000
len3 Swap
[69, 19, 21]
Episode 12140, loss: -0.8215, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Swap
[5, 49, 67]
Episode 12141, loss: -1.3315, total reward: 9.9500
len3 Comparison 1 2 more Comparison 0 2 more Comparison 1 2 more Comparison 0 2 more
[23, 11, 2]
Episode 12142, loss: -1.5256, total reward: -0.0400
len3 Swap
[53, 26, 98]
Episode 12143, loss: -0.8488, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 1 1 equal Comparison 2
[69, 68, 69]
Episode 12144, loss: -1.0026, total reward: -0.0700
len3 Swap
[32, 6, 90]
Episode 12145, loss: -

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[13, 97, 66]
Episode 12204, loss: -0.9074, total reward: -0.0700
len3 Comparison 1 2 less Comparison 2 1 more Comparison 1 2 less Comparison 2 0 more
[2, 8, 12]
Episode 12205, loss: -1.5298, total reward: -0.0400
len3 Swap
[58, 89, 8]
Episode 12206, loss: -0.7185, total reward: -1.0000
len3 Swap
[23, 23, 57]
Episode 12207, loss: -0.7237, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 0 equal Comparison 2 2 equal Comparison 1 2 less
[100, 14, 66]
Episode 12208, loss: -0.8648, total reward: -0.0700
len3 Swap
[87, 7, 42]
Episode 12209, loss: -0.6830, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[31, 16, 36]
Episode 12210, loss: -0.8562, total reward: -0.0800
len3 Comparison 0 2 more Comparison 1 2 more Comparison 0 2 more Comparison 2 2 equal
[63, 68, 26]
Episode 12211, loss: -1.6598, total reward: -0.0500
len3 Comparison 2

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[24, 39, 82]
Episode 12276, loss: -0.9568, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[20, 14, 9]
Episode 12277, loss: -0.8965, total reward: -0.0700
len3 Swap
[73, 83, 97]
Episode 12278, loss: -0.6436, total reward: -1.0000
len3 Swap
[70, 4, 80]
Episode 12279, loss: -0.6210, total reward: -1.0000
len3 Swap
[47, 7, 49]
Episode 12280, loss: -0.6303, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 less Comparison 2 1 more
[74, 25, 62]
Episode 12281, loss: -0.9519, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2
[79, 24, 60]
Episode 12282, loss: -0.9917, total reward: -0.0700
len3 Swap
[15, 97, 17]
Episode 12283, loss: -0.6980, total reward: -1.0000
len3 Swap
[41, 19, 57]
Episode 12284, loss: -0.6944, total reward: -1.0000
len3 Swap
[63, 20, 7

Episode 12350, loss: -0.9536, total reward: -0.0600
len3 Swap
[68, 44, 76]
Episode 12351, loss: -0.6933, total reward: -1.0000
len3 Swap
[96, 72, 37]
Episode 12352, loss: -0.7315, total reward: -1.0000
len3 Swap
[31, 74, 5]
Episode 12353, loss: -0.7101, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[25, 99, 97]
Episode 12354, loss: -0.9366, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 1 1 equal
[45, 39, 7]
Episode 12355, loss: -0.9841, total reward: -0.0700
len3 Swap
[86, 97, 34]
Episode 12356, loss: -0.6604, total reward: -1.0000
len3 Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[46, 97, 86]
Episode 12357, loss: -1.7132, total reward: -0.0600
len3 Swap
[7, 12, 82]
Episode 12358, loss: -0.7252, total reward: -1.0000
len3 Swap
[50, 24, 25]
Episode 12359, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 2

Episode 12423, loss: -2.2409, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal Swap Comparison 2
[44, 2, 55]
Episode 12424, loss: -0.7444, total reward: -0.0600
len3 Swap
[71, 9, 42]
Episode 12425, loss: -0.7697, total reward: -1.0000
len3 Swap
[83, 4, 75]
Episode 12426, loss: -0.7549, total reward: -1.0000
len3 Swap
[57, 56, 27]
Episode 12427, loss: -0.7923, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[80, 49, 52]
Episode 12428, loss: -0.8113, total reward: -0.0700
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[74, 79, 10]
Episode 12429, loss: -2.3979, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal
[100, 27, 8]
Episode 12430, loss: -0.7322, total reward: -0.0700
len3 Swap
[33, 9, 100]
Episode 12431, loss: -0.6931, total reward: -1.0000
len3 Swap
[39, 3, 7]
Episode 12432, loss: -

Episode 12499, loss: -0.6159, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[14, 49, 8]
Episode 12500, loss: -0.6495, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[26, 61, 35]
Episode 12501, loss: -0.6405, total reward: -0.0800
len3 Swap
[19, 53, 58]
Episode 12502, loss: -0.9328, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 more
[84, 63, 11]
Episode 12503, loss: -0.6679, total reward: -0.0600
len3 Swap
[44, 42, 50]
Episode 12504, loss: -0.9296, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[51, 97, 72]
Episode 12505, loss: -0.3323, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2
[98, 34, 39]
Episode 12506, loss: -0.6861, total reward: -0.0700
len3 Comparison 2 2 equal Comparis

len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[43, 22, 75]
Episode 12571, loss: -0.0095, total reward: -0.0700
len3 Comparison 0 1 more Comparison 1 2 more Comparison 2 0 less Comparison 2 2 equal
[84, 81, 12]
Episode 12572, loss: -1.2177, total reward: -0.0500
len3 Swap
[33, 85, 37]
Episode 12573, loss: -0.7141, total reward: -1.0000
len3 Swap
[5, 60, 1]
Episode 12574, loss: -0.6725, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[90, 71, 58]
Episode 12575, loss: -1.3102, total reward: -0.0600
len3 Swap
[34, 55, 9]
Episode 12576, loss: -0.7323, total reward: -1.0000
len3 Swap
[12, 7, 33]
Episode 12577, loss: -0.7645, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[45, 7, 93]
Episode 12578, loss: -0.1748, total reward: -0.0700
len3 Swap
[28, 17, 42]
Episode 12579, loss: -0.7449, total reward: -1.0000
len3 Swap
[46, 40, 74

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 2 0 less
[81, 64, 77]
Episode 12647, loss: -0.5085, total reward: -0.0600
len3 Swap
[34, 51, 35]
Episode 12648, loss: -1.0117, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[53, 45, 52]
Episode 12649, loss: -0.5476, total reward: -0.0700
len3 Comparison 2 0 more Comparison 2 1 more Comparison 2 2 equal Comparison 1 2 less
[58, 81, 94]
Episode 12650, loss: -0.5165, total reward: -0.0500
len3 Swap
[1, 18, 69]
Episode 12651, loss: -1.0199, total reward: -1.0000
len3 Swap
[65, 100, 10]
Episode 12652, loss: -0.9897, total reward: -1.0000
len3 Swap
[60, 53, 11]
Episode 12653, loss: -1.0105, total reward: -1.0000
len3 Swap
[37, 87, 80]
Episode 12654, loss: -0.9342, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap Swap
[75, 8, 82]
Episode 12655, loss: 0.3959, total reward: -1.0700
len3 Comparison 0 2 less Swa

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[77, 27, 59]
Episode 12715, loss: -0.8397, total reward: -0.0800
len3 Comparison 1 2 less Comparison 2 0 more Comparison 0 2 less Comparison 2 2 equal
[18, 34, 63]
Episode 12716, loss: -1.4903, total reward: -0.0500
len3 Comparison 2 1 more Comparison 0 2 more Comparison 2 2 equal Swap Comparison 2
[74, 8, 64]
Episode 12717, loss: -0.6622, total reward: -0.0500
len3 Swap
[19, 31, 46]
Episode 12718, loss: -0.8740, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 0 equal Comparison 2 2 equal Comparison 1 2 more
[99, 95, 60]
Episode 12719, loss: -0.7689, total reward: -0.0700
len3 Swap
[99, 36, 36]
Episode 12720, loss: -0.8825, total reward: -1.0000
len3 Comparison 0 1 more Swap Swap
[33, 58, 24]
Episode 12721, loss: -0.2521, total reward: -0.9100
len3 Comparison 2 1 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[56, 20, 92]
Episode 12722, loss: -0.0697, total reward:

len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[21, 61, 7]
Episode 12783, loss: -0.7067, total reward: -0.0700
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[3, 11, 35]
Episode 12784, loss: -1.7832, total reward: -0.0700
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[89, 87, 50]
Episode 12785, loss: -1.8281, total reward: -0.0600
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[24, 79, 60]
Episode 12786, loss: -0.2574, total reward: -0.0600
len3 Swap
[1, 6, 30]
Episode 12787, loss: -0.8405, total reward: -1.0000
len3 Swap
[85, 12, 55]
Episode 12788, loss: -0.7958, total reward: -1.0000
len3 Swap
[67, 81, 8]
Episode 12789, loss: -0.7582, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal Comparison 0 2 more
[88, 78, 10]
Episode 12790, loss: -0.7063, total reward: -0.0600
len3 Swap
[70, 11,

len3 Comparison 1 2 more Comparison 0 1 less Comparison 2 2 equal Comparison 1 2 more
[93, 98, 96]
Episode 12855, loss: -1.4444, total reward: -0.0500
len3 Comparison 2 1 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[13, 49, 81]
Episode 12856, loss: -0.4426, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 1 more Comparison 2 2 equal
[42, 62, 81]
Episode 12857, loss: -0.7499, total reward: -0.0600
len3 Swap
[14, 63, 54]
Episode 12858, loss: -0.8033, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[48, 52, 33]
Episode 12859, loss: -0.7322, total reward: -0.0800
len3 Comparison 2 1 more Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[19, 58, 86]
Episode 12860, loss: 0.3764, total reward: -0.0600
len3 Swap
[86, 73, 58]
Episode 12861, loss: -0.6342, total reward: -1.0000
len3 Swap
[3, 97, 77]
Episode 12862, loss: -0.7976, total reward: -1.0000
len3 Comparison

len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 1 more
[100, 39, 79]
Episode 12924, loss: -0.9858, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[63, 89, 91]
Episode 12925, loss: -0.9465, total reward: -0.0800
len3 Swap
[83, 67, 33]
Episode 12926, loss: -0.5764, total reward: -1.0000
len3 Swap
[35, 91, 29]
Episode 12927, loss: -0.5503, total reward: -1.0000
len3 Comparison 0 1 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[19, 84, 49]
Episode 12928, loss: -1.8248, total reward: -0.0700
len3 Swap
[22, 66, 8]
Episode 12929, loss: -0.6452, total reward: -1.0000
len3 Swap
[20, 47, 21]
Episode 12930, loss: -0.6418, total reward: -1.0000
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[81, 28, 3]
Episode 12931, loss: 0.3063, total reward: -0.0600
len3 Swap
[19, 53, 7]
Episode 12932, loss: -0.6321, total reward: -1.0000
len3 Swap
[65, 30

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 0 equal
[42, 43, 27]
Episode 13000, loss: 0.0607, total reward: -0.0800
len3 Swap
[32, 9, 21]
Episode 13001, loss: -0.6931, total reward: -1.0000
len3 Swap
[35, 23, 9]
Episode 13002, loss: -0.7656, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[40, 48, 74]
Episode 13003, loss: -0.7993, total reward: -0.0700
len3 Swap
[98, 1, 63]
Episode 13004, loss: -0.7849, total reward: -1.0000
len3 Swap
[18, 89, 93]
Episode 13005, loss: -0.7573, total reward: -1.0000
len3 Swap
[70, 4, 24]
Episode 13006, loss: -0.7296, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[74, 53, 1]
Episode 13007, loss: -0.6896, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[61, 75, 38]
Episode 13008, loss: -0.7628, total reward: -0.0800
len3 Swap
[61, 6

Episode 13068, loss: -0.8872, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 1 more Comparison 2 2 equal Comparison 2 2 equal
[12, 20, 38]
Episode 13069, loss: -0.8622, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 0 more Comparison 2 2 equal Comparison 1 2 less
[39, 53, 76]
Episode 13070, loss: -0.8944, total reward: -0.0600
len3 Comparison 1 1 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[17, 2, 89]
Episode 13071, loss: -1.3282, total reward: -0.0800
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more Comparison 2 0 more
[6, 97, 30]
Episode 13072, loss: -1.4335, total reward: -0.0500
len3 Swap
[46, 42, 69]
Episode 13073, loss: -0.6413, total reward: -1.0000
len3 Comparison 2 1 more Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[57, 82, 99]
Episode 13074, loss: -0.7350, total reward: -0.0600
len3 Swap
[94, 67, 32]
Episode 13075, loss: -0.6077, total reward: -1.0000
len3 Swap
[40, 76, 58]
Episode 13076, 

Episode 13145, loss: -0.1963, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[38, 70, 92]
Episode 13146, loss: -0.3961, total reward: -0.0700
len3 Comparison 1 2 less Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 0
[85, 41, 29]
Episode 13147, loss: -1.6399, total reward: -0.1500
len3 Swap
[47, 13, 75]
Episode 13148, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 less Comparison 0 2 less Comparison 2 2 equal Comparison 0 2 less
[49, 34, 50]
Episode 13149, loss: -1.4403, total reward: -0.0500
len3 Swap
[37, 87, 31]
Episode 13150, loss: -0.8054, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[69, 89, 30]
Episode 13151, loss: -0.8056, total reward: -0.0800
len3 Swap
[65, 80, 14]
Episode 13152, loss: -0.8469, total reward: -1.0000
len3 Swap
[40, 5, 88]
Episode 13153, loss: -0.8023, total reward: -1.0000
len3 Swap
[46, 32, 6]
Episode 13154, loss

Episode 13215, loss: -0.7974, total reward: -0.1500
len3 Swap
[20, 47, 6]
Episode 13216, loss: -0.6931, total reward: -1.0000
len3 Swap
[65, 20, 33]
Episode 13217, loss: -0.7460, total reward: -1.0000
len3 Swap
[36, 15, 10]
Episode 13218, loss: -0.7941, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 0 more Comparison 2 2 equal
[31, 42, 6]
Episode 13219, loss: -0.8445, total reward: -0.0700
len3 Swap
[11, 19, 94]
Episode 13220, loss: -0.7630, total reward: -1.0000
len3 Swap
[24, 20, 74]
Episode 13221, loss: -0.7115, total reward: -1.0000
len3 Comparison 1 2 less Comparison 1 2 less Comparison 1 2 less Comparison 2 2 equal
[99, 15, 48]
Episode 13222, loss: -1.1799, total reward: -0.0500
len3 Swap
[15, 47, 81]
Episode 13223, loss: -0.7202, total reward: -1.0000
len3 Swap
[42, 41, 22]
Episode 13224, loss: -0.7485, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 1 more Comparison 1 2 less Comparison 2 2 equal
[45, 64, 82]
Episode 13225, loss: 

Episode 13291, loss: -0.8923, total reward: -0.0700
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more Swap Comparison 2
[48, 82, 61]
Episode 13292, loss: -1.7555, total reward: 0.0600
len3 Swap
[6, 41, 27]
Episode 13293, loss: -0.9352, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[55, 50, 16]
Episode 13294, loss: -0.7233, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 1 more
[69, 35, 91]
Episode 13295, loss: 0.1461, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[46, 2, 61]
Episode 13296, loss: -0.6743, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 equal Swap Comparison 0 2 equal Comparison 0
[2, 79, 2]
Episode 13297, loss: -0.7684, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[35, 43, 61]
Episode 13298, los

len3 Comparison 1 2 more Comparison 2 2 equal Comparison 0 1 less Comparison 0 2 more
[38, 39, 33]
Episode 13364, loss: -1.1295, total reward: -0.0500
len3 Swap
[99, 52, 38]
Episode 13365, loss: -0.6976, total reward: -1.0000
len3 Swap
[77, 84, 82]
Episode 13366, loss: -0.7087, total reward: -1.0000
len3 Swap
[18, 76, 6]
Episode 13367, loss: -0.7092, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Swap Comparison 0
[67, 24, 19]
Episode 13368, loss: -0.9992, total reward: -0.1500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[31, 60, 56]
Episode 13369, loss: -1.0287, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[78, 63, 44]
Episode 13370, loss: -1.7920, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[18, 88, 100]
Episode 13371, loss: -0.9835, total reward: -0.0700
len3 Comparison 

len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[27, 61, 25]
Episode 13437, loss: -1.6826, total reward: -0.0600
len3 Swap
[6, 34, 17]
Episode 13438, loss: -0.6778, total reward: -1.0000
len3 Swap
[10, 44, 94]
Episode 13439, loss: -0.7166, total reward: -1.0000
len3 Swap
[28, 29, 6]
Episode 13440, loss: -0.7131, total reward: -1.0000
len3 Swap
[6, 94, 79]
Episode 13441, loss: -0.6875, total reward: -1.0000
len3 Swap
[31, 48, 95]
Episode 13442, loss: -0.7190, total reward: -1.0000
len3 Comparison 0 2 more Comparison 0 0 equal Comparison 2 2 equal Comparison 2 2 equal
[49, 30, 24]
Episode 13443, loss: -1.8586, total reward: -0.0700
len3 Swap
[41, 58, 56]
Episode 13444, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Swap Comparison 2
[17, 5, 35]
Episode 13445, loss: -0.8249, total reward: -0.0600
len3 Swap
[28, 25, 24]
Episode 13446, loss: -0.7502, total reward: -1.0000
len3 Swap
[11, 62, 75]
E

len3 Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[75, 58, 18]
Episode 13510, loss: -1.7964, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 1 2 more
[57, 97, 70]
Episode 13511, loss: -0.8698, total reward: -0.0600
len3 Swap
[76, 81, 72]
Episode 13512, loss: -0.6966, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 more
[75, 68, 27]
Episode 13513, loss: -0.9093, total reward: -0.0600
len3 Swap
[3, 35, 40]
Episode 13514, loss: -0.7369, total reward: -1.0000
len3 Swap
[46, 37, 51]
Episode 13515, loss: -0.7087, total reward: -1.0000
len3 Swap
[36, 39, 64]
Episode 13516, loss: -0.7255, total reward: -1.0000
len3 Swap
[47, 44, 33]
Episode 13517, loss: -0.6989, total reward: -1.0000
len3 Swap
[72, 73, 3]
Episode 13518, loss: -0.6859, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[9, 1, 9

len3 Comparison 2 2 equal Comparison 0 2 more Comparison 1 1 equal Swap Comparison 2
[93, 74, 83]
Episode 13585, loss: -0.9695, total reward: -0.0600
len3 Swap
[12, 85, 18]
Episode 13586, loss: -0.7309, total reward: -1.0000
len3 Swap
[46, 86, 55]
Episode 13587, loss: -0.7473, total reward: -1.0000
len3 Swap
[12, 97, 91]
Episode 13588, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[55, 52, 10]
Episode 13589, loss: -0.9755, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less
[57, 49, 82]
Episode 13590, loss: -0.9041, total reward: -0.0600
len3 Swap
[33, 32, 57]
Episode 13591, loss: -0.7981, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 1 2 less Comparison 0 0 equal
[98, 21, 22]
Episode 13592, loss: -1.4133, total reward: -0.0600
len3 Comparison 0 2 more Comparison 1 2 more Comparison 1 2 more Comparison 2 2 equal
[64, 45

Episode 13654, loss: -0.7000, total reward: -1.0000
len3 Swap
[30, 89, 51]
Episode 13655, loss: -0.6886, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 1 2 less Comparison 0 2 less
[67, 7, 82]
Episode 13656, loss: -1.5589, total reward: -0.0500
len3 Swap
[38, 25, 9]
Episode 13657, loss: -0.6848, total reward: -1.0000
len3 Comparison 1 2 more Comparison 1 2 more Comparison 1 0 more Comparison 2 2 equal
[16, 100, 19]
Episode 13658, loss: -1.5252, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 0 2 less Comparison 1 2 less
[31, 38, 55]
Episode 13659, loss: -0.2443, total reward: -0.0500
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more Comparison 1 2 more
[4, 49, 11]
Episode 13660, loss: -1.6734, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 1 equal Comparison 2 2 equal
[48, 55, 10]
Episode 13661, loss: -1.0732, total reward: -0.0800
len3 Swap
[62, 58, 90]
Episode 13662, loss

Episode 13728, loss: -1.7818, total reward: -0.0600
len3 Comparison 0 2 more Swap Comparison 1 2 less Comparison 0 2 less Swap Comparison
[90, 42, 87]
Episode 13729, loss: 2.1263, total reward: -0.0300
len3 Swap
[22, 100, 91]
Episode 13730, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[66, 72, 14]
Episode 13731, loss: -0.2833, total reward: -0.0600
len3 Swap
[17, 90, 21]
Episode 13732, loss: -0.7873, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 more Comparison 2 1 less
[8, 81, 6]
Episode 13733, loss: -0.7810, total reward: -0.0500
len3 Swap
[59, 24, 50]
Episode 13734, loss: -0.8576, total reward: -1.0000
len3 Swap
[22, 30, 91]
Episode 13735, loss: -0.8310, total reward: -1.0000
len3 Swap
[12, 56, 62]
Episode 13736, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 more Comparison 0 2 more
[11, 11, 6]
Episode 13737, loss: -0

Episode 13799, loss: -1.3282, total reward: -0.0500
len3 Swap
[41, 74, 9]
Episode 13800, loss: -0.8902, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal
[83, 75, 97]
Episode 13801, loss: -0.7722, total reward: -0.0600
len3 Swap
[91, 12, 85]
Episode 13802, loss: -0.8962, total reward: -1.0000
len3 Swap
[13, 64, 94]
Episode 13803, loss: -0.6931, total reward: -1.0000
len3 Swap
[79, 81, 50]
Episode 13804, loss: -0.8969, total reward: -1.0000
len3 Comparison 1 2 less Comparison 1 2 less Comparison 1 2 less Comparison 0 2 less
[77, 3, 96]
Episode 13805, loss: -1.2871, total reward: -0.0400
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 1 2 less
[17, 36, 46]
Episode 13806, loss: -0.8131, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more
[100, 41, 95]
Episode 13807, loss: -0.7361, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 mo

Episode 13872, loss: -1.7000, total reward: -0.0700
len3 Swap
[38, 44, 95]
Episode 13873, loss: -0.9173, total reward: -1.0000
len3 Swap
[62, 50, 86]
Episode 13874, loss: -0.9114, total reward: -1.0000
len3 Swap
[94, 42, 56]
Episode 13875, loss: -0.9035, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 0 2 more
[42, 40, 41]
Episode 13876, loss: -0.6785, total reward: -0.0600
len3 Swap
[68, 46, 6]
Episode 13877, loss: -0.8519, total reward: -1.0000
len3 Comparison 0 2 more Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[94, 28, 30]
Episode 13878, loss: -0.4037, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[72, 74, 82]
Episode 13879, loss: -0.6970, total reward: -0.0800
len3 Comparison 0 1 less Comparison 2 0 more Comparison 2 2 equal Swap Comparison 2
[14, 91, 31]
Episode 13880, loss: -1.3064, total reward: -0.0500
len3 Swap
[63, 59, 47]
Episode 13881, loss

Episode 13943, loss: -0.6972, total reward: 9.9400
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 0 more Comparison 2 2 equal
[16, 63, 77]
Episode 13944, loss: -0.8131, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 more
[18, 35, 8]
Episode 13945, loss: -0.3957, total reward: -0.0600
len3 Swap
[22, 21, 14]
Episode 13946, loss: -0.7836, total reward: -1.0000
len3 Swap
[49, 54, 97]
Episode 13947, loss: -0.8258, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 1 1 equal
[35, 31, 87]
Episode 13948, loss: 0.1455, total reward: -0.0700
len3 Swap
[24, 39, 93]
Episode 13949, loss: -0.8029, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[33, 92, 7]
Episode 13950, loss: -0.7691, total reward: -0.0800
len3 Swap
[91, 81, 78]
Episode 13951, loss: -0.7815, total reward: -1.0000
len3 Swap
[23, 67, 48]
Episode 13952, los

Episode 14015, loss: -0.7015, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more
[3, 95, 96]
Episode 14016, loss: -0.7659, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more Swap Comparison 2
[90, 28, 73]
Episode 14017, loss: -1.9450, total reward: 0.0600
len3 Swap
[74, 51, 61]
Episode 14018, loss: -0.7395, total reward: -1.0000
len3 Swap
[32, 45, 74]
Episode 14019, loss: -0.8545, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 1 equal Comparison 0 2 less
[2, 55, 11]
Episode 14020, loss: -0.7745, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 more Swap Comparison 2 2 equal Comparison 2
[51, 92, 84]
Episode 14021, loss: 0.0810, total reward: 0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap
[3, 48, 76]
Episode 14022, loss: -0.6911, total reward: 9.9400
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 

len3 Comparison 1 2 less Comparison 2 2 equal Comparison 1 2 less Comparison 0 1 less
[11, 87, 88]
Episode 14086, loss: -2.0348, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[37, 71, 61]
Episode 14087, loss: -0.8834, total reward: -0.0800
len3 Swap
[13, 48, 35]
Episode 14088, loss: -0.6931, total reward: -1.0000
len3 Swap
[89, 56, 26]
Episode 14089, loss: -0.7146, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more Comparison 2 2 equal
[68, 53, 78]
Episode 14090, loss: -0.8875, total reward: -0.0700
len3 Swap
[76, 2, 10]
Episode 14091, loss: -0.6827, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 0 2 less
[71, 81, 85]
Episode 14092, loss: -0.8639, total reward: -0.0600
len3 Swap
[16, 54, 57]
Episode 14093, loss: -0.7279, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[6

Episode 14157, loss: -1.3321, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[5, 69, 2]
Episode 14158, loss: -1.8545, total reward: -0.0700
len3 Comparison 1 2 less Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 2
[67, 30, 25]
Episode 14159, loss: -1.7061, total reward: -0.1500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[1, 54, 25]
Episode 14160, loss: -0.9174, total reward: -0.0700
len3 Swap
[50, 25, 18]
Episode 14161, loss: -0.7762, total reward: -1.0000
len3 Swap
[93, 23, 89]
Episode 14162, loss: -0.7328, total reward: -1.0000
len3 Comparison 0 1 more Swap
[16, 18, 69]
Episode 14163, loss: 1.1049, total reward: 9.9900
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[45, 59, 12]
Episode 14164, loss: -0.8594, total reward: -0.0800
len3 Comparison 0 1 less Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[79, 90, 3]

Episode 14231, loss: -0.8749, total reward: -0.0700
len3 Swap
[55, 83, 17]
Episode 14232, loss: -0.7602, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 1 less Comparison 2 2 equal Comparison 0 2 less
[42, 84, 88]
Episode 14233, loss: -0.0094, total reward: -0.0600
len3 Swap
[3, 72, 59]
Episode 14234, loss: -0.7686, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[18, 97, 23]
Episode 14235, loss: -0.9331, total reward: -0.0700
len3 Comparison 0 2 more Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[48, 71, 11]
Episode 14236, loss: -1.0771, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[57, 39, 3]
Episode 14237, loss: -0.8735, total reward: -0.0700
len3 Swap
[43, 71, 63]
Episode 14238, loss: -0.7901, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[97, 7, 8]
Episode 14239, l

len3 Comparison 2 2 equal Comparison 2 0 less Comparison 2 2 equal Comparison 2 2 equal
[57, 95, 8]
Episode 14306, loss: -0.8627, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[11, 73, 25]
Episode 14307, loss: -0.5949, total reward: -0.0700
len3 Swap
[12, 46, 33]
Episode 14308, loss: -0.9316, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal
[46, 4, 99]
Episode 14309, loss: -0.7028, total reward: -0.0600
len3 Swap
[32, 80, 50]
Episode 14310, loss: -0.9706, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[93, 95, 20]
Episode 14311, loss: -0.6216, total reward: -0.0800
len3 Swap
[24, 44, 53]
Episode 14312, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[57, 74, 87]
Episode 14313, loss: -1.8292, total reward: -0.0600
len3 Swap
[92

Episode 14374, loss: -0.7909, total reward: -0.0700
len3 Swap
[56, 6, 92]
Episode 14375, loss: -0.8800, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[73, 74, 21]
Episode 14376, loss: -0.7004, total reward: -0.0700
len3 Swap
[53, 64, 78]
Episode 14377, loss: -0.8251, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 more Comparison 0 0 equal
[69, 50, 29]
Episode 14378, loss: -0.7516, total reward: -0.0600
len3 Comparison 1 2 more Comparison 2 0 less Comparison 0 2 more Comparison 0 2 more
[52, 67, 23]
Episode 14379, loss: -1.7700, total reward: -0.0400
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[10, 93, 37]
Episode 14380, loss: -0.7059, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less Comparison 2 1 more
[38, 59, 69]
Episode 14381, loss: -0.7343, total reward: -0.0500
len3 Comparison 2 1 less Comparison 2 2 

len3 Comparison 2 2 equal Comparison 2 0 less Comparison 2 2 equal Comparison 2 2 equal
[66, 54, 44]
Episode 14446, loss: -0.6114, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[39, 76, 18]
Episode 14447, loss: -0.6913, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[10, 86, 23]
Episode 14448, loss: -0.6541, total reward: -0.0700
len3 Swap
[24, 84, 84]
Episode 14449, loss: -0.8648, total reward: -1.0000
len3 Comparison 1 1 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[24, 31, 52]
Episode 14450, loss: -0.7537, total reward: -0.0700
len3 Swap
[39, 66, 69]
Episode 14451, loss: -0.8851, total reward: -1.0000
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less
[28, 39, 30]
Episode 14452, loss: -0.1800, total reward: -0.0500
len3 Swap
[92, 95, 52]
Episode 14453, loss: -0.6931, total reward: -1.0000
len3 Swap
[9

Episode 14511, loss: -1.0402, total reward: -0.0700
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[21, 90, 5]
Episode 14512, loss: -0.5856, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 0 equal Comparison 2 2 equal Comparison 1 2 more
[88, 50, 45]
Episode 14513, loss: -0.9201, total reward: -0.0700
len3 Swap
[33, 86, 20]
Episode 14514, loss: -0.5826, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 1 less
[42, 86, 64]
Episode 14515, loss: -1.0914, total reward: -0.0600
len3 Swap
[38, 58, 61]
Episode 14516, loss: -0.5977, total reward: -1.0000
len3 Comparison 0 2 more Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[91, 34, 7]
Episode 14517, loss: -1.7663, total reward: -0.0600
len3 Swap
[7, 96, 82]
Episode 14518, loss: -0.4928, total reward: -1.0000
len3 Swap
[88, 97, 5]
Episode 14519, loss: -0.5497, total reward: -1.0000
len3 Swap
[62, 49, 98]
Episode 14520, loss:

Episode 14588, loss: -1.9591, total reward: -0.0700
len3 Swap
[15, 21, 79]
Episode 14589, loss: -0.6706, total reward: -1.0000
len3 Swap
[98, 94, 52]
Episode 14590, loss: -0.7305, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[83, 70, 24]
Episode 14591, loss: -0.8830, total reward: -0.0700
len3 Swap
[51, 51, 52]
Episode 14592, loss: -0.7413, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[83, 17, 1]
Episode 14593, loss: -0.4622, total reward: -0.0800
len3 Swap
[32, 50, 79]
Episode 14594, loss: -0.7398, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[23, 95, 47]
Episode 14595, loss: -0.8047, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2
[66, 42, 10]
Episode 14596, loss: -0.1514, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[8, 14, 79]
Episode 14661, loss: -0.9205, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[76, 33, 51]
Episode 14662, loss: -0.8137, total reward: -0.0700
len3 Swap
[26, 56, 60]
Episode 14663, loss: -0.7251, total reward: -1.0000
len3 Swap
[3, 18, 95]
Episode 14664, loss: -0.7154, total reward: -1.0000
len3 Swap
[63, 11, 22]
Episode 14665, loss: -0.6610, total reward: -1.0000
len3 Swap
[32, 83, 13]
Episode 14666, loss: -0.8015, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[9, 75, 46]
Episode 14667, loss: -2.1984, total reward: -0.0700
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[82, 22, 77]
Episode 14668, loss: -1.3337, total reward: -0.0600
len3 Swap
[69, 17, 8]
Episode 14669, loss: -0.7233, total reward: -1.0000
len3 Swap
[27, 22

Episode 14731, loss: -0.9378, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 1 2 less Comparison 0 2 less
[2, 71, 81]
Episode 14732, loss: -0.8484, total reward: -0.0500
len3 Swap
[59, 46, 8]
Episode 14733, loss: -0.7856, total reward: -1.0000
len3 Swap
[91, 14, 41]
Episode 14734, loss: -0.7444, total reward: -1.0000
len3 Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2
[30, 72, 15]
Episode 14735, loss: -0.4128, total reward: -0.0600
len3 Swap
[18, 42, 43]
Episode 14736, loss: -0.7637, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[1, 22, 12]
Episode 14737, loss: -0.9258, total reward: -0.0700
len3 Swap
[19, 90, 20]
Episode 14738, loss: -0.8118, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 0 less Comparison 2 2 equal Comparison 1 2 more
[70, 70, 32]
Episode 14739, loss: -0.7997, total reward: -0.0600
len3 Swap
[23, 90, 36]
Episode 14740, loss: -

len3 Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[15, 78, 57]
Episode 14804, loss: -0.5615, total reward: -0.0600
len3 Swap
[79, 25, 97]
Episode 14805, loss: -0.6930, total reward: -1.0000
len3 Swap
[22, 84, 95]
Episode 14806, loss: -0.6843, total reward: -1.0000
len3 Swap
[42, 85, 14]
Episode 14807, loss: -0.7344, total reward: -1.0000
len3 Swap
[52, 26, 34]
Episode 14808, loss: -0.7394, total reward: -1.0000
len3 Swap
[64, 81, 10]
Episode 14809, loss: -0.7205, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 0 1 more
[71, 21, 3]
Episode 14810, loss: -0.9440, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 1 2 more
[8, 69, 64]
Episode 14811, loss: -0.7614, total reward: -0.0600
len3 Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[12, 64, 20]
Episode 14812, loss: -1.6984, total reward: -0.0600
len3 Comparison 2 2

Episode 14877, loss: -0.8248, total reward: -0.0800
len3 Swap
[91, 47, 26]
Episode 14878, loss: -0.6933, total reward: -1.0000
len3 Swap
[78, 87, 97]
Episode 14879, loss: -0.7525, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Swap Comparison 2 1 less Comparison 0
[26, 63, 30]
Episode 14880, loss: 0.0672, total reward: 0.0600
len3 Swap
[40, 28, 90]
Episode 14881, loss: -0.7858, total reward: -1.0000
len3 Swap
[40, 64, 57]
Episode 14882, loss: -0.8337, total reward: -1.0000
len3 Swap
[28, 61, 7]
Episode 14883, loss: -0.7455, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more
[83, 7, 6]
Episode 14884, loss: -0.1519, total reward: -0.0600
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[89, 66, 68]
Episode 14885, loss: -2.0418, total reward: -0.0700
len3 Swap
[17, 16, 48]
Episode 14886, loss: -0.7899, total reward: -1.0000
len3 Swap
[13, 88, 92]
Episode 14887, loss: -0.82

Episode 14946, loss: -0.9925, total reward: 0.0600
len3 Swap
[89, 93, 3]
Episode 14947, loss: -0.6745, total reward: -1.0000
len3 Swap
[59, 44, 10]
Episode 14948, loss: -0.6931, total reward: -1.0000
len3 Swap
[100, 69, 17]
Episode 14949, loss: -0.6924, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[33, 85, 45]
Episode 14950, loss: -1.8532, total reward: -0.0600
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[2, 100, 84]
Episode 14951, loss: -1.9694, total reward: -0.0600
len3 Swap
[84, 86, 20]
Episode 14952, loss: -0.6195, total reward: -1.0000
len3 Swap
[48, 69, 95]
Episode 14953, loss: -0.6334, total reward: -1.0000
len3 Swap
[8, 9, 86]
Episode 14954, loss: -0.6070, total reward: -1.0000
len3 Swap
[44, 68, 81]
Episode 14955, loss: -0.6564, total reward: -1.0000
len3 Swap
[87, 84, 1]
Episode 14956, loss: -0.6931, total reward: -1.0000
len3 Swap
[64, 49, 18]
Episode 14957, loss: -0.6

len3 Comparison 0 2 equal Swap Comparison 0 2 equal Comparison 0 2 equal Comparison 2
[69, 4, 69]
Episode 15021, loss: -0.2875, total reward: -0.0700
len3 Swap
[23, 39, 67]
Episode 15022, loss: -0.7003, total reward: -1.0000
len3 Swap
[84, 35, 94]
Episode 15023, loss: -0.7421, total reward: -1.0000
len3 Swap
[6, 17, 50]
Episode 15024, loss: -0.6961, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[99, 2, 24]
Episode 15025, loss: -0.8233, total reward: -0.0700
len3 Swap
[5, 38, 1]
Episode 15026, loss: -0.6768, total reward: -1.0000
len3 Swap
[45, 87, 92]
Episode 15027, loss: -0.7474, total reward: -1.0000
len3 Comparison 0 2 less Comparison 1 2 less Comparison 1 2 less Comparison 0 1 less
[10, 59, 92]
Episode 15028, loss: -1.5643, total reward: -0.0400
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[4, 4, 67]
Episode 15029, loss: -0.9481, total reward: -0.0800
len3 Swap
[53, 2, 19]
Ep

len3 Comparison 0 2 less Swap Comparison 2 1 less Comparison 0 1 more Comparison 0
[72, 23, 19]
Episode 15095, loss: -1.4202, total reward: -0.1300
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[70, 4, 64]
Episode 15096, loss: -0.8407, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[27, 63, 21]
Episode 15097, loss: -0.8126, total reward: -0.0700
len3 Comparison 2 1 less Comparison 1 2 more Comparison 0 2 more Comparison 2 2 equal
[30, 36, 3]
Episode 15098, loss: -0.4363, total reward: -0.0500
len3 Swap
[96, 62, 76]
Episode 15099, loss: -0.6931, total reward: -1.0000
len3 Swap
[40, 83, 21]
Episode 15100, loss: -0.8388, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[92, 38, 8]
Episode 15101, loss: -0.9009, total reward: -0.0800
len3 Swap
[20, 21, 89]
Episode 15102, loss: -0.7853, total reward: -1.0000
len3 Comparison 1 2

Episode 15165, loss: -0.7633, total reward: -0.0600
len3 Swap
[43, 59, 53]
Episode 15166, loss: -0.8340, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[98, 40, 78]
Episode 15167, loss: -0.8343, total reward: -0.0800
len3 Swap
[83, 33, 24]
Episode 15168, loss: -0.8433, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Swap Comparison 1 2 more Comparison 0
[20, 41, 18]
Episode 15169, loss: -0.5675, total reward: -0.1400
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Swap Comparison 2
[90, 67, 63]
Episode 15170, loss: -0.8666, total reward: -0.1500
len3 Swap
[43, 43, 84]
Episode 15171, loss: -0.6931, total reward: -1.0000
len3 Swap
[20, 92, 24]
Episode 15172, loss: -0.6931, total reward: -1.0000
len3 Swap
[42, 97, 89]
Episode 15173, loss: -0.7929, total reward: -1.0000
len3 Swap
[41, 41, 33]
Episode 15174, loss: -0.8158, total reward: -1.0000
len3 Swap
[19, 34, 38]
Episode 15175, loss: -

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 0 more
[39, 52, 45]
Episode 15239, loss: -0.8734, total reward: -0.0700
len3 Swap
[39, 66, 85]
Episode 15240, loss: -0.8893, total reward: -1.0000
len3 Swap
[45, 17, 62]
Episode 15241, loss: -0.6931, total reward: -1.0000
len3 Swap
[92, 16, 51]
Episode 15242, loss: -0.8868, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 1 2 more Swap Comparison 0
[58, 22, 50]
Episode 15243, loss: -1.5508, total reward: 0.0600
len3 Swap
[86, 47, 46]
Episode 15244, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Swap Comparison 0 2 more Swap Comparison
[89, 40, 94]
Episode 15245, loss: 1.0793, total reward: -0.0400
len3 Swap
[3, 78, 6]
Episode 15246, loss: -0.8296, total reward: -1.0000
len3 Swap
[60, 69, 75]
Episode 15247, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 0
[93, 3, 15]
Ep

Episode 15312, loss: -0.7838, total reward: -0.0600
len3 Swap
[86, 53, 65]
Episode 15313, loss: -0.8081, total reward: -1.0000
len3 Swap
[41, 45, 10]
Episode 15314, loss: -0.8274, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[73, 90, 16]
Episode 15315, loss: -1.6387, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Swap Comparison 2
[71, 95, 38]
Episode 15316, loss: -0.9280, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[6, 42, 40]
Episode 15317, loss: -0.9024, total reward: -0.0700
len3 Swap
[54, 26, 50]
Episode 15318, loss: -0.9119, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less Comparison 2 2 equal
[29, 39, 39]
Episode 15319, loss: -0.7717, total reward: -0.0600
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[90, 71, 69]
Episode 15320, lo

len3 Comparison 2 0 more Comparison 2 2 equal Comparison 2 2 equal Swap
[21, 64, 79]
Episode 15379, loss: -0.6100, total reward: 9.9500
len3 Swap
[22, 86, 81]
Episode 15380, loss: -0.6308, total reward: -1.0000
len3 Swap
[30, 59, 25]
Episode 15381, loss: -0.6838, total reward: -1.0000
len3 Swap
[29, 81, 22]
Episode 15382, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less Comparison 1 2 more
[41, 75, 71]
Episode 15383, loss: -1.0589, total reward: -0.0500
len3 Swap
[28, 32, 17]
Episode 15384, loss: -0.6954, total reward: -1.0000
len3 Swap
[84, 6, 95]
Episode 15385, loss: -0.6931, total reward: -1.0000
len3 Comparison 0 1 less Comparison 1 2 more Comparison 1 0 more Comparison 2 2 equal
[58, 82, 48]
Episode 15386, loss: -1.0219, total reward: -0.0500
len3 Swap
[5, 35, 5]
Episode 15387, loss: -0.5723, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[18, 51, 11]
Episode 15388,

Episode 15454, loss: -0.5521, total reward: -0.1500
len3 Swap
[4, 97, 12]
Episode 15455, loss: -0.7520, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[25, 76, 45]
Episode 15456, loss: -0.8388, total reward: -0.0700
len3 Swap
[27, 20, 56]
Episode 15457, loss: -0.7738, total reward: -1.0000
len3 Swap
[7, 23, 99]
Episode 15458, loss: -0.7183, total reward: -1.0000
len3 Comparison 1 2 more Comparison 1 2 more Comparison 2 0 less Swap Comparison 1
[6, 78, 38]
Episode 15459, loss: -0.6640, total reward: 0.0700
len3 Comparison 2 2 equal Swap
[40, 40, 61]
Episode 15460, loss: -0.1893, total reward: 9.9800
len3 Swap
[70, 62, 37]
Episode 15461, loss: -0.7388, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 1 2 less
[55, 26, 39]
Episode 15462, loss: -0.8438, total reward: -0.0600
len3 Swap
[24, 47, 80]
Episode 15463, loss: -0.7321, total reward: -1.0000
len3 Comparison 2 2 equal Com

Episode 15527, loss: -1.2014, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 1 2 more
[20, 57, 36]
Episode 15528, loss: -0.9097, total reward: -0.0600
len3 Swap
[17, 98, 43]
Episode 15529, loss: -0.6722, total reward: -1.0000
len3 Swap
[63, 27, 84]
Episode 15530, loss: -0.7341, total reward: -1.0000
len3 Swap
[93, 54, 42]
Episode 15531, loss: -0.7387, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less
[73, 17, 96]
Episode 15532, loss: -1.8080, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[67, 98, 53]
Episode 15533, loss: -1.2389, total reward: -0.0800
len3 Swap
[10, 89, 43]
Episode 15534, loss: -0.8049, total reward: -1.0000
len3 Swap
[99, 16, 53]
Episode 15535, loss: -0.6984, total reward: -1.0000
len3 Comparison 1 2 more Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 more
[82, 56, 11]
Episode 15536, lo

len3 Comparison 2 1 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[19, 16, 53]
Episode 15601, loss: -0.7768, total reward: -0.0700
len3 Swap
[17, 17, 50]
Episode 15602, loss: -0.6946, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[35, 47, 41]
Episode 15603, loss: -0.9108, total reward: -0.0700
len3 Comparison 1 2 more Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more
[60, 56, 42]
Episode 15604, loss: -1.4103, total reward: -0.0500
len3 Comparison 0 2 less Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[59, 24, 68]
Episode 15605, loss: -1.4898, total reward: -0.0600
len3 Comparison 2 2 equal Swap
[7, 23, 36]
Episode 15606, loss: -0.0934, total reward: 9.9800
len3 Swap
[91, 11, 48]
Episode 15607, loss: -0.6300, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[100, 87, 59]
Episode 15608, loss: -1.0076, total reward: 

Episode 15673, loss: -0.8978, total reward: -0.0600
len3 Comparison 1 2 more Comparison 0 2 less Comparison 2 2 equal Swap Comparison 2
[23, 82, 68]
Episode 15674, loss: -1.1501, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 0 2 less Comparison 2 2 equal
[34, 13, 98]
Episode 15675, loss: -1.0310, total reward: -0.0600
len3 Swap
[48, 16, 40]
Episode 15676, loss: -0.7039, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 0 2 less Comparison 2
[84, 77, 87]
Episode 15677, loss: -0.3909, total reward: -0.0600
len3 Swap
[47, 84, 91]
Episode 15678, loss: -0.6488, total reward: -1.0000
len3 Comparison 2 0 more Comparison 0 2 less Comparison 1 2 less Comparison 2 2 equal
[32, 71, 97]
Episode 15679, loss: -0.6396, total reward: -0.0500
len3 Swap
[32, 81, 64]
Episode 15680, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 1 1 equal Comparison 1 0 less Comparison 2
[17, 11, 44]
Episode 15681, loss: -0

len3 Comparison 2 2 equal Swap
[14, 54, 84]
Episode 15752, loss: 0.0163, total reward: 9.9800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[1, 21, 83]
Episode 15753, loss: -0.8504, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 more
[81, 100, 19]
Episode 15754, loss: -1.5322, total reward: -0.0500
len3 Comparison 1 2 less Comparison 1 2 less Comparison 1 2 less Comparison 2 2 equal
[88, 10, 59]
Episode 15755, loss: -1.2435, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 0 2 less Comparison 2 2 equal
[45, 41, 65]
Episode 15756, loss: -0.7291, total reward: -0.0600
len3 Comparison 2 2 equal Swap Swap
[44, 21, 61]
Episode 15757, loss: 0.5340, total reward: -1.0300
len3 Swap
[13, 8, 23]
Episode 15758, loss: -0.8282, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[45, 96, 50]
Episode 15759, loss: -0

Episode 15820, loss: -1.3450, total reward: -0.0500
len3 Swap
[2, 46, 100]
Episode 15821, loss: -0.8318, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[13, 65, 84]
Episode 15822, loss: -0.7630, total reward: -0.0700
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[34, 47, 84]
Episode 15823, loss: -1.6259, total reward: -0.0700
len3 Swap
[63, 59, 84]
Episode 15824, loss: -0.8067, total reward: -1.0000
len3 Swap
[40, 51, 93]
Episode 15825, loss: -0.8430, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 1 less Comparison 1 2 more Comparison 1 2 more
[100, 89, 48]
Episode 15826, loss: -0.0734, total reward: -0.0500
len3 Swap
[19, 63, 38]
Episode 15827, loss: -0.8282, total reward: -1.0000
len3 Swap
[23, 33, 34]
Episode 15828, loss: -0.6931, total reward: -1.0000
len3 Swap
[90, 16, 24]
Episode 15829, loss: -0.8661, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[7, 77, 58]
Episode 15895, loss: -0.8796, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[2, 56, 59]
Episode 15896, loss: -0.7284, total reward: -0.0700
len3 Swap
[27, 88, 98]
Episode 15897, loss: -0.9318, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 0 2 less Comparison 2 2 equal Comparison 2
[68, 20, 99]
Episode 15898, loss: -0.0664, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[53, 7, 35]
Episode 15899, loss: -0.7000, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[53, 27, 14]
Episode 15900, loss: -0.7293, total reward: -0.0800
len3 Swap
[79, 100, 60]
Episode 15901, loss: -0.9622, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal

Episode 15962, loss: -1.8534, total reward: -0.0600
len3 Comparison 0 2 less Comparison 0 2 less Comparison 0 2 less Comparison 1 2 less
[8, 31, 82]
Episode 15963, loss: -1.2353, total reward: -0.0400
len3 Swap
[14, 89, 86]
Episode 15964, loss: -0.6778, total reward: -1.0000
len3 Swap
[20, 99, 66]
Episode 15965, loss: -0.6870, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[92, 29, 69]
Episode 15966, loss: -1.6701, total reward: -0.0600
len3 Swap
[36, 7, 71]
Episode 15967, loss: -0.6792, total reward: -1.0000
len3 Swap
[37, 34, 16]
Episode 15968, loss: -0.6931, total reward: -1.0000
len3 Swap
[45, 20, 85]
Episode 15969, loss: -0.6710, total reward: -1.0000
len3 Swap
[80, 31, 86]
Episode 15970, loss: -0.6788, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 0 0 equal
[66, 64, 62]
Episode 15971, loss: -1.3954, total reward: -0.0600
len3 Swap
[82, 62, 37]
Episode 15972, loss: -0

Episode 16037, loss: 0.1376, total reward: -0.0700
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 2
[3, 57, 40]
Episode 16038, loss: 0.0382, total reward: -0.0700
len3 Swap
[98, 39, 67]
Episode 16039, loss: -0.8263, total reward: -1.0000
len3 Swap
[13, 6, 48]
Episode 16040, loss: -0.8020, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[43, 96, 51]
Episode 16041, loss: -0.2664, total reward: -0.0700
len3 Swap
[66, 92, 41]
Episode 16042, loss: -0.7920, total reward: -1.0000
len3 Swap
[9, 6, 77]
Episode 16043, loss: -0.7701, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[2, 32, 55]
Episode 16044, loss: -2.0629, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[35, 14, 76]
Episode 16045, loss: -0.8365, total reward: -0.0800
len3 Swap
[51, 32, 4]
Episode 16046, loss: -0

len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[72, 69, 73]
Episode 16117, loss: -1.0797, total reward: -0.0700
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[34, 46, 90]
Episode 16118, loss: -1.8119, total reward: -0.0600
len3 Swap
[62, 53, 47]
Episode 16119, loss: -1.1194, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[50, 66, 55]
Episode 16120, loss: -0.5304, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[72, 80, 92]
Episode 16121, loss: -0.5681, total reward: -0.0700
len3 Swap
[59, 22, 60]
Episode 16122, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[18, 87, 32]
Episode 16123, loss: -1.8038, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 1 more Comparison 2 2 equa

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[100, 95, 57]
Episode 16185, loss: -0.8385, total reward: -0.0700
len3 Comparison 2 0 less Comparison 2 2 equal Comparison 1 1 equal Comparison 2 2 equal
[96, 54, 48]
Episode 16186, loss: -0.5764, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 0 more Comparison 2 2 equal Comparison 1 2 less
[73, 76, 83]
Episode 16187, loss: -0.7782, total reward: -0.0600
len3 Swap
[74, 68, 95]
Episode 16188, loss: -0.8697, total reward: -1.0000
len3 Swap
[44, 77, 80]
Episode 16189, loss: -0.8263, total reward: -1.0000
len3 Swap
[56, 99, 50]
Episode 16190, loss: -0.6931, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[52, 27, 70]
Episode 16191, loss: -1.7648, total reward: -0.0600
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[6, 97, 40]
Episode 16192, loss: -1.6374, total reward: -0.0600
len3 Swap
[98

len3 Swap
[82, 2, 12]
Episode 16259, loss: -0.8946, total reward: -1.0000
len3 Swap
[31, 88, 58]
Episode 16260, loss: -0.8503, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 1 2 less Comparison 1 2 less
[4, 11, 89]
Episode 16261, loss: -1.2425, total reward: -0.0500
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[29, 9, 77]
Episode 16262, loss: -1.2763, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal Comparison 2 2 equal
[47, 42, 4]
Episode 16263, loss: -0.7098, total reward: -0.0700
len3 Swap
[15, 95, 99]
Episode 16264, loss: -0.8611, total reward: -1.0000
len3 Comparison 2 1 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[5, 59, 94]
Episode 16265, loss: -0.4157, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 more Comparison 2 2 equal
[10, 45, 5]
Episode 16266, loss: -0.8093, total reward: -0.0600
len3 Comparison 2 1 

len3 Comparison 2 2 equal Comparison 1 2 less Comparison 0 2 more Comparison 2 2 equal
[94, 81, 82]
Episode 16330, loss: -0.9078, total reward: -0.0600
len3 Swap
[36, 53, 29]
Episode 16331, loss: -0.7067, total reward: -1.0000
len3 Swap
[64, 21, 58]
Episode 16332, loss: -0.7270, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less
[91, 69, 50]
Episode 16333, loss: -0.8687, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 1
[5, 47, 26]
Episode 16334, loss: -1.8632, total reward: -0.0600
len3 Swap
[86, 34, 15]
Episode 16335, loss: -0.7908, total reward: -1.0000
len3 Swap
[59, 32, 48]
Episode 16336, loss: -0.7407, total reward: -1.0000
len3 Swap
[29, 59, 8]
Episode 16337, loss: -0.7583, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[12, 90, 34]
Episode 16338, loss: -0.8339, total reward: -0.0800
len3 Swap
[53, 7, 

len3 Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[84, 52, 17]
Episode 16401, loss: -1.7007, total reward: -0.0700
len3 Comparison 0 0 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 1 more
[66, 65, 90]
Episode 16402, loss: -1.0935, total reward: -0.0600
len3 Swap
[63, 67, 95]
Episode 16403, loss: -0.6876, total reward: -1.0000
len3 Comparison 1 2 more Comparison 0 2 more Comparison 2 2 equal Comparison 2 1 less
[83, 69, 59]
Episode 16404, loss: -1.4468, total reward: -0.0500
len3 Swap
[52, 14, 91]
Episode 16405, loss: -0.6122, total reward: -1.0000
len3 Swap
[62, 90, 74]
Episode 16406, loss: -0.6931, total reward: -1.0000
len3 Swap
[78, 30, 18]
Episode 16407, loss: -0.6912, total reward: -1.0000
len3 Swap
[40, 9, 48]
Episode 16408, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[67, 91, 82]
Episode 16409, loss: -0.9536, total reward: -0.0700
len3 Swap
[68, 95

len3 Comparison 1 2 less Comparison 0 2 less Comparison 0 2 less Comparison 1 2 less
[74, 14, 100]
Episode 16481, loss: -1.2438, total reward: -0.0400
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal
[68, 59, 66]
Episode 16482, loss: -0.6675, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[11, 69, 29]
Episode 16483, loss: -0.9581, total reward: -0.0700
len3 Swap
[32, 52, 98]
Episode 16484, loss: -0.8768, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Swap Comparison 1 2 less Comparison 2
[98, 4, 35]
Episode 16485, loss: 0.1000, total reward: 0.0600
len3 Swap
[7, 36, 74]
Episode 16486, loss: -0.8432, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[96, 68, 49]
Episode 16487, loss: -0.6895, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[99, 67

Episode 16551, loss: -1.4507, total reward: -0.0500
len3 Swap
[66, 20, 9]
Episode 16552, loss: -0.7890, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 1 2 more Swap
[17, 51, 54]
Episode 16553, loss: -1.4080, total reward: 9.9600
len3 Swap
[31, 2, 27]
Episode 16554, loss: -0.8064, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 1 2 less
[71, 51, 69]
Episode 16555, loss: -0.8193, total reward: -0.0600
len3 Swap
[11, 15, 71]
Episode 16556, loss: -0.7840, total reward: -1.0000
len3 Swap
[98, 45, 26]
Episode 16557, loss: -0.7553, total reward: -1.0000
len3 Swap
[47, 72, 79]
Episode 16558, loss: -0.6931, total reward: -1.0000
len3 Swap
[48, 5, 74]
Episode 16559, loss: -0.8371, total reward: -1.0000
len3 Comparison 0 2 less Comparison 0 2 less Comparison 0 2 less Comparison 1 2 less
[38, 5, 81]
Episode 16560, loss: -1.4417, total reward: -0.0400
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 1 2 more

len3 Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 more Comparison 0 2 more
[65, 59, 29]
Episode 16622, loss: -0.8961, total reward: -0.0500
len3 Swap
[69, 63, 54]
Episode 16623, loss: -0.7718, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 more Swap Comparison 2
[12, 65, 17]
Episode 16624, loss: -0.9336, total reward: 0.0600
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more
[21, 39, 38]
Episode 16625, loss: -0.8569, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[85, 55, 4]
Episode 16626, loss: -0.9210, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 1 2 less Comparison 1 2 less Comparison 0 2 less
[76, 63, 88]
Episode 16627, loss: -0.9609, total reward: -0.0500
len3 Swap
[55, 80, 8]
Episode 16628, loss: -0.6752, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[51, 56

Episode 16698, loss: -0.6086, total reward: -1.0000
len3 Swap
[7, 64, 13]
Episode 16699, loss: -0.6466, total reward: -1.0000
len3 Swap
[79, 38, 83]
Episode 16700, loss: -0.6442, total reward: -1.0000
len3 Swap
[92, 8, 100]
Episode 16701, loss: -0.6515, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 0 2 more Comparison 1 2 more Comparison 2
[77, 27, 19]
Episode 16702, loss: -0.2779, total reward: -0.0500
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[30, 11, 85]
Episode 16703, loss: -2.1022, total reward: -0.0600
len3 Swap
[76, 14, 68]
Episode 16704, loss: -0.6565, total reward: -1.0000
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 more
[92, 29, 25]
Episode 16705, loss: -2.2780, total reward: -0.0500
len3 Swap
[23, 32, 9]
Episode 16706, loss: -0.6903, total reward: -1.0000
len3 Swap
[6, 5, 72]
Episode 16707, loss: -0.6829, total reward: -1.0000
len3 Swap
[33, 55, 59]
Episode 16708, loss: -0.703

len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[28, 84, 93]
Episode 16772, loss: -1.9090, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more
[52, 65, 13]
Episode 16773, loss: -0.6670, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 0 2 more
[79, 83, 10]
Episode 16774, loss: -0.6916, total reward: -0.0600
len3 Comparison 1 2 less Comparison 0 2 more Comparison 0 2 more Comparison 0 2 more
[99, 48, 73]
Episode 16775, loss: -1.6423, total reward: -0.0400
len3 Comparison 2 1 more Comparison 0 2 less Comparison 0 2 less Comparison 0 2 less
[91, 51, 92]
Episode 16776, loss: -0.7103, total reward: -0.0400
len3 Swap
[14, 84, 3]
Episode 16777, loss: -0.6931, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 0 2 less Comparison 1 2 more
[9, 65, 10]
Episode 16778, loss: -0.7781, total reward: -0.0500
len3 Comparison 2 2 

len3 Comparison 2 1 less Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 0
[52, 81, 48]
Episode 16842, loss: -0.1908, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[80, 65, 4]
Episode 16843, loss: -1.0254, total reward: -0.0700
len3 Swap
[91, 1, 22]
Episode 16844, loss: -0.6920, total reward: -1.0000
len3 Swap
[14, 62, 24]
Episode 16845, loss: -0.6783, total reward: -1.0000
len3 Comparison 1 2 less Comparison 0 2 more Comparison 0 2 more Comparison 0 2 more
[52, 26, 34]
Episode 16846, loss: -1.3036, total reward: -0.0400
len3 Swap
[14, 78, 54]
Episode 16847, loss: -0.6792, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Swap Swap
[55, 28, 78]
Episode 16848, loss: -0.1162, total reward: -1.0600
len3 Swap
[19, 93, 42]
Episode 16849, loss: -0.6143, total reward: -1.0000
len3 Swap
[83, 3, 56]
Episode 16850, loss: -0.6742, total reward: -1.0000
len3 Swap
[18, 30, 63]
Episode 16

Episode 16918, loss: -0.6097, total reward: -1.0000
len3 Swap
[47, 32, 78]
Episode 16919, loss: -0.6455, total reward: -1.0000
len3 Swap
[49, 82, 35]
Episode 16920, loss: -0.6559, total reward: -1.0000
len3 Swap
[38, 12, 95]
Episode 16921, loss: -0.6038, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 0 0 equal
[2, 67, 70]
Episode 16922, loss: -0.9546, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[1, 81, 78]
Episode 16923, loss: -0.8710, total reward: -0.0700
len3 Swap
[76, 82, 46]
Episode 16924, loss: -0.6613, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 0 equal Comparison 0 1 less
[13, 68, 89]
Episode 16925, loss: -0.1012, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 1 2 more Comparison 1 2 more
[54, 48, 12]
Episode 16926, loss: -0.8996, total reward: -0.0500
len3 Swap
[78, 77, 1]
Episode 16927, los

len3 Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal Comparison 0 2 more
[96, 59, 41]
Episode 16992, loss: -0.7183, total reward: -0.0600
len3 Swap
[1, 79, 20]
Episode 16993, loss: -0.6297, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 0 2 less
[54, 26, 55]
Episode 16994, loss: -0.1019, total reward: -0.0600
len3 Swap
[15, 73, 33]
Episode 16995, loss: -0.6328, total reward: -1.0000
len3 Swap
[46, 65, 52]
Episode 16996, loss: -0.6270, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2 2 equal Swap Comparison
[58, 75, 12]
Episode 16997, loss: -0.1198, total reward: -0.0800
len3 Swap
[95, 56, 95]
Episode 16998, loss: -0.6403, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 1 2 less
[1, 4, 27]
Episode 16999, loss: -0.9370, total reward: -0.0600
len3 Swap
[99, 65, 32]
Episode 17000, loss: -0.6677, total reward: -1.0000
len3 Swap
[14, 53,

len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 equal
[8, 87, 51]
Episode 17068, loss: -1.3547, total reward: -0.0600
len3 Swap
[67, 42, 53]
Episode 17069, loss: -0.8187, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2
[27, 99, 83]
Episode 17070, loss: -0.7740, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[66, 44, 20]
Episode 17071, loss: -0.7345, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 0 equal
[17, 8, 86]
Episode 17072, loss: -0.8473, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[22, 75, 5]
Episode 17073, loss: -0.8366, total reward: -0.0800
len3 Swap
[2, 6, 58]
Episode 17074, loss: -0.8677, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 more
[30

len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[44, 13, 78]
Episode 17133, loss: -2.3257, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap
[13, 34, 64]
Episode 17134, loss: -0.8630, total reward: 9.9400
len3 Swap
[49, 41, 95]
Episode 17135, loss: -0.6611, total reward: -1.0000
len3 Swap
[39, 14, 9]
Episode 17136, loss: -0.5490, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 1 2 more Comparison 2
[100, 97, 18]
Episode 17137, loss: -1.0806, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[13, 66, 43]
Episode 17138, loss: -0.9788, total reward: -0.0800
len3 Swap
[61, 62, 46]
Episode 17139, loss: -0.5752, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[100, 21, 23]
Episode 17140, loss: -1.0218, total reward: -0.0700
len3 Comparison 2 2 equal C

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[68, 92, 96]
Episode 17205, loss: -0.8642, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[43, 28, 65]
Episode 17206, loss: -1.0652, total reward: -0.0700
len3 Swap
[53, 6, 20]
Episode 17207, loss: -0.5585, total reward: -1.0000
len3 Swap
[67, 29, 69]
Episode 17208, loss: -0.4700, total reward: -1.0000
len3 Swap
[71, 28, 4]
Episode 17209, loss: -0.5821, total reward: -1.0000
len3 Swap
[85, 14, 59]
Episode 17210, loss: -0.5919, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[30, 87, 55]
Episode 17211, loss: -1.1470, total reward: -0.0700
len3 Swap
[12, 36, 38]
Episode 17212, loss: -0.5507, total reward: -1.0000
len3 Swap
[59, 5, 38]
Episode 17213, loss: -0.5633, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Swap
[9, 82, 76]
Episode 17214, loss: 0.9

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 1 equal
[48, 43, 37]
Episode 17279, loss: -0.8885, total reward: -0.0800
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[69, 47, 83]
Episode 17280, loss: -2.0675, total reward: -0.0700
len3 Swap
[39, 84, 13]
Episode 17281, loss: -0.6385, total reward: -1.0000
len3 Swap
[63, 30, 16]
Episode 17282, loss: -0.6816, total reward: -1.0000
len3 Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[21, 36, 39]
Episode 17283, loss: -2.1338, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 more Comparison 0 2 more
[63, 67, 56]
Episode 17284, loss: -1.9937, total reward: -0.0500
len3 Swap
[63, 46, 61]
Episode 17285, loss: -0.6647, total reward: -1.0000
len3 Swap
[62, 3, 67]
Episode 17286, loss: -0.6433, total reward: -1.0000
len3 Swap
[65, 75, 69]
Episode 17287, loss: -0.6016, total reward: -1.0000
len3 Swap
[88, 

len3 Swap
[45, 15, 79]
Episode 17353, loss: -0.7723, total reward: -1.0000
len3 Swap
[93, 91, 29]
Episode 17354, loss: -0.7898, total reward: -1.0000
len3 Swap
[37, 45, 3]
Episode 17355, loss: -0.7333, total reward: -1.0000
len3 Swap
[64, 72, 26]
Episode 17356, loss: -0.8230, total reward: -1.0000
len3 Swap
[35, 33, 61]
Episode 17357, loss: -0.8270, total reward: -1.0000
len3 Swap
[73, 61, 2]
Episode 17358, loss: -0.8174, total reward: -1.0000
len3 Swap
[72, 94, 56]
Episode 17359, loss: -0.7601, total reward: -1.0000
len3 Swap
[84, 69, 76]
Episode 17360, loss: -0.8029, total reward: -1.0000
len3 Swap
[63, 67, 97]
Episode 17361, loss: -0.8193, total reward: -1.0000
len3 Comparison 2 1 more Comparison 0 2 more Comparison 0 2 more Comparison 2 2 equal
[94, 21, 26]
Episode 17362, loss: -0.3224, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[38, 12, 42]
Episode 17363, loss: -0.7362, total reward: -0.0800
len3 Comparison 2 2 eq

len3 Comparison 2 2 equal Comparison 2 0 less Comparison 2 2 equal Comparison 2 2 equal
[95, 8, 40]
Episode 17432, loss: -0.5897, total reward: -0.0700
len3 Swap
[62, 51, 53]
Episode 17433, loss: -0.9484, total reward: -1.0000
len3 Swap
[90, 4, 29]
Episode 17434, loss: -0.9738, total reward: -1.0000
len3 Swap
[31, 24, 98]
Episode 17435, loss: -0.9322, total reward: -1.0000
len3 Comparison 0 2 more Comparison 1 2 more Comparison 2 2 equal Comparison 2 2 equal
[41, 37, 31]
Episode 17436, loss: -1.5660, total reward: -0.0600
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more
[53, 79, 100]
Episode 17437, loss: -1.1083, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[22, 1, 43]
Episode 17438, loss: 0.2075, total reward: -0.0800
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 0
[69, 11, 11]
Episode 17439, loss: -2.0077, total reward: -0.0600
len3 Comparison 2

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[36, 79, 74]
Episode 17500, loss: -0.8489, total reward: -0.0700
len3 Swap
[39, 70, 67]
Episode 17501, loss: -0.7803, total reward: -1.0000
len3 Swap
[36, 13, 16]
Episode 17502, loss: -0.7947, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 0 2 less
[50, 36, 100]
Episode 17503, loss: -0.8443, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[82, 48, 87]
Episode 17504, loss: -0.7739, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[80, 47, 81]
Episode 17505, loss: -0.7793, total reward: -0.0700
len3 Swap
[52, 13, 24]
Episode 17506, loss: -0.7727, total reward: -1.0000
len3 Swap
[88, 50, 34]
Episode 17507, loss: -0.7494, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 more


len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[39, 73, 51]
Episode 17573, loss: -0.9464, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 0 2 more Swap
[86, 89, 90]
Episode 17574, loss: -0.6999, total reward: 9.9700
len3 Swap
[83, 92, 77]
Episode 17575, loss: -0.7027, total reward: -1.0000
len3 Swap
[59, 33, 49]
Episode 17576, loss: -0.5934, total reward: -1.0000
len3 Swap
[46, 4, 88]
Episode 17577, loss: -0.6874, total reward: -1.0000
len3 Swap
[37, 8, 71]
Episode 17578, loss: -0.6955, total reward: -1.0000
len3 Swap
[83, 62, 92]
Episode 17579, loss: -0.6555, total reward: -1.0000
len3 Swap
[54, 84, 50]
Episode 17580, loss: -0.6697, total reward: -1.0000
len3 Swap
[47, 95, 33]
Episode 17581, loss: -0.7066, total reward: -1.0000
len3 Swap
[49, 65, 12]
Episode 17582, loss: -0.6692, total reward: -1.0000
len3 Swap
[15, 61, 83]
Episode 17583, loss: -0.7459, total reward: -1.0000
len3 Swap
[59, 15, 94]
Episode 17584, loss: -0.8096, total 

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[58, 97, 72]
Episode 17647, loss: -0.7993, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 1 2 less
[27, 17, 32]
Episode 17648, loss: -0.8888, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[3, 40, 40]
Episode 17649, loss: -0.8091, total reward: -0.0700
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more Swap Comparison 2
[34, 5, 55]
Episode 17650, loss: -1.7752, total reward: 0.0600
len3 Swap
[44, 66, 22]
Episode 17651, loss: -0.6717, total reward: -1.0000
len3 Swap
[100, 10, 24]
Episode 17652, loss: -0.6924, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[74, 69, 8]
Episode 17653, loss: -0.8552, total reward: -0.0800
len3 Swap
[45, 62, 6]
Episode 17654, loss: -0.7116, total reward: -1.0000
len3 Comparison 2

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 more Swap Comparison 2
[64, 2, 41]
Episode 17723, loss: -0.9331, total reward: -0.1500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[4, 34, 96]
Episode 17724, loss: -0.7141, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[22, 23, 75]
Episode 17725, loss: -0.7135, total reward: -0.0800
len3 Swap
[32, 37, 20]
Episode 17726, loss: -0.8512, total reward: -1.0000
len3 Swap
[62, 33, 90]
Episode 17727, loss: -0.8649, total reward: -1.0000
len3 Swap
[43, 57, 92]
Episode 17728, loss: -0.8281, total reward: -1.0000
len3 Swap
[54, 79, 79]
Episode 17729, loss: -0.8575, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 1 less Comparison 2 2 equal
[72, 76, 65]
Episode 17730, loss: -0.6959, total reward: -0.0700
len3 Swap
[62, 21, 26]
Episode 17731, loss: -0.6931, total reward: -1.0000
len3 Comparison 

Episode 17792, loss: 0.1441, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 less
[91, 75, 11]
Episode 17793, loss: -0.7386, total reward: -0.0700
len3 Swap
[29, 24, 8]
Episode 17794, loss: -0.8049, total reward: -1.0000
len3 Swap
[77, 39, 23]
Episode 17795, loss: -0.8270, total reward: -1.0000
len3 Swap
[34, 89, 73]
Episode 17796, loss: -0.8046, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[8, 89, 27]
Episode 17797, loss: -0.7169, total reward: -0.0800
len3 Swap
[46, 92, 98]
Episode 17798, loss: -0.7555, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 less Comparison 1 2 more
[87, 98, 15]
Episode 17799, loss: -0.7910, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 1 2 more
[72, 33, 32]
Episode 17800, loss: -0.8273, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[59, 1, 19]
Episode 17860, loss: -0.6058, total reward: -0.0800
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[30, 2, 37]
Episode 17861, loss: -1.7663, total reward: -0.0700
len3 Swap
[41, 79, 27]
Episode 17862, loss: -0.5911, total reward: -1.0000
len3 Swap
[39, 19, 39]
Episode 17863, loss: -0.6235, total reward: -1.0000
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2
[58, 65, 2]
Episode 17864, loss: -2.4099, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[37, 32, 95]
Episode 17865, loss: -1.0056, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 1 more Comparison 2 2 equal Comparison 0 2 less
[62, 5, 94]
Episode 17866, loss: -0.9897, total reward: -0.0600
len3 Swap
[28, 65, 26]
Episode 17867, loss: -0.5899, total reward: -1.0000
len3 Swap
[29, 20

Episode 17937, loss: -0.7244, total reward: -1.0000
len3 Swap
[81, 57, 49]
Episode 17938, loss: -0.6928, total reward: -1.0000
len3 Comparison 1 2 more Comparison 1 2 more Comparison 1 2 more Comparison 2 2 equal
[50, 97, 94]
Episode 17939, loss: -1.7142, total reward: -0.0500
len3 Swap
[32, 6, 70]
Episode 17940, loss: -0.7094, total reward: -1.0000
len3 Swap
[18, 41, 72]
Episode 17941, loss: -0.6809, total reward: -1.0000
len3 Swap
[7, 18, 25]
Episode 17942, loss: -0.7053, total reward: -1.0000
len3 Swap
[99, 61, 5]
Episode 17943, loss: -0.7173, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal
[95, 12, 55]
Episode 17944, loss: -0.7400, total reward: -0.0700
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal
[76, 24, 70]
Episode 17945, loss: -0.8153, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[12, 57, 39]
Episode 17946, loss

len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 0 more
[15, 100, 64]
Episode 18010, loss: -0.7931, total reward: -0.0600
len3 Comparison 2 0 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[23, 94, 91]
Episode 18011, loss: -0.4172, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[3, 58, 33]
Episode 18012, loss: -0.8787, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 2 2 equal
[97, 77, 95]
Episode 18013, loss: -0.8834, total reward: -0.0700
len3 Comparison 2 0 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[31, 10, 45]
Episode 18014, loss: -0.4265, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less
[16, 3, 73]
Episode 18015, loss: -0.8733, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 1 less Comparison 2 2 eq

len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 1 2 less
[51, 2, 74]
Episode 18082, loss: -0.7498, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 0
[6, 98, 19]
Episode 18083, loss: -0.7722, total reward: -0.0700
len3 Swap
[9, 74, 88]
Episode 18084, loss: -0.7303, total reward: -1.0000
len3 Swap
[77, 83, 93]
Episode 18085, loss: -0.6856, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[69, 81, 18]
Episode 18086, loss: 0.0048, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more
[89, 87, 30]
Episode 18087, loss: -0.8047, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[77, 72, 50]
Episode 18088, loss: -0.7830, total reward: -0.0800
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[

len3 Comparison 2 2 equal Comparison 1 2 more Comparison 1 2 more Comparison 2 2 equal
[88, 90, 4]
Episode 18153, loss: -0.9355, total reward: -0.0600
len3 Comparison 1 2 more Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[16, 55, 32]
Episode 18154, loss: -1.3829, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 1 2 equal Comparison 2 2 equal Comparison 2 2 equal
[52, 44, 44]
Episode 18155, loss: -0.7729, total reward: -0.0800
len3 Swap
[87, 29, 47]
Episode 18156, loss: -0.6820, total reward: -1.0000
len3 Swap
[42, 19, 79]
Episode 18157, loss: -0.7417, total reward: -1.0000
len3 Swap
[5, 17, 37]
Episode 18158, loss: -0.7291, total reward: -1.0000
len3 Swap
[95, 3, 30]
Episode 18159, loss: -0.7280, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 2 2 equal Comparison 2 1 less
[88, 67, 4]
Episode 18160, loss: -0.8163, total reward: -0.0600
len3 Swap
[99, 36, 97]
Episode 18161, loss: -0.7351, total reward: -1.0000
len3 Swap
[92, 41, 

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal
[19, 39, 69]
Episode 18228, loss: -0.9039, total reward: -0.0700
len3 Comparison 2 0 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[71, 89, 51]
Episode 18229, loss: 0.5218, total reward: -0.0700
len3 Swap
[56, 51, 72]
Episode 18230, loss: -0.7744, total reward: -1.0000
len3 Comparison 1 2 more Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 more
[55, 26, 1]
Episode 18231, loss: -1.7080, total reward: -0.0500
len3 Swap
[10, 10, 11]
Episode 18232, loss: -0.7457, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[8, 96, 9]
Episode 18233, loss: -0.8298, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 1 2 more Comparison 1 2 more
[34, 75, 61]
Episode 18234, loss: -0.7884, total reward: -0.0500
len3 Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal Comparison 0 2 more
[57, 6

Episode 18302, loss: -0.7883, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Comparison 0 2 more
[64, 4, 56]
Episode 18303, loss: -0.8966, total reward: -0.0600
len3 Comparison 2 2 equal Comparison 2 1 more Comparison 2 2 equal Comparison 2 2 equal
[39, 35, 52]
Episode 18304, loss: -0.7704, total reward: -0.0700
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[16, 13, 86]
Episode 18305, loss: -2.1594, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 more Comparison 2 2 equal
[17, 92, 83]
Episode 18306, loss: -0.9361, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 0 less
[4, 44, 1]
Episode 18307, loss: -0.7785, total reward: -0.0700
len3 Swap
[12, 85, 65]
Episode 18308, loss: -0.7162, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 more Comparison 1 2 less Comparison 2 2 equal
[92, 44, 64]
Episode 1830

len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[47, 92, 74]
Episode 18372, loss: -0.8058, total reward: -0.0800
len3 Comparison 0 2 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[95, 71, 94]
Episode 18373, loss: -1.3848, total reward: -0.0700
len3 Swap
[54, 30, 4]
Episode 18374, loss: -0.6447, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 less
[26, 18, 82]
Episode 18375, loss: -0.9270, total reward: -0.0700
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Swap Comparison 2
[92, 38, 23]
Episode 18376, loss: -0.8980, total reward: -0.0700
len3 Comparison 2 0 more Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[31, 19, 98]
Episode 18377, loss: -0.4538, total reward: -0.0700
len3 Swap
[53, 27, 16]
Episode 18378, loss: -0.6667, total reward: -1.0000
len3 Swap
[77, 17, 49]
Episode 18379, loss: -0.6486, total reward: -1.0000
len3 Swap
[7

len3 Comparison 0 2 more Comparison 2 2 equal Comparison 1 2 more Comparison 2 1 less
[47, 96, 26]
Episode 18450, loss: -2.5042, total reward: -0.0500
len3 Swap
[100, 78, 21]
Episode 18451, loss: -0.7468, total reward: -1.0000
len3 Swap
[89, 15, 46]
Episode 18452, loss: -0.8014, total reward: -1.0000
len3 Swap
[75, 82, 34]
Episode 18453, loss: -0.8159, total reward: -1.0000
len3 Swap
[10, 39, 91]
Episode 18454, loss: -0.6892, total reward: -1.0000
len3 Swap
[53, 59, 54]
Episode 18455, loss: -0.8427, total reward: -1.0000
len3 Swap
[1, 14, 94]
Episode 18456, loss: -0.7739, total reward: -1.0000
len3 Comparison 2 2 equal Swap Comparison 2 2 equal Comparison 2 2 equal Comparison 2
[95, 13, 32]
Episode 18457, loss: 0.1235, total reward: -0.0700
len3 Swap
[7, 67, 60]
Episode 18458, loss: -0.8161, total reward: -1.0000
len3 Comparison 2 2 equal Comparison 0 2 less Comparison 2 2 equal Comparison 2 2 equal
[29, 90, 63]
Episode 18459, loss: -0.7613, total reward: -0.0700
len3 Comparison 1 1 eq

Episode 18526, loss: -0.7494, total reward: -0.0700
len3 Comparison 0 2 more Swap Comparison 0 2 less Comparison 2 2 equal Comparison 2
[8, 64, 28]
Episode 18527, loss: 1.0223, total reward: 0.0600
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 0 2 more
[94, 42, 82]
Episode 18528, loss: -0.7844, total reward: -0.0700
len3 Swap
[96, 28, 18]
Episode 18529, loss: -0.7126, total reward: -1.0000
len3 Comparison 1 2 less Comparison 2 0 more Swap Comparison 2 2 equal Comparison 2
[60, 52, 7]
Episode 18530, loss: -1.4245, total reward: -0.1400
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 1 2 less Swap Comparison 2
[52, 88, 25]
Episode 18531, loss: -0.8037, total reward: -0.1500
len3 Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[67, 91, 17]
Episode 18532, loss: -0.7777, total reward: -0.0800
len3 Comparison 1 2 less Comparison 2 2 equal Comparison 2 2 equal Comparison 2 2 equal
[66, 61, 72]
Episode 18533, loss: 