In [None]:
# Copy the dataset folder from Google Drive into the local /content directory.
# NOTE(review): assumes Drive is already mounted at /content/drive — no mount
# cell is visible in this part of the notebook; confirm before Run All.
!cp -r /content/drive/MyDrive/dakshina_dataset_v1.0 /content/

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import pandas as pd
import numpy as np
import wandb
import os
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Using device:", device)

Using device: cuda


## Build Vocabulary
Build character vocabularies from the English (Latin-script) and Hindi words.
The English-script spellings are the model inputs, and the Hindi words are the transliteration targets.

In [None]:
# Language code of the dataset subset to use ('hi' = Hindi).
LANG = 'hi'
# Folder that holds this language's lexicon files (trailing slash kept).
DATA_DIR = '/content/dakshina_dataset_v1.0/' + LANG + '/lexicons/'

def load_data(file_path):
    """Read a Dakshina lexicon TSV file into character-level (input, target) pairs.

    The Dakshina lexicon files are documented as one entry per line in the form
    ``native-script word <TAB> romanization <TAB> attestation count``. The
    romanization (Latin script) is the model input and the native-script word
    is the target, so the *second* column feeds ``input_texts`` and the *first*
    column feeds ``target_texts``.

    Args:
        file_path: Path to a UTF-8 encoded, tab-separated lexicon file.

    Returns:
        A tuple ``(input_texts, target_texts)`` of equal-length lists.
        ``input_texts[i]`` is the list of lower-cased Latin characters and
        ``target_texts[i]`` is ``['<sos>'] + native characters + ['<eos>']``.
        Blank lines and lines without two non-empty leading fields are skipped.
    """
    input_texts, target_texts = [], []
    with open(file_path, 'r', encoding='utf-8') as f:
        # Stream the file instead of materialising every line with readlines().
        for line in f:
            fields = [field.strip() for field in line.rstrip('\r\n').split('\t')]
            # Skip blank or malformed rows rather than failing on tuple unpacking.
            if len(fields) < 2 or not fields[0] or not fields[1]:
                continue
            # Column 0 is the native-script word, column 1 its romanization.
            devanagari, latin = fields[0], fields[1]
            input_texts.append(list(latin.lower()))
            target_texts.append(['<sos>'] + list(devanagari) + ['<eos>'])
    return input_texts, target_texts

# Load the train/dev/test lexicon splits. The file names are derived from LANG
# so they stay in sync with DATA_DIR when a different language is selected.
train_x, train_y = load_data(os.path.join(DATA_DIR, f'{LANG}.translit.sampled.train.tsv'))
val_x, val_y = load_data(os.path.join(DATA_DIR, f'{LANG}.translit.sampled.dev.tsv'))
test_x, test_y = load_data(os.path.join(DATA_DIR, f'{LANG}.translit.sampled.test.tsv'))

# Create vocab
def build_vocab(seqs):
    """Assign an integer id to every distinct token found in ``seqs``.

    Ids 0, 1 and 2 are reserved for '<pad>', '<sos>' and '<eos>'. Every other
    token receives the next free id, in order of first appearance.

    Args:
        seqs: Iterable of token sequences.

    Returns:
        Dict mapping token -> integer id.
    """
    token_to_id = {'<pad>': 0, '<sos>': 1, '<eos>': 2}
    for token in (tok for seq in seqs for tok in seq):
        # setdefault leaves known tokens untouched; len() is evaluated before
        # the insertion, so a new token gets the next unused id.
        token_to_id.setdefault(token, len(token_to_id))
    return token_to_id

# Token-to-id tables, built from the training split only.
src_vocab, tgt_vocab = build_vocab(train_x), build_vocab(train_y)

# Reverse table (id -> target token).
inv_tgt_vocab = dict((idx, tok) for tok, idx in tgt_vocab.items())


In [None]:
class TransliterationDataset(Dataset):
    """Map-style dataset that converts token sequences into id tensors.

    Args:
        src_data: Sequence of source token lists.
        tgt_data: Sequence of target token lists, aligned with ``src_data``.
        src_vocab: Mapping from source token to integer id.
        tgt_vocab: Mapping from target token to integer id.

    Tokens that are missing from a vocabulary (for example characters that
    occur only in the dev/test split) no longer raise ``KeyError``. They are
    encoded with the vocabulary's '<unk>' id when it defines one, otherwise
    with its '<pad>' id (0 if neither entry exists).
    """

    def __init__(self, src_data, tgt_data, src_vocab, tgt_vocab):
        self.src_data = src_data
        self.tgt_data = tgt_data
        self.src_vocab = src_vocab
        self.tgt_vocab = tgt_vocab

    def __len__(self):
        """Number of examples, taken from the source side."""
        return len(self.src_data)

    @staticmethod
    def _encode(tokens, vocab):
        """Return ``tokens`` as a 1-D int64 tensor of ids from ``vocab``."""
        fallback = vocab.get('<unk>', vocab.get('<pad>', 0))
        ids = [vocab.get(tok, fallback) for tok in tokens]
        # Explicit dtype: torch.tensor([]) would otherwise default to float32.
        return torch.tensor(ids, dtype=torch.long)

    def __getitem__(self, idx):
        """Return ``(source_ids, target_ids)`` for example ``idx``."""
        src_seq = self._encode(self.src_data[idx], self.src_vocab)
        tgt_seq = self._encode(self.tgt_data[idx], self.tgt_vocab)
        return src_seq, tgt_seq

def collate_fn(batch):
    """Collate ``(source, target)`` tensor pairs into padded batch tensors.

    Args:
        batch: Iterable of ``(src_tensor, tgt_tensor)`` pairs of 1-D tensors.

    Returns:
        ``(src_padded, tgt_padded, src_lens, tgt_lens)``. The padded tensors
        are batch-first and right-padded with 0; the length tensors hold the
        original (unpadded) length of every sequence.
    """
    src_seqs, tgt_seqs = zip(*batch)

    # Record true lengths before padding hides them.
    src_lens = torch.tensor(list(map(len, src_seqs)))
    tgt_lens = torch.tensor(list(map(len, tgt_seqs)))

    src_padded = pad_sequence(src_seqs, batch_first=True, padding_value=0)
    tgt_padded = pad_sequence(tgt_seqs, batch_first=True, padding_value=0)
    return src_padded, tgt_padded, src_lens, tgt_lens

BATCH_SIZE = 64

# Training split: reshuffled on every pass.
train_dataset = TransliterationDataset(
    src_data=train_x, tgt_data=train_y, src_vocab=src_vocab, tgt_vocab=tgt_vocab
)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn,
)

# Validation split: fixed order.
val_dataset = TransliterationDataset(
    src_data=val_x, tgt_data=val_y, src_vocab=src_vocab, tgt_vocab=tgt_vocab
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn,
)

# Test split: fixed order, one example per batch.
test_dataset = TransliterationDataset(
    src_data=test_x, tgt_data=test_y, src_vocab=src_vocab, tgt_vocab=tgt_vocab
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
    collate_fn=collate_fn,
)

## Model Definition
A Seq2Seq model is constructed from an encoder and a decoder.

In [None]:
class Seq2Seq(nn.Module):
    """Encoder-decoder sequence model built from RNN, GRU or LSTM layers.

    Args:
        config: Mapping with the keys ``src_vocab_size``, ``tgt_vocab_size``,
            ``embedding_size``, ``hidden_size``, ``num_encoder_layers``,
            ``num_decoder_layers``, ``cell_type`` ('RNN', 'GRU' or 'LSTM')
            and ``dropout``.

    The encoder's final state is passed unchanged to the decoder as its
    initial state, so both stacks must have the same number of layers.
    """

    def __init__(self, config):
        super().__init__()
        self.encoder_embed = nn.Embedding(config['src_vocab_size'], config['embedding_size'])
        self.decoder_embed = nn.Embedding(config['tgt_vocab_size'], config['embedding_size'])

        cell = {'RNN': nn.RNN, 'GRU': nn.GRU, 'LSTM': nn.LSTM}[config['cell_type']]

        # Inter-layer dropout is only passed for stacks with more than one
        # layer; single-layer stacks always get dropout=0.
        self.encoder = cell(config['embedding_size'], config['hidden_size'], config['num_encoder_layers'],
                            batch_first=True, dropout=config['dropout'] if config['num_encoder_layers'] > 1 else 0)

        self.decoder = cell(config['embedding_size'], config['hidden_size'], config['num_decoder_layers'],
                            batch_first=True, dropout=config['dropout'] if config['num_decoder_layers'] > 1 else 0)

        self.fc = nn.Linear(config['hidden_size'], config['tgt_vocab_size'])

        self.cell_type = config['cell_type']

    def forward(self, src, tgt, src_len=None, tgt_len=None):
        """Compute teacher-forced logits for every target position after the first.

        Args:
            src: Batch-first tensor of source token ids, right-padded.
            tgt: Batch-first tensor of target token ids; ``tgt[:, :-1]`` is
                fed to the decoder.
            src_len: Optional per-example source lengths. When given, the
                source is packed so padded positions do not influence the
                encoder's final state. When ``None`` the padded tensor is
                encoded as-is.
            tgt_len: Accepted for interface compatibility; not used.

        Returns:
            Logits of shape ``(batch, tgt.shape[1] - 1, tgt_vocab_size)``.
        """
        src_embed = self.encoder_embed(src)
        tgt_embed = self.decoder_embed(tgt[:, :-1])

        if src_len is not None:
            # pack_padded_sequence needs CPU lengths that are all >= 1.
            lengths = torch.as_tensor(src_len).cpu().clamp(min=1)
            src_embed = nn.utils.rnn.pack_padded_sequence(
                src_embed, lengths, batch_first=True, enforce_sorted=False
            )

        # For LSTM the state is an (h, c) tuple, which the decoder accepts
        # directly, so no per-cell-type branching is required.
        _, state = self.encoder(src_embed)
        output, _ = self.decoder(tgt_embed, state)

        logits = self.fc(output)
        return logits


## Sweeping

Function used to perform the hyperparameter sweep: it trains and evaluates one model per sweep trial.

In [None]:
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_batch_loss, token_accuracy)``.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch; without one it is put in eval mode and run with gradients
    disabled. Accuracy is counted over non-padding target tokens only.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum, n_correct, n_tokens = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for src, tgt, src_len, tgt_len in loader:
            src, tgt = src.to(device), tgt.to(device)
            gold = tgt[:, 1:]  # the model predicts each token after the first

            if training:
                optimizer.zero_grad()
            output = model(src, tgt, src_len, tgt_len)
            loss = criterion(output.reshape(-1, output.shape[-1]), gold.reshape(-1))
            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            mask = gold != 0  # id 0 is padding (matches ignore_index=0 in the loss)
            n_correct += ((output.argmax(-1) == gold) & mask).sum().item()
            n_tokens += mask.sum().item()

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    return loss_sum / max(len(loader), 1), n_correct / max(n_tokens, 1)


def train_model(train_loader, val_loader, config=None, num_epochs=10, learning_rate=0.001):
    """Train and validate one Seq2Seq model inside a W&B run.

    Args:
        train_loader: Loader yielding ``(src, tgt, src_len, tgt_len)`` training batches.
        val_loader: Loader yielding validation batches of the same form.
        config: Optional configuration passed to ``wandb.init``; inside a sweep
            the agent supplies the sampled values instead.
        num_epochs: Number of passes over ``train_loader`` (default 10).
        learning_rate: Adam learning rate (default 0.001).

    Side effects:
        Logs ``epoch``, ``train_loss``, ``val_loss``, ``train_accuracy`` and
        ``val_accuracy`` to W&B after every epoch and prints the final values.
        Runs whose encoder and decoder layer counts differ are skipped.
    """
    with wandb.init(config=config) as run:
        config = wandb.config

        # Seq2Seq hands the encoder state straight to the decoder, so the two
        # stacks need the same number of layers.
        if config.num_encoder_layers != config.num_decoder_layers:
            print(f"Skipping run due to layer mismatch: enc={config.num_encoder_layers}, dec={config.num_decoder_layers}")
            return

        # Set run name from config. The assignment is persisted by W&B; the
        # argument-less run.save() that used to follow is deprecated.
        run.name = (
            f"ed_{config.embedding_size}"
            f"_hs_{config.hidden_size}"
            f"_enc_{config.num_encoder_layers}"
            f"_dec_{config.num_decoder_layers}"
            f"_cell_{config.cell_type}"
            f"_drop_{config.dropout}"
        )

        # Create model
        model = Seq2Seq({
            'src_vocab_size': len(src_vocab),
            'tgt_vocab_size': len(tgt_vocab),
            'embedding_size': config.embedding_size,
            'hidden_size': config.hidden_size,
            'num_encoder_layers': config.num_encoder_layers,
            'num_decoder_layers': config.num_decoder_layers,
            'cell_type': config.cell_type,
            'dropout': config.dropout
        }).to(device)

        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.CrossEntropyLoss(ignore_index=0)

        # Defined up front so the final report still works when num_epochs == 0.
        train_loss = train_acc = val_loss = val_acc = float('nan')

        for epoch in range(num_epochs):
            train_loss, train_acc = _run_epoch(model, train_loader, criterion, optimizer)
            val_loss, val_acc = _run_epoch(model, val_loader, criterion)

            # Log to wandb
            wandb.log({
                'epoch': epoch,
                'train_loss': train_loss,
                'val_loss': val_loss,
                'train_accuracy': train_acc,
                'val_accuracy': val_acc
            })

        # Print final metrics
        print(f"\nFinal Run Metrics for {run.name}:")
        print(f"Train Loss: {train_loss:.4f} | Train Accuracy: {train_acc:.4f}")
        print(f"Val   Loss: {val_loss:.4f} | Val   Accuracy: {val_acc:.4f}\n")

In [None]:
# Search space for the W&B sweep: Bayesian search minimising 'val_loss'.
# NOTE(review): 'beam_size' is sampled here, but train_model as shown never
# reads it — confirm whether a later cell uses it.
# NOTE(review): encoder and decoder layer counts are sampled independently,
# while train_model skips every run where they differ.
sweep_config = dict(
    method='bayes',
    metric=dict(name='val_loss', goal='minimize'),
    parameters=dict(
        embedding_size=dict(values=[16, 32, 64, 256]),
        hidden_size=dict(values=[16, 32, 64, 256]),
        num_encoder_layers=dict(values=[1, 2, 3]),
        num_decoder_layers=dict(values=[1, 2, 3]),
        cell_type=dict(values=['RNN', 'GRU', 'LSTM']),
        dropout=dict(values=[0.2, 0.3]),
        beam_size=dict(values=[1, 2, 3, 5, 10]),
    ),
)

In [None]:
# Register the sweep definition with W&B; the returned id is handed to wandb.agent below.
sweep_id = wandb.sweep(sweep_config, project="Assignment3_Attempt1")

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: ex4t3obv
Sweep URL: https://wandb.ai/vinyk-sd-indian-institute-of-technology-madras/Assignment3_Attempt1/sweeps/ex4t3obv


In [None]:
# Launch a sweep agent that calls train_model once per sampled configuration.
wandb.agent(sweep_id, function=lambda: train_model(train_loader, val_loader), count=50)  # Run up to 50 trials (count=50); adjust if needed

[34m[1mwandb[0m: Agent Starting Run: qvetco5x with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2
[34m[1mwandb[0m: Currently logged in as: [33mvinyk-sd[0m ([33mvinyk-sd-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: qi5j61zd with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: n97uybr5 with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: info5pn6 with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: u3iaivc8 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2





Final Run Metrics for ed_64_hs_64_enc_2_dec_2_cell_RNN_drop_0.2:
Train Loss: 1.9056 | Train Accuracy: 0.4147
Val   Loss: 1.9085 | Val   Accuracy: 0.4204



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▃▄▅▆▆▇▇██
val_loss,█▆▅▃▃▂▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.41474
train_loss,1.90558
val_accuracy,0.42039
val_loss,1.90852


[34m[1mwandb[0m: Agent Starting Run: 3jwbxtbj with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Agent Starting Run: 97o9lcoe with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: k4leprmo with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9lyjy5g8 with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: v8rbhiu6 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: o2qatku9 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: 7j1zlfw9 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_64_hs_16_enc_2_dec_2_cell_RNN_drop_0.3:
Train Loss: 2.1918 | Train Accuracy: 0.3429
Val   Loss: 2.2223 | Val   Accuracy: 0.3433



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▂▁▁
val_accuracy,▁▂▂▅▅▇███▇
val_loss,█▇▆▄▃▃▂▁▁▂

0,1
epoch,9.0
train_accuracy,0.34291
train_loss,2.19183
val_accuracy,0.34331
val_loss,2.22232


[34m[1mwandb[0m: Agent Starting Run: b39ai05i with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: aqieo6m3 with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_64_hs_16_enc_3_dec_3_cell_LSTM_drop_0.2:
Train Loss: 1.5759 | Train Accuracy: 0.5175
Val   Loss: 1.4442 | Val   Accuracy: 0.5652



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▃▄▅▆▆▇▇█
train_loss,█▆▅▄▄▃▂▂▁▁
val_accuracy,▁▂▂▃▄▅▆▇▇█
val_loss,█▇▆▅▄▃▃▂▁▁

0,1
epoch,9.0
train_accuracy,0.51749
train_loss,1.57591
val_accuracy,0.56517
val_loss,1.44421


[34m[1mwandb[0m: Agent Starting Run: m1salprs with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xzyhixk8 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: opuc0vnx with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: ax02hr4p with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 95mgxw5z with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_256_hs_16_enc_3_dec_3_cell_GRU_drop_0.3:
Train Loss: 1.6837 | Train Accuracy: 0.4906
Val   Loss: 1.5251 | Val   Accuracy: 0.5405



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▄▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▃▃▅▆▆▇▇██
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.49063
train_loss,1.68371
val_accuracy,0.54052
val_loss,1.52512


[34m[1mwandb[0m: Agent Starting Run: n6ml6buw with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Agent Starting Run: lqvsfbs9 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: r28611ry with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: kqn5xj2z with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_64_hs_256_enc_1_dec_1_cell_GRU_drop_0.2:
Train Loss: 0.2741 | Train Accuracy: 0.9015
Val   Loss: 0.4359 | Val   Accuracy: 0.8680



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇██████
train_loss,█▃▂▂▂▁▁▁▁▁
val_accuracy,▁▅▇▇██████
val_loss,█▄▂▂▁▁▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.90154
train_loss,0.27414
val_accuracy,0.868
val_loss,0.43591


[34m[1mwandb[0m: Agent Starting Run: y13nhw9g with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: alv7zdbn with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: oyra2om4 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_256_hs_16_enc_1_dec_1_cell_GRU_drop_0.3:
Train Loss: 1.7897 | Train Accuracy: 0.4710
Val   Loss: 1.7684 | Val   Accuracy: 0.4804



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▄▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▂▃▄▅▆▇▇██
val_loss,█▇▆▅▄▃▂▂▂▁

0,1
epoch,9.0
train_accuracy,0.47097
train_loss,1.78966
val_accuracy,0.4804
val_loss,1.76844


[34m[1mwandb[0m: Agent Starting Run: g8izro1i with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_16_hs_16_enc_3_dec_3_cell_GRU_drop_0.3:
Train Loss: 1.6419 | Train Accuracy: 0.5047
Val   Loss: 1.4878 | Val   Accuracy: 0.5578



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▄▅▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▂▃▄▅▆▇▇██
val_loss,█▆▅▅▄▃▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.50472
train_loss,1.64186
val_accuracy,0.55777
val_loss,1.48776


[34m[1mwandb[0m: Agent Starting Run: 2ng4vcpm with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 64gpp0u7 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: nc8wx5p0 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ouadncd5 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: ahjvxpj8 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_64_hs_32_enc_2_dec_2_cell_RNN_drop_0.2:
Train Loss: 2.1235 | Train Accuracy: 0.3593
Val   Loss: 2.1073 | Val   Accuracy: 0.3695



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▇▇▇████
train_loss,█▄▃▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▄███
val_loss,█▆▄▃▂▂▄▂▂▁

0,1
epoch,9.0
train_accuracy,0.35929
train_loss,2.12353
val_accuracy,0.36949
val_loss,2.10733


[34m[1mwandb[0m: Agent Starting Run: 5wpqtbjn with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_256_hs_32_enc_2_dec_2_cell_RNN_drop_0.3:
Train Loss: 2.1453 | Train Accuracy: 0.3559
Val   Loss: 2.1390 | Val   Accuracy: 0.3681



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▃▅▄▇▇▇▇██
val_loss,█▆▄▅▂▂▂▁▁▁

0,1
epoch,9.0
train_accuracy,0.35595
train_loss,2.14533
val_accuracy,0.36813
val_loss,2.139


[34m[1mwandb[0m: Agent Starting Run: z910wwoi with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nea70dqb with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: k0o5myzm with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: ntqaksqw with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: 099ajv1d with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_16_hs_256_enc_1_dec_1_cell_GRU_drop_0.3:
Train Loss: 0.3027 | Train Accuracy: 0.8960
Val   Loss: 0.4514 | Val   Accuracy: 0.8669



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▇▇██████
train_loss,█▄▂▂▂▁▁▁▁▁
val_accuracy,▁▆▇▇██████
val_loss,█▃▂▂▁▁▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.896
train_loss,0.30267
val_accuracy,0.86687
val_loss,0.45142


[34m[1mwandb[0m: Agent Starting Run: 2i6tesj8 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: no0dymql with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_64_hs_256_enc_2_dec_2_cell_GRU_drop_0.2:
Train Loss: 0.2469 | Train Accuracy: 0.9076
Val   Loss: 0.3750 | Val   Accuracy: 0.8835



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇██████
train_loss,█▃▂▂▂▁▁▁▁▁
val_accuracy,▁▆▇▇██████
val_loss,█▄▂▂▁▁▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.90761
train_loss,0.24692
val_accuracy,0.88348
val_loss,0.37501


[34m[1mwandb[0m: Agent Starting Run: 7viujev7 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: b1vieyip with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_64_hs_256_enc_2_dec_2_cell_RNN_drop_0.2:
Train Loss: 1.5796 | Train Accuracy: 0.5115
Val   Loss: 1.7812 | Val   Accuracy: 0.4647



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▇▇▇██
train_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▃▃▅▅▆▇▇██
val_loss,█▆▆▅▄▂▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.51153
train_loss,1.57964
val_accuracy,0.46469
val_loss,1.7812


[34m[1mwandb[0m: Agent Starting Run: owx2u7wg with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: anfonvp1 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: 6xgadasj with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_256_hs_16_enc_2_dec_2_cell_RNN_drop_0.2:
Train Loss: 2.2147 | Train Accuracy: 0.3416
Val   Loss: 2.2018 | Val   Accuracy: 0.3477



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▄▄▄▆▇▇▇█▇
val_loss,█▅▄▄▃▂▂▁▁▁

0,1
epoch,9.0
train_accuracy,0.3416
train_loss,2.21468
val_accuracy,0.34773
val_loss,2.20183


[34m[1mwandb[0m: Agent Starting Run: bz93a88a with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_256_hs_64_enc_2_dec_2_cell_LSTM_drop_0.2:
Train Loss: 0.4967 | Train Accuracy: 0.8422
Val   Loss: 0.4827 | Val   Accuracy: 0.8506



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▇▇▇█████
train_loss,█▄▂▂▂▁▁▁▁▁
val_accuracy,▁▅▆▇▇█████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.84224
train_loss,0.49666
val_accuracy,0.85064
val_loss,0.48268


[34m[1mwandb[0m: Agent Starting Run: 1d0kgzoe with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: wzkml4im with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: 41p8iwks with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: sezmfcya with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


In [None]:
# Launch the W&B sweep agent. Each trial invokes train_model on the shared
# train/validation loaders; `count` caps this agent at 150 trials.
# NOTE(review): the log output below shows many trials exiting early with
# "Skipping run due to layer mismatch" — presumably those still consume the
# trial budget, so far fewer than 150 configurations actually train. Confirm,
# and consider constraining the sweep config so encoder/decoder layers match.
wandb.agent(
    sweep_id,
    function=lambda: train_model(train_loader, val_loader),
    count=150,
)

[34m[1mwandb[0m: Agent Starting Run: nxay9hod with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: 8y2sqfw4 with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_256_hs_256_enc_3_dec_3_cell_GRU_drop_0.3:
Train Loss: 0.2889 | Train Accuracy: 0.8971
Val   Loss: 0.3582 | Val   Accuracy: 0.8865



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇██████
train_loss,█▃▂▂▁▁▁▁▁▁
val_accuracy,▁▅▇▇▇▇████
val_loss,█▄▃▂▁▂▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.89711
train_loss,0.28888
val_accuracy,0.88655
val_loss,0.35819


[34m[1mwandb[0m: Agent Starting Run: 7jqxjxdj with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d2grtnig with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_256_hs_32_enc_1_dec_1_cell_GRU_drop_0.2:
Train Loss: 1.1551 | Train Accuracy: 0.6459
Val   Loss: 1.1310 | Val   Accuracy: 0.6537



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▆▇▇██
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.64591
train_loss,1.15507
val_accuracy,0.65371
val_loss,1.13102


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5eplj35o with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: 83mkr6xp with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_64_hs_256_enc_3_dec_3_cell_GRU_drop_0.2:
Train Loss: 0.2542 | Train Accuracy: 0.9057
Val   Loss: 0.3584 | Val   Accuracy: 0.8903



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▇▇▇██████
train_loss,█▃▂▂▁▁▁▁▁▁
val_accuracy,▁▅▇▇▇█████
val_loss,█▃▂▁▁▁▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.90566
train_loss,0.2542
val_accuracy,0.89028
val_loss,0.35837


[34m[1mwandb[0m: Agent Starting Run: 7yjt0bmr with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ylfdyir8 with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: v34kzr8g with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_32_hs_32_enc_3_dec_3_cell_GRU_drop_0.2:
Train Loss: 0.8537 | Train Accuracy: 0.7433
Val   Loss: 0.7224 | Val   Accuracy: 0.7941



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▆▇▇███
train_loss,█▆▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▆▄▃▂▂▂▁▁▁

0,1
epoch,9.0
train_accuracy,0.74325
train_loss,0.85375
val_accuracy,0.79413
val_loss,0.72241


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g53hn7xc with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Agent Starting Run: 4p4nvwjf with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: vglqylr0 with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_256_hs_256_enc_3_dec_3_cell_GRU_drop_0.3:
Train Loss: 0.2865 | Train Accuracy: 0.8974
Val   Loss: 0.3561 | Val   Accuracy: 0.8898



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇██████
train_loss,█▃▂▂▁▁▁▁▁▁
val_accuracy,▁▅▆▇▇▇▇▇▇█
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.89741
train_loss,0.28647
val_accuracy,0.88982
val_loss,0.35606


[34m[1mwandb[0m: Agent Starting Run: chie50ck with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_256_hs_64_enc_2_dec_2_cell_LSTM_drop_0.2:
Train Loss: 0.5036 | Train Accuracy: 0.8409
Val   Loss: 0.4864 | Val   Accuracy: 0.8530



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▇▇▇█████
train_loss,█▄▂▂▂▁▁▁▁▁
val_accuracy,▁▅▆▇▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.84089
train_loss,0.50362
val_accuracy,0.85304
val_loss,0.48641


[34m[1mwandb[0m: Agent Starting Run: 00cxuh4q with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_16_hs_16_enc_1_dec_1_cell_LSTM_drop_0.2:
Train Loss: 1.4712 | Train Accuracy: 0.5560
Val   Loss: 1.4548 | Val   Accuracy: 0.5657



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▃▄▅▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▂▃▄▅▆▇▇██
val_loss,█▇▆▅▄▃▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.55603
train_loss,1.47118
val_accuracy,0.5657
val_loss,1.45482


[34m[1mwandb[0m: Agent Starting Run: 97ih4ft6 with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: apiantzb with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: jw0eck0a with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_32_hs_32_enc_2_dec_2_cell_GRU_drop_0.3:
Train Loss: 0.8665 | Train Accuracy: 0.7358
Val   Loss: 0.7585 | Val   Accuracy: 0.7792



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▆▇▇███
train_loss,█▆▄▃▂▂▂▁▁▁
val_accuracy,▁▃▅▆▆▇▇███
val_loss,█▆▄▃▂▂▂▁▁▁

0,1
epoch,9.0
train_accuracy,0.7358
train_loss,0.86649
val_accuracy,0.77923
val_loss,0.75853


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: yag914ix with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_32_hs_32_enc_2_dec_2_cell_RNN_drop_0.2:
Train Loss: 2.0561 | Train Accuracy: 0.3785
Val   Loss: 2.0409 | Val   Accuracy: 0.3881



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▆▇▆▇█
train_loss,█▅▄▃▃▃▂▃▂▁
val_accuracy,▁▃▄▄▅▅▆▂██
val_loss,█▆▅▅▄▃▂▆▁▁

0,1
epoch,9.0
train_accuracy,0.37852
train_loss,2.05609
val_accuracy,0.38813
val_loss,2.04094


[34m[1mwandb[0m: Agent Starting Run: ew7494oa with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_16_hs_32_enc_1_dec_1_cell_LSTM_drop_0.2:
Train Loss: 1.0797 | Train Accuracy: 0.6809
Val   Loss: 1.0829 | Val   Accuracy: 0.6860



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▃▄▅▆▇▇██
train_loss,█▇▆▅▄▃▂▂▁▁
val_accuracy,▁▁▃▄▅▆▇▇██
val_loss,█▇▆▅▄▃▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.68089
train_loss,1.07968
val_accuracy,0.686
val_loss,1.08289


[34m[1mwandb[0m: Agent Starting Run: w40t8lfg with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_32_hs_64_enc_2_dec_2_cell_LSTM_drop_0.2:
Train Loss: 0.4943 | Train Accuracy: 0.8445
Val   Loss: 0.4872 | Val   Accuracy: 0.8519



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▇▇▇████
train_loss,█▅▃▂▂▂▁▁▁▁
val_accuracy,▁▅▆▇▇█████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.8445
train_loss,0.4943
val_accuracy,0.85191
val_loss,0.48716


[34m[1mwandb[0m: Agent Starting Run: mjb7032v with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: va9rab8z with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xxcky4q8 with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_32_hs_32_enc_2_dec_2_cell_GRU_drop_0.2:
Train Loss: 0.8631 | Train Accuracy: 0.7383
Val   Loss: 0.7715 | Val   Accuracy: 0.7747



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▆▇▇███
train_loss,█▆▄▃▃▂▂▁▁▁
val_accuracy,▁▃▅▆▆▇▇▇██
val_loss,█▆▄▃▃▂▂▁▁▁

0,1
epoch,9.0
train_accuracy,0.73827
train_loss,0.86312
val_accuracy,0.77475
val_loss,0.77147


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nhjf93kh with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: dpbjoabo with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_64_hs_16_enc_3_dec_3_cell_RNN_drop_0.3:
Train Loss: 2.2189 | Train Accuracy: 0.3414
Val   Loss: 2.2108 | Val   Accuracy: 0.3470



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▆▇▇▇███
train_loss,█▄▄▃▂▂▂▁▁▁
val_accuracy,▁▃▃▅▆▆█▇██
val_loss,█▆▆▄▃▃▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.34143
train_loss,2.21889
val_accuracy,0.34695
val_loss,2.21076


[34m[1mwandb[0m: Agent Starting Run: 453lorq3 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: 6ctvmvg5 with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Agent Starting Run: svdoak2y with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ith6v22u with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: hqa7oica with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: pqudictf with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_16_hs_64_enc_3_dec_3_cell_RNN_drop_0.2:
Train Loss: 2.0166 | Train Accuracy: 0.3843
Val   Loss: 2.0376 | Val   Accuracy: 0.3866



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▇▇████
train_loss,█▄▄▃▂▂▁▁▁▁
val_accuracy,▁▄▃▇▆▇▇█▇▇
val_loss,█▆▆▂▃▂▁▁▁▂

0,1
epoch,9.0
train_accuracy,0.38432
train_loss,2.01664
val_accuracy,0.38659
val_loss,2.03762


[34m[1mwandb[0m: Agent Starting Run: 2mzh4xqs with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: 0bkb0mx2 with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: e6cwvd8x with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5ef0xep3 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: s4elcume with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: gpqsezuy with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8wncfiuf with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: zyx2d827 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mw8wkzph with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_32_hs_16_enc_3_dec_3_cell_GRU_drop_0.2:
Train Loss: 1.6438 | Train Accuracy: 0.5100
Val   Loss: 1.5312 | Val   Accuracy: 0.5506



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▄▅▆▆▇██
train_loss,█▅▅▄▄▃▂▂▁▁
val_accuracy,▁▃▃▄▄▆▆▇██
val_loss,█▆▆▅▄▃▃▂▁▁

0,1
epoch,9.0
train_accuracy,0.51005
train_loss,1.64377
val_accuracy,0.55059
val_loss,1.53123


[34m[1mwandb[0m: Agent Starting Run: 5ci5so7x with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_32_hs_256_enc_2_dec_2_cell_GRU_drop_0.2:
Train Loss: 0.2544 | Train Accuracy: 0.9062
Val   Loss: 0.3815 | Val   Accuracy: 0.8829



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇██████
train_loss,█▃▂▂▂▁▁▁▁▁
val_accuracy,▁▅▅▇▇█████
val_loss,█▄▃▂▁▁▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.90623
train_loss,0.2544
val_accuracy,0.88293
val_loss,0.38152


[34m[1mwandb[0m: Agent Starting Run: gj8m1qs1 with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: larbq9in with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: oahp8i5e with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: owo4sxcn with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: 4e443b4r with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_64_hs_16_enc_3_dec_3_cell_RNN_drop_0.3:
Train Loss: 2.2179 | Train Accuracy: 0.3394
Val   Loss: 2.1796 | Val   Accuracy: 0.3492



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▇▇▇▇██
train_loss,█▄▃▃▂▂▂▂▁▁
val_accuracy,▁▃▄▆▆▇▇███
val_loss,█▆▅▄▃▃▂▂▂▁

0,1
epoch,9.0
train_accuracy,0.33939
train_loss,2.21794
val_accuracy,0.34915
val_loss,2.17957


[34m[1mwandb[0m: Agent Starting Run: ln1y8lzi with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: t7ber4h4 with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: w9w0yoho with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: daeqxkm1 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Agent Starting Run: mhvhya4s with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_64_hs_16_enc_1_dec_1_cell_RNN_drop_0.3:
Train Loss: 2.1641 | Train Accuracy: 0.3414
Val   Loss: 2.1809 | Val   Accuracy: 0.3421



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▇▇████
train_loss,█▄▃▃▂▂▁▁▁▁
val_accuracy,▂▁▄▅▆▇▇▇██
val_loss,██▅▄▃▂▂▁▁▁

0,1
epoch,9.0
train_accuracy,0.34136
train_loss,2.16411
val_accuracy,0.34212
val_loss,2.18092


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: s9gbnlb1 with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: psp8spuh with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ajkjhkiy with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: l3wsv44f with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 21is7vyq with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_32_hs_256_enc_1_dec_1_cell_RNN_drop_0.3:
Train Loss: 1.6063 | Train Accuracy: 0.5046
Val   Loss: 1.7672 | Val   Accuracy: 0.4683



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▃▃▅▆▇▇███
val_loss,█▆▆▄▃▂▂▁▁▁

0,1
epoch,9.0
train_accuracy,0.50464
train_loss,1.60625
val_accuracy,0.46833
val_loss,1.76724


[34m[1mwandb[0m: Agent Starting Run: cr1fvbeu with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_64_hs_64_enc_3_dec_3_cell_GRU_drop_0.3:
Train Loss: 0.5350 | Train Accuracy: 0.8340
Val   Loss: 0.4713 | Val   Accuracy: 0.8591



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇█████
train_loss,█▄▃▂▂▁▁▁▁▁
val_accuracy,▁▅▆▇▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.834
train_loss,0.53503
val_accuracy,0.85906
val_loss,0.47131


[34m[1mwandb[0m: Agent Starting Run: ff3ydotb with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: 6urpfdan with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Agent Starting Run: 1p144omc with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_16_hs_256_enc_3_dec_3_cell_LSTM_drop_0.3:
Train Loss: 0.2844 | Train Accuracy: 0.8989
Val   Loss: 0.3540 | Val   Accuracy: 0.8878



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▇▇██████
train_loss,█▄▂▂▁▁▁▁▁▁
val_accuracy,▁▆▇███████
val_loss,█▃▂▂▁▁▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.89894
train_loss,0.28436
val_accuracy,0.88782
val_loss,0.35405


[34m[1mwandb[0m: Agent Starting Run: eoeuf0qt with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: 6e1iekji with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nud9fyoq with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: b9mguitd with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_32_hs_64_enc_2_dec_2_cell_LSTM_drop_0.2:
Train Loss: 0.5036 | Train Accuracy: 0.8430
Val   Loss: 0.4932 | Val   Accuracy: 0.8520



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▇▇▇████
train_loss,█▅▃▂▂▂▁▁▁▁
val_accuracy,▁▅▆▇▇█████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.84304
train_loss,0.50359
val_accuracy,0.852
val_loss,0.49322


[34m[1mwandb[0m: Agent Starting Run: 1exjjw9f with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_256_hs_64_enc_2_dec_2_cell_LSTM_drop_0.3:
Train Loss: 0.5330 | Train Accuracy: 0.8317
Val   Loss: 0.4972 | Val   Accuracy: 0.8507



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇█████
train_loss,█▄▃▂▂▁▁▁▁▁
val_accuracy,▁▅▆▇▇█████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.83166
train_loss,0.53299
val_accuracy,0.85073
val_loss,0.49724


[34m[1mwandb[0m: Agent Starting Run: hvgcohpy with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: 3k4tnva3 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: ywbm1z9v with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7532woe4 with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: p9rjxc7r with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vwvksr8l with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: rzmboxg6 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 85f9g4v9 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: glwamzdy with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_32_hs_32_enc_3_dec_3_cell_LSTM_drop_0.2:
Train Loss: 0.9675 | Train Accuracy: 0.7101
Val   Loss: 0.8156 | Val   Accuracy: 0.7672



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▃▄▅▆▇▇██
train_loss,█▇▆▅▄▃▂▂▁▁
val_accuracy,▁▂▃▅▅▆▇▇██
val_loss,█▇▆▅▄▃▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.71011
train_loss,0.96753
val_accuracy,0.76717
val_loss,0.81563


[34m[1mwandb[0m: Agent Starting Run: 5bq7l0yy with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_32_hs_32_enc_3_dec_3_cell_RNN_drop_0.3:
Train Loss: 2.1204 | Train Accuracy: 0.3611
Val   Loss: 2.0803 | Val   Accuracy: 0.3741



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▆▇▇▇███
train_loss,█▅▃▃▃▂▂▁▁▁
val_accuracy,▁▃▅▅▆▇▇███
val_loss,█▆▅▄▃▃▂▂▂▁

0,1
epoch,9.0
train_accuracy,0.36112
train_loss,2.12044
val_accuracy,0.37412
val_loss,2.08026


[34m[1mwandb[0m: Agent Starting Run: 79p88rsc with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: p0fb8nuz with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mqictxmc with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: vij8aqti with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: mbvbh6yb with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: ud3dnxfa with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 750ayoi7 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_16_hs_64_enc_2_dec_2_cell_GRU_drop_0.2:
Train Loss: 0.5971 | Train Accuracy: 0.8170
Val   Loss: 0.5760 | Val   Accuracy: 0.8312



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▆▇▇████
train_loss,█▅▃▃▂▂▁▁▁▁
val_accuracy,▁▄▆▆▇▇████
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.81697
train_loss,0.59708
val_accuracy,0.83123
val_loss,0.57605


[34m[1mwandb[0m: Agent Starting Run: xwdttavy with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_64_hs_256_enc_2_dec_2_cell_LSTM_drop_0.2:
Train Loss: 0.2383 | Train Accuracy: 0.9102
Val   Loss: 0.3512 | Val   Accuracy: 0.8874



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇██████
train_loss,█▃▂▂▁▁▁▁▁▁
val_accuracy,▁▆▇▇██████
val_loss,█▃▂▂▁▁▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.91023
train_loss,0.23827
val_accuracy,0.88736
val_loss,0.3512


[34m[1mwandb[0m: Agent Starting Run: hvusl849 with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pfhpai1j with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: reg5wj4i with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: n5sc4nqa with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: delvg8nd with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: l2vyyvjb with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_256_hs_32_enc_1_dec_1_cell_LSTM_drop_0.2:
Train Loss: 0.8184 | Train Accuracy: 0.7513
Val   Loss: 0.8278 | Val   Accuracy: 0.7536



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▅▆▇▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.75135
train_loss,0.81836
val_accuracy,0.75362
val_loss,0.82784


[34m[1mwandb[0m: Agent Starting Run: oi7qzq7g with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: x8t30x6w with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: czk2otzq with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: q253e61n with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: 6nl9afq5 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h1dhxv88 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ti2my7ap with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_256_hs_16_enc_3_dec_3_cell_RNN_drop_0.2:
Train Loss: 2.2369 | Train Accuracy: 0.3393
Val   Loss: 2.2119 | Val   Accuracy: 0.3486



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▄▃▃▂▂▂▁▁▁
val_accuracy,▁▃▄▅▆▆▇███
val_loss,█▆▅▄▃▃▂▁▁▁

0,1
epoch,9.0
train_accuracy,0.33934
train_loss,2.23688
val_accuracy,0.3486
val_loss,2.21193


[34m[1mwandb[0m: Agent Starting Run: m6ajli4c with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: w6762bso with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_16_hs_16_enc_3_dec_3_cell_LSTM_drop_0.2:
Train Loss: 1.7616 | Train Accuracy: 0.4656
Val   Loss: 1.6636 | Val   Accuracy: 0.5066



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▃▄▅▅▆▇▇█
train_loss,█▆▅▄▄▃▂▂▁▁
val_accuracy,▁▂▂▃▄▅▆▇▇█
val_loss,█▆▆▅▄▃▂▂▂▁

0,1
epoch,9.0
train_accuracy,0.46563
train_loss,1.76157
val_accuracy,0.50661
val_loss,1.66356


[34m[1mwandb[0m: Agent Starting Run: ryjqv110 with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Agent Starting Run: ky7j8crh with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: eypami1a with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d68l0l1g with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: heh0g9w8 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_64_hs_16_enc_3_dec_3_cell_GRU_drop_0.3:
Train Loss: 1.4470 | Train Accuracy: 0.5611
Val   Loss: 1.2724 | Val   Accuracy: 0.6191



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▇▇▇██
train_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▂▄▅▆▆▇▇██
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.56111
train_loss,1.44703
val_accuracy,0.61905
val_loss,1.27237


[34m[1mwandb[0m: Agent Starting Run: 057u1iui with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: wq8y143a with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: hl0mr0mv with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Agent Starting Run: sm59ki1d with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: skdbthf1 with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: smgdnzic with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_16_hs_64_enc_3_dec_3_cell_LSTM_drop_0.3:
Train Loss: 0.6834 | Train Accuracy: 0.7915
Val   Loss: 0.6031 | Val   Accuracy: 0.8263



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▄▅▆▇▇███
train_loss,█▆▅▄▃▂▂▁▁▁
val_accuracy,▁▂▄▆▆▇▇███
val_loss,█▇▅▄▃▂▂▁▁▁

0,1
epoch,9.0
train_accuracy,0.79153
train_loss,0.68344
val_accuracy,0.82634
val_loss,0.60306


[34m[1mwandb[0m: Agent Starting Run: rrcqg9w4 with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_64_hs_16_enc_2_dec_2_cell_GRU_drop_0.3:
Train Loss: 1.3619 | Train Accuracy: 0.5822
Val   Loss: 1.2392 | Val   Accuracy: 0.6223



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▆▆▇▇▇██
train_loss,█▆▄▃▃▂▂▁▁▁
val_accuracy,▁▂▄▅▆▆▇▇██
val_loss,█▆▅▄▃▂▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.5822
train_loss,1.36193
val_accuracy,0.62235
val_loss,1.23921


[34m[1mwandb[0m: Agent Starting Run: f5rw8bmy with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_32_hs_32_enc_3_dec_3_cell_LSTM_drop_0.3:
Train Loss: 1.0083 | Train Accuracy: 0.6955
Val   Loss: 0.8517 | Val   Accuracy: 0.7562



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▃▄▅▆▇▇██
train_loss,█▇▆▄▄▃▂▂▁▁
val_accuracy,▁▂▃▄▅▆▇▇██
val_loss,█▇▆▅▄▃▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.69547
train_loss,1.00831
val_accuracy,0.75617
val_loss,0.85173


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: t4y4swq1 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_16_hs_64_enc_1_dec_1_cell_RNN_drop_0.3:
Train Loss: 1.9592 | Train Accuracy: 0.3943
Val   Loss: 2.0108 | Val   Accuracy: 0.3900



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▆▇▇██
train_loss,█▅▄▃▃▃▂▂▁▁
val_accuracy,▁▂▄▅▆▆▅▆▇█
val_loss,█▇▆▅▄▃▄▃▂▁

0,1
epoch,9.0
train_accuracy,0.39428
train_loss,1.95925
val_accuracy,0.39004
val_loss,2.01075


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: okpiev9n with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: veiqyu7m with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: jaz478by with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 06orx5aw with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3ip2xovb with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xcd64xe7 with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: rpcm7it8 with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tti0mf5h with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: ov9e1yqu with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: bk1m1xzb with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: lctw3z1l with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: 3nyhjylh with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3



Final Run Metrics for ed_32_hs_32_enc_3_dec_3_cell_GRU_drop_0.3:
Train Loss: 1.0103 | Train Accuracy: 0.6959
Val   Loss: 0.8456 | Val   Accuracy: 0.7567



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▇▇▇██
train_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▂▄▅▆▇▇▇██
val_loss,█▇▅▄▃▂▂▁▁▁

0,1
epoch,9.0
train_accuracy,0.69594
train_loss,1.01031
val_accuracy,0.75666
val_loss,0.84565


[34m[1mwandb[0m: Agent Starting Run: xmhz9yep with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Agent Starting Run: idvf4i0c with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mm28yo3s with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Agent Starting Run: l9lgbk89 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1n0ywcvl with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: eu3024m5 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: ins4jf6q with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: nu8ns8ni with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_256_hs_64_enc_1_dec_1_cell_GRU_drop_0.2:
Train Loss: 0.6742 | Train Accuracy: 0.7925
Val   Loss: 0.7121 | Val   Accuracy: 0.7837



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▄▆▇▇▇████
val_loss,█▅▃▂▂▂▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.79254
train_loss,0.67425
val_accuracy,0.78372
val_loss,0.71207


[34m[1mwandb[0m: Agent Starting Run: 1e4jqlzu with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_64_hs_64_enc_1_dec_1_cell_GRU_drop_0.3:
Train Loss: 0.6250 | Train Accuracy: 0.8097
Val   Loss: 0.6516 | Val   Accuracy: 0.8073



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▇▇▇████
train_loss,█▅▃▂▂▂▁▁▁▁
val_accuracy,▁▄▆▇▇▇████
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
epoch,9.0
train_accuracy,0.80972
train_loss,0.62499
val_accuracy,0.80727
val_loss,0.65161


[34m[1mwandb[0m: Agent Starting Run: xqvnjmv7 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xtofzf7v with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_32_hs_256_enc_1_dec_1_cell_RNN_drop_0.2:
Train Loss: 1.6754 | Train Accuracy: 0.4841
Val   Loss: 1.8536 | Val   Accuracy: 0.4401



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▇███
train_loss,█▆▄▃▃▃▂▁▁▁
val_accuracy,▁▃▄▆▇▆███▇
val_loss,█▆▅▃▂▂▁▁▁▂

0,1
epoch,9.0
train_accuracy,0.48411
train_loss,1.6754
val_accuracy,0.44012
val_loss,1.85357


[34m[1mwandb[0m: Agent Starting Run: 5fpl7y6i with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: jyh8rlnn with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: m2lzt0x3 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_64_hs_256_enc_2_dec_2_cell_RNN_drop_0.2:
Train Loss: 1.6754 | Train Accuracy: 0.4865
Val   Loss: 1.8056 | Val   Accuracy: 0.4591



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▃▅▅▆▆▇▇█
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▁▄▅▅▆▆▇▇█
val_loss,██▅▄▄▃▃▂▂▁

0,1
epoch,9.0
train_accuracy,0.48655
train_loss,1.67537
val_accuracy,0.45907
val_loss,1.80562


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vjuyqw6q with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=2


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fxj42ybp with config:
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3


Skipping run due to layer mismatch: enc=3, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dsgm4o8e with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: k9dzpc83 with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3


[34m[1mwandb[0m: Agent Starting Run: e1dofl7m with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 1



Final Run Metrics for ed_64_hs_256_enc_1_dec_1_cell_LSTM_drop_0.2:
Train Loss: 0.2535 | Train Accuracy: 0.9076
Val   Loss: 0.3887 | Val   Accuracy: 0.8771



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇██████
train_loss,█▃▂▂▂▁▁▁▁▁
val_accuracy,▁▆▇▇▇█████
val_loss,█▄▂▂▁▁▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.90761
train_loss,0.25348
val_accuracy,0.87709
val_loss,0.38871


[34m[1mwandb[0m: Agent Starting Run: lyk34epn with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: 3xbsmsti with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=3


[34m[1mwandb[0m: Agent Starting Run: jc9g5x88 with config:
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2



Final Run Metrics for ed_256_hs_64_enc_2_dec_2_cell_GRU_drop_0.3:
Train Loss: 0.5628 | Train Accuracy: 0.8236
Val   Loss: 0.5289 | Val   Accuracy: 0.8415



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▅▆▇▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,9.0
train_accuracy,0.8236
train_loss,0.5628
val_accuracy,0.84147
val_loss,0.52886


[34m[1mwandb[0m: Agent Starting Run: uylcurni with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Skipping run due to layer mismatch: enc=1, dec=2


[34m[1mwandb[0m: Agent Starting Run: 0gu84qky with config:
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mx8kad0y with config:
[34m[1mwandb[0m: 	beam_size: 10
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=1


[34m[1mwandb[0m: Agent Starting Run: hcek71ns with config:
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Skipping run due to layer mismatch: enc=2, dec=3
