In [10]:
import random

import numpy as np
import optuna
import torch
from jsonargparse import ArgumentParser, ActionConfigFile
from torchtext.data import TabularDataset, Field, LabelField, BucketIterator
from tqdm import tqdm, trange

from mlearn import base
from mlearn.data.clean import Cleaner, Preprocessors
from mlearn.modeling.embedding import MLPClassifier
from mlearn.utils.metrics import Metrics
from mlearn.utils.pipeline import process_and_batch, param_selection
from mlearn.utils.train import train_singletask_model

In [2]:
class TorchTextDefaultExtractor:
    """Expose each batch of a torchtext-style loader as a plain ``(X, y)`` tuple."""

    def __init__(self, datafield: str, labelfield: str, dataloader: base.DataType):
        """Store the batch source and the attribute names to read from each batch.

        :datafield (str): Name of the batch attribute yielded as ``X``.
        :labelfield (str): Name of the batch attribute yielded as ``y``.
        :dataloader (base.DataType): Object that is iterated to obtain batches;
            it must also support ``len()`` for ``__len__`` to work.
        """
        self.data = dataloader
        self.df = datafield
        self.lf = labelfield

    def __len__(self):
        """Return ``len()`` of the wrapped loader."""
        return len(self.data)

    def __iter__(self):
        """Yield one ``(X, y)`` tuple per batch produced by the wrapped loader."""
        for current in self.data:
            # Attribute names are looked up per batch, exactly as configured on the instance.
            yield (getattr(current, self.df), getattr(current, self.lf))

In [3]:
# Initialize experiment.
# Seed every RNG in play (Python, torch, numpy) so a fresh "Restart & Run All"
# starts from the same random state each time.
seed = 42
random.seed(seed)
torch.random.manual_seed(seed)
np.random.seed(seed)

# Data location and preprocessing choices.
datadir = 'data/'
encoding = 'index'
tokenizer = 'ekphrasis'  # Resolved to a Cleaner method further down.

# Evaluation metrics; the same metric is used for display and for stopping.
metrics = ['f1-score', 'precision', 'recall', 'accuracy']
display_metric = stop_metric = 'f1-score'

# Model and optimisation hyper-parameters.
batch_size = 64
epochs = 50
learning_rate = 0.001
dropout = 0.0
embedding = 100
hidden = 100
nonlinearity = 'relu'

# Run-time switches.
gpu = False
hyperopt = False
save_path = None

# Separate metric trackers for the training and development splits.
train_metrics = Metrics(metrics, display_metric, stop_metric)
dev_metrics = Metrics(metrics, display_metric, stop_metric)

c = Cleaner(['url', 'hashtag', 'username', 'lower'])
experiment = Preprocessors(datadir).select_experiment('word')
onehot = encoding == 'onehot'

# Map the configured tokenizer name onto the Cleaner method implementing it.
# Only the selected attribute is looked up on the cleaner; a name that is not
# listed here is left untouched, as before.
tokenizer_methods = {
    'spacy': 'tokenize',
    'bpe': 'bpe_tokenize',
    'ekphrasis': 'ekphrasis_tokenize',
}
if tokenizer in tokenizer_methods:
    tokenizer = getattr(c, tokenizer_methods[tokenizer])

# Set annotations, corrections and filters.
annotate = {'elongated', 'emphasis'}
# Sort before building the list: iteration order of a set of strings can differ
# between interpreter runs, which would make `filters` non-reproducible.
filters = [f"<{filtr}>" for filtr in sorted(annotate)]

c._load_ekphrasis(annotate, filters)

  self.tok = re.compile(r"({})".format("|".join(pipeline)))


Reading twitter - 1grams ...


  regexes = {k.lower(): re.compile(self.expressions[k]) for k, v in


In [43]:
# Field definitions: `text` is tokenised with the configured tokenizer and
# lower-cased, `label` holds the class label.
text = Field(tokenize=tokenizer, lower=True, batch_first=True)
label = LabelField()

# One (name, field) tuple per TSV column, in column order; tuples whose field
# is None mark columns that torchtext ignores.
fields = [('ignore', None), ('text', text), ('label', label), ('ignore', None)]

# Read the splits from the configured data directory rather than from a
# machine-specific absolute path.
# NOTE(review): assumes the kernel's working directory makes `datadir` resolve
# to the folder holding the wulczyn_*.tsv files — TODO confirm.
train, dev, test = TabularDataset.splits(
    datadir,
    train='wulczyn_train.tsv',
    validation='wulczyn_dev.tsv',
    test='wulczyn_test.tsv',
    format='tsv',
    skip_header=True,
    fields=fields,
)

# Vocabularies are built from the training split only.
text.build_vocab(train)
label.build_vocab(train)

In [5]:
# Input and output sizes come from the vocabularies built on the training split.
vocab_size = len(text.vocab.stoi)
num_classes = len(label.vocab.stoi)

# NOTE(review): the fifth positional argument (False) is passed through as-is;
# MLPClassifier's signature is not visible here — confirm which parameter it
# binds to (the configured `dropout` value is not passed).
model = MLPClassifier(vocab_size, embedding, hidden, num_classes, False, nonlinearity)

# Negative log-likelihood loss, optimised with Adam at the configured learning rate.
loss = torch.nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [6]:
# Create one bucketed batch iterator per split, using the configured batch size.
iterators = BucketIterator.splits(datasets=(train, dev), batch_size=batch_size)
train_ds, dev_ds = iterators

# Wrap each iterator so that it yields (text, label) tuples.
batched_train, batched_dev = (
    TorchTextDefaultExtractor('text', 'label', iterator)
    for iterator in (train_ds, dev_ds)
)



In [47]:
def _tokens_before_padding(indices):
    """Map vocabulary indices back to tokens, stopping at the first '<pad>' token."""
    tokens = []
    for index in indices:
        token = text.vocab.itos[index]
        if token == '<pad>':
            break
        tokens.append(token)
    return tokens


# Pull a single batch for ad-hoc inspection.
foo = next(iter(train_ds))

# Walk the whole training iterator and rebuild, per document, its label string
# (`baz`) and its token strings with padding removed (`bar`).
bar, baz = [], []
for document in train_ds:
    for doc, doc_label in zip(document.text, document.label):
        baz.append(label.vocab.itos[doc_label])
        bar.append(_tokens_before_padding(doc))

In [50]:
# Sanity check: number of collected labels, then number of collected documents.
print(len(baz), len(bar), sep='\n')

95692
95692


In [11]:
# Train the classifier on the batched training data and evaluate on the
# batched development data. The `gpu` flag follows the experiment
# configuration instead of being hard-coded here.
train_singletask_model(
    model, save_path, epochs, batched_train, loss, optimizer, train_metrics,
    batched_dev, dev_metrics,
    shuffle=False,
    gpu=gpu,
)

Training epochs:   0%|          | 0/50 [00:00<?, ?it/s]
Batch:   0%|          | 0/1496 [00:00<?, ?it/s]
Batch:   0%|          | 0/1496 [00:00<?, ?it/s, batch_loss=0.0044]
Batch:   0%|          | 1/1496 [00:00<17:04,  1.46it/s, batch_loss=0.0044]
Batch:   0%|          | 1/1496 [00:01<17:04,  1.46it/s, batch_loss=0.0055]
Batch:   0%|          | 2/1496 [00:01<16:35,  1.50it/s, batch_loss=0.0055]
Batch:   0%|          | 2/1496 [00:01<16:35,  1.50it/s, batch_loss=0.0043]
Batch:   0%|          | 3/1496 [00:01<16:21,  1.52it/s, batch_loss=0.0043]
Batch:   0%|          | 3/1496 [00:02<16:21,  1.52it/s, batch_loss=0.0057]
Batch:   0%|          | 4/1496 [00:02<15:35,  1.59it/s, batch_loss=0.0057]
Batch:   0%|          | 4/1496 [00:02<15:35,  1.59it/s, batch_loss=0.0037]
Batch:   0%|          | 5/1496 [00:02<14:14,  1.74it/s, batch_loss=0.0037]
Batch:   0%|          | 5/1496 [00:03<14:14,  1.74it/s, batch_loss=0.0045]
Batch:   0%|          | 6/1496 [00:03<13:12,  1.88it/s, batch_loss=0.0045]
Batc

Batch:   7%|▋         | 107/1496 [01:00<12:00,  1.93it/s, batch_loss=0.0044]
Batch:   7%|▋         | 107/1496 [01:01<12:00,  1.93it/s, batch_loss=0.0028]
Batch:   7%|▋         | 108/1496 [01:01<11:07,  2.08it/s, batch_loss=0.0028]
Batch:   7%|▋         | 108/1496 [01:01<11:07,  2.08it/s, batch_loss=0.0040]
Batch:   7%|▋         | 109/1496 [01:01<11:21,  2.04it/s, batch_loss=0.0040]
Batch:   7%|▋         | 109/1496 [01:02<11:21,  2.04it/s, batch_loss=0.0055]
Batch:   7%|▋         | 110/1496 [01:02<11:46,  1.96it/s, batch_loss=0.0055]
Batch:   7%|▋         | 110/1496 [01:02<11:46,  1.96it/s, batch_loss=0.0057]
Batch:   7%|▋         | 111/1496 [01:02<12:45,  1.81it/s, batch_loss=0.0057]
Batch:   7%|▋         | 111/1496 [01:03<12:45,  1.81it/s, batch_loss=0.0062]
Batch:   7%|▋         | 112/1496 [01:03<12:13,  1.89it/s, batch_loss=0.0062]
Batch:   7%|▋         | 112/1496 [01:03<12:13,  1.89it/s, batch_loss=0.0071]
Batch:   8%|▊         | 113/1496 [01:03<11:51,  1.94it/s, batch_loss=0.0071]

Batch:  14%|█▍        | 213/1496 [01:49<09:21,  2.29it/s, batch_loss=0.0127]
Batch:  14%|█▍        | 213/1496 [01:50<09:21,  2.29it/s, batch_loss=0.0106]
Batch:  14%|█▍        | 214/1496 [01:50<09:05,  2.35it/s, batch_loss=0.0106]
Batch:  14%|█▍        | 214/1496 [01:50<09:05,  2.35it/s, batch_loss=0.0066]
Batch:  14%|█▍        | 215/1496 [01:50<09:14,  2.31it/s, batch_loss=0.0066]
Batch:  14%|█▍        | 215/1496 [01:51<09:14,  2.31it/s, batch_loss=0.0109]
Batch:  14%|█▍        | 216/1496 [01:51<09:03,  2.35it/s, batch_loss=0.0109]
Batch:  14%|█▍        | 216/1496 [01:51<09:03,  2.35it/s, batch_loss=0.0118]
Batch:  15%|█▍        | 217/1496 [01:51<09:09,  2.33it/s, batch_loss=0.0118]
Batch:  15%|█▍        | 217/1496 [01:51<09:09,  2.33it/s, batch_loss=0.0120]
Batch:  15%|█▍        | 218/1496 [01:51<08:51,  2.41it/s, batch_loss=0.0120]
Batch:  15%|█▍        | 218/1496 [01:52<08:51,  2.41it/s, batch_loss=0.0056]
Batch:  15%|█▍        | 219/1496 [01:52<09:09,  2.33it/s, batch_loss=0.0056]

Batch:  21%|██▏       | 319/1496 [02:36<08:30,  2.30it/s, batch_loss=0.0067]
Batch:  21%|██▏       | 319/1496 [02:37<08:30,  2.30it/s, batch_loss=0.0064]
Batch:  21%|██▏       | 320/1496 [02:37<08:48,  2.22it/s, batch_loss=0.0064]
Batch:  21%|██▏       | 320/1496 [02:37<08:48,  2.22it/s, batch_loss=0.0068]
Batch:  21%|██▏       | 321/1496 [02:37<09:29,  2.06it/s, batch_loss=0.0068]
Batch:  21%|██▏       | 321/1496 [02:38<09:29,  2.06it/s, batch_loss=0.0066]
Batch:  22%|██▏       | 322/1496 [02:38<09:08,  2.14it/s, batch_loss=0.0066]
Batch:  22%|██▏       | 322/1496 [02:38<09:08,  2.14it/s, batch_loss=0.0057]
Batch:  22%|██▏       | 323/1496 [02:38<08:32,  2.29it/s, batch_loss=0.0057]
Batch:  22%|██▏       | 323/1496 [02:38<08:32,  2.29it/s, batch_loss=0.0077]
Batch:  22%|██▏       | 324/1496 [02:38<08:37,  2.27it/s, batch_loss=0.0077]
Batch:  22%|██▏       | 324/1496 [02:39<08:37,  2.27it/s, batch_loss=0.0065]
Batch:  22%|██▏       | 325/1496 [02:39<09:05,  2.15it/s, batch_loss=0.0065]

Batch:  28%|██▊       | 425/1496 [03:25<08:36,  2.07it/s, batch_loss=0.0051]
Batch:  28%|██▊       | 425/1496 [03:25<08:36,  2.07it/s, batch_loss=0.0046]
Batch:  28%|██▊       | 426/1496 [03:25<08:27,  2.11it/s, batch_loss=0.0046]
Batch:  28%|██▊       | 426/1496 [03:26<08:27,  2.11it/s, batch_loss=0.0055]
Batch:  29%|██▊       | 427/1496 [03:26<07:59,  2.23it/s, batch_loss=0.0055]
Batch:  29%|██▊       | 427/1496 [03:26<07:59,  2.23it/s, batch_loss=0.0042]
Batch:  29%|██▊       | 428/1496 [03:26<07:55,  2.25it/s, batch_loss=0.0042]
Batch:  29%|██▊       | 428/1496 [03:27<07:55,  2.25it/s, batch_loss=0.0062]
Batch:  29%|██▊       | 429/1496 [03:27<07:28,  2.38it/s, batch_loss=0.0062]
Batch:  29%|██▊       | 429/1496 [03:27<07:28,  2.38it/s, batch_loss=0.0046]
Batch:  29%|██▊       | 430/1496 [03:27<07:42,  2.31it/s, batch_loss=0.0046]
Batch:  29%|██▊       | 430/1496 [03:27<07:42,  2.31it/s, batch_loss=0.0054]
Batch:  29%|██▉       | 431/1496 [03:27<07:10,  2.47it/s, batch_loss=0.0054]

Batch:  35%|███▌      | 531/1496 [04:13<07:23,  2.18it/s, batch_loss=0.0044]
Batch:  35%|███▌      | 531/1496 [04:13<07:23,  2.18it/s, batch_loss=0.0075]
Batch:  36%|███▌      | 532/1496 [04:13<07:54,  2.03it/s, batch_loss=0.0075]
Batch:  36%|███▌      | 532/1496 [04:14<07:54,  2.03it/s, batch_loss=0.0052]
Batch:  36%|███▌      | 533/1496 [04:14<07:48,  2.05it/s, batch_loss=0.0052]
Batch:  36%|███▌      | 533/1496 [04:14<07:48,  2.05it/s, batch_loss=0.0045]
Batch:  36%|███▌      | 534/1496 [04:14<07:25,  2.16it/s, batch_loss=0.0045]
Batch:  36%|███▌      | 534/1496 [04:14<07:25,  2.16it/s, batch_loss=0.0099]
Batch:  36%|███▌      | 535/1496 [04:14<06:57,  2.30it/s, batch_loss=0.0099]
Batch:  36%|███▌      | 535/1496 [04:15<06:57,  2.30it/s, batch_loss=0.0045]
Batch:  36%|███▌      | 536/1496 [04:15<06:42,  2.38it/s, batch_loss=0.0045]
Batch:  36%|███▌      | 536/1496 [04:15<06:42,  2.38it/s, batch_loss=0.0083]
Batch:  36%|███▌      | 537/1496 [04:15<06:34,  2.43it/s, batch_loss=0.0083]

Batch:  43%|████▎     | 637/1496 [05:03<06:53,  2.08it/s, batch_loss=0.0053]
Batch:  43%|████▎     | 637/1496 [05:03<06:53,  2.08it/s, batch_loss=0.0069]
Batch:  43%|████▎     | 638/1496 [05:03<06:37,  2.16it/s, batch_loss=0.0069]
Batch:  43%|████▎     | 638/1496 [05:04<06:37,  2.16it/s, batch_loss=0.0039]
Batch:  43%|████▎     | 639/1496 [05:04<06:15,  2.28it/s, batch_loss=0.0039]
Batch:  43%|████▎     | 639/1496 [05:04<06:15,  2.28it/s, batch_loss=0.0045]
Batch:  43%|████▎     | 640/1496 [05:04<06:11,  2.30it/s, batch_loss=0.0045]
Batch:  43%|████▎     | 640/1496 [05:05<06:11,  2.30it/s, batch_loss=0.0035]
Batch:  43%|████▎     | 641/1496 [05:05<06:25,  2.22it/s, batch_loss=0.0035]
Batch:  43%|████▎     | 641/1496 [05:05<06:25,  2.22it/s, batch_loss=0.0045]
Batch:  43%|████▎     | 642/1496 [05:05<06:36,  2.15it/s, batch_loss=0.0045]
Batch:  43%|████▎     | 642/1496 [05:06<06:36,  2.15it/s, batch_loss=0.0039]
Batch:  43%|████▎     | 643/1496 [05:06<07:07,  2.00it/s, batch_loss=0.0039]

Batch:  50%|████▉     | 743/1496 [05:56<06:39,  1.89it/s, batch_loss=0.0049]
Batch:  50%|████▉     | 743/1496 [05:57<06:39,  1.89it/s, batch_loss=0.0045]
Batch:  50%|████▉     | 744/1496 [05:57<06:44,  1.86it/s, batch_loss=0.0045]
Batch:  50%|████▉     | 744/1496 [05:57<06:44,  1.86it/s, batch_loss=0.0043]
Batch:  50%|████▉     | 745/1496 [05:57<06:30,  1.92it/s, batch_loss=0.0043]
Batch:  50%|████▉     | 745/1496 [05:58<06:30,  1.92it/s, batch_loss=0.0046]
Batch:  50%|████▉     | 746/1496 [05:58<07:15,  1.72it/s, batch_loss=0.0046]
Batch:  50%|████▉     | 746/1496 [05:59<07:15,  1.72it/s, batch_loss=0.0048]
Batch:  50%|████▉     | 747/1496 [05:59<06:41,  1.86it/s, batch_loss=0.0048]
Batch:  50%|████▉     | 747/1496 [05:59<06:41,  1.86it/s, batch_loss=0.0048]
Batch:  50%|█████     | 748/1496 [05:59<06:08,  2.03it/s, batch_loss=0.0048]
Batch:  50%|█████     | 748/1496 [05:59<06:08,  2.03it/s, batch_loss=0.0040]
Batch:  50%|█████     | 749/1496 [05:59<05:52,  2.12it/s, batch_loss=0.0040]

Batch:  57%|█████▋    | 849/1496 [06:45<04:49,  2.24it/s, batch_loss=0.0052]
Batch:  57%|█████▋    | 849/1496 [06:45<04:49,  2.24it/s, batch_loss=0.0070]
Batch:  57%|█████▋    | 850/1496 [06:45<04:39,  2.31it/s, batch_loss=0.0070]
Batch:  57%|█████▋    | 850/1496 [06:45<04:39,  2.31it/s, batch_loss=0.0045]
Batch:  57%|█████▋    | 851/1496 [06:45<04:45,  2.26it/s, batch_loss=0.0045]
Batch:  57%|█████▋    | 851/1496 [06:46<04:45,  2.26it/s, batch_loss=0.0047]
Batch:  57%|█████▋    | 852/1496 [06:46<04:39,  2.30it/s, batch_loss=0.0047]
Batch:  57%|█████▋    | 852/1496 [06:46<04:39,  2.30it/s, batch_loss=0.0039]
Batch:  57%|█████▋    | 853/1496 [06:46<04:51,  2.20it/s, batch_loss=0.0039]
Batch:  57%|█████▋    | 853/1496 [06:47<04:51,  2.20it/s, batch_loss=0.0040]
Batch:  57%|█████▋    | 854/1496 [06:47<04:40,  2.29it/s, batch_loss=0.0040]
Batch:  57%|█████▋    | 854/1496 [06:47<04:40,  2.29it/s, batch_loss=0.0055]
Batch:  57%|█████▋    | 855/1496 [06:47<04:33,  2.34it/s, batch_loss=0.0055]

Batch:  64%|██████▍   | 955/1496 [07:33<03:52,  2.33it/s, batch_loss=0.0026]
Batch:  64%|██████▍   | 955/1496 [07:33<03:52,  2.33it/s, batch_loss=0.0038]
Batch:  64%|██████▍   | 956/1496 [07:33<04:08,  2.17it/s, batch_loss=0.0038]
Batch:  64%|██████▍   | 956/1496 [07:34<04:08,  2.17it/s, batch_loss=0.0089]
Batch:  64%|██████▍   | 957/1496 [07:34<04:11,  2.14it/s, batch_loss=0.0089]
Batch:  64%|██████▍   | 957/1496 [07:34<04:11,  2.14it/s, batch_loss=0.0060]
Batch:  64%|██████▍   | 958/1496 [07:34<04:17,  2.09it/s, batch_loss=0.0060]
Batch:  64%|██████▍   | 958/1496 [07:35<04:17,  2.09it/s, batch_loss=0.0043]
Batch:  64%|██████▍   | 959/1496 [07:35<04:17,  2.09it/s, batch_loss=0.0043]
Batch:  64%|██████▍   | 959/1496 [07:35<04:17,  2.09it/s, batch_loss=0.0037]
Batch:  64%|██████▍   | 960/1496 [07:35<04:24,  2.03it/s, batch_loss=0.0037]
Batch:  64%|██████▍   | 960/1496 [07:36<04:24,  2.03it/s, batch_loss=0.0037]
Batch:  64%|██████▍   | 961/1496 [07:36<04:28,  1.99it/s, batch_loss=0.0037]

Batch:  71%|███████   | 1060/1496 [08:29<03:48,  1.91it/s, batch_loss=0.0050]
Batch:  71%|███████   | 1061/1496 [08:29<03:48,  1.90it/s, batch_loss=0.0050]
Batch:  71%|███████   | 1061/1496 [08:30<03:48,  1.90it/s, batch_loss=0.0061]
Batch:  71%|███████   | 1062/1496 [08:30<04:10,  1.73it/s, batch_loss=0.0061]
Batch:  71%|███████   | 1062/1496 [08:31<04:10,  1.73it/s, batch_loss=0.0075]
Batch:  71%|███████   | 1063/1496 [08:31<04:17,  1.68it/s, batch_loss=0.0075]
Batch:  71%|███████   | 1063/1496 [08:31<04:17,  1.68it/s, batch_loss=0.0030]
Batch:  71%|███████   | 1064/1496 [08:31<04:04,  1.77it/s, batch_loss=0.0030]
Batch:  71%|███████   | 1064/1496 [08:32<04:04,  1.77it/s, batch_loss=0.0035]
Batch:  71%|███████   | 1065/1496 [08:32<04:00,  1.79it/s, batch_loss=0.0035]
Batch:  71%|███████   | 1065/1496 [08:32<04:00,  1.79it/s, batch_loss=0.0056]
Batch:  71%|███████▏  | 1066/1496 [08:32<03:56,  1.82it/s, batch_loss=0.0056]
Batch:  71%|███████▏  | 1066/1496 [08:33<03:56,  1.82it/s, batch

Batch:  78%|███████▊  | 1165/1496 [09:29<03:06,  1.77it/s, batch_loss=0.0052]
Batch:  78%|███████▊  | 1166/1496 [09:29<03:06,  1.77it/s, batch_loss=0.0052]
Batch:  78%|███████▊  | 1166/1496 [09:30<03:06,  1.77it/s, batch_loss=0.0043]
Batch:  78%|███████▊  | 1167/1496 [09:30<02:57,  1.85it/s, batch_loss=0.0043]
Batch:  78%|███████▊  | 1167/1496 [09:30<02:57,  1.85it/s, batch_loss=0.0043]
Batch:  78%|███████▊  | 1168/1496 [09:30<03:08,  1.74it/s, batch_loss=0.0043]
Batch:  78%|███████▊  | 1168/1496 [09:31<03:08,  1.74it/s, batch_loss=0.0050]
Batch:  78%|███████▊  | 1169/1496 [09:31<03:05,  1.76it/s, batch_loss=0.0050]
Batch:  78%|███████▊  | 1169/1496 [09:31<03:05,  1.76it/s, batch_loss=0.0063]
Batch:  78%|███████▊  | 1170/1496 [09:31<03:07,  1.74it/s, batch_loss=0.0063]
Batch:  78%|███████▊  | 1170/1496 [09:32<03:07,  1.74it/s, batch_loss=0.0073]
Batch:  78%|███████▊  | 1171/1496 [09:32<03:16,  1.65it/s, batch_loss=0.0073]
Batch:  78%|███████▊  | 1171/1496 [09:32<03:16,  1.65it/s, batch

Batch:  85%|████████▍ | 1270/1496 [10:38<01:59,  1.90it/s, batch_loss=0.0072]
Batch:  85%|████████▍ | 1271/1496 [10:38<02:05,  1.79it/s, batch_loss=0.0072]
Batch:  85%|████████▍ | 1271/1496 [10:39<02:05,  1.79it/s, batch_loss=0.0050]
Batch:  85%|████████▌ | 1272/1496 [10:39<02:12,  1.69it/s, batch_loss=0.0050]
Batch:  85%|████████▌ | 1272/1496 [10:39<02:12,  1.69it/s, batch_loss=0.0021]
Batch:  85%|████████▌ | 1273/1496 [10:39<02:10,  1.71it/s, batch_loss=0.0021]
Batch:  85%|████████▌ | 1273/1496 [10:40<02:10,  1.71it/s, batch_loss=0.0064]
Batch:  85%|████████▌ | 1274/1496 [10:40<02:15,  1.64it/s, batch_loss=0.0064]
Batch:  85%|████████▌ | 1274/1496 [10:41<02:15,  1.64it/s, batch_loss=0.0050]
Batch:  85%|████████▌ | 1275/1496 [10:41<02:24,  1.53it/s, batch_loss=0.0050]
Batch:  85%|████████▌ | 1275/1496 [10:42<02:24,  1.53it/s, batch_loss=0.0043]
Batch:  85%|████████▌ | 1276/1496 [10:42<02:26,  1.50it/s, batch_loss=0.0043]
Batch:  85%|████████▌ | 1276/1496 [10:42<02:26,  1.50it/s, batch

Batch:  92%|█████████▏| 1375/1496 [11:36<00:52,  2.31it/s, batch_loss=0.0065]
Batch:  92%|█████████▏| 1376/1496 [11:36<00:52,  2.31it/s, batch_loss=0.0065]
Batch:  92%|█████████▏| 1376/1496 [11:36<00:52,  2.31it/s, batch_loss=0.0058]
Batch:  92%|█████████▏| 1377/1496 [11:36<00:49,  2.39it/s, batch_loss=0.0058]
Batch:  92%|█████████▏| 1377/1496 [11:37<00:49,  2.39it/s, batch_loss=0.0055]
Batch:  92%|█████████▏| 1378/1496 [11:37<00:55,  2.13it/s, batch_loss=0.0055]
Batch:  92%|█████████▏| 1378/1496 [11:37<00:55,  2.13it/s, batch_loss=0.0043]
Batch:  92%|█████████▏| 1379/1496 [11:37<00:58,  2.00it/s, batch_loss=0.0043]
Batch:  92%|█████████▏| 1379/1496 [11:38<00:58,  2.00it/s, batch_loss=0.0044]
Batch:  92%|█████████▏| 1380/1496 [11:38<01:04,  1.81it/s, batch_loss=0.0044]
Batch:  92%|█████████▏| 1380/1496 [11:38<01:04,  1.81it/s, batch_loss=0.0041]
Batch:  92%|█████████▏| 1381/1496 [11:38<01:04,  1.79it/s, batch_loss=0.0041]
Batch:  92%|█████████▏| 1381/1496 [11:39<01:04,  1.79it/s, batch

Batch:  99%|█████████▉| 1480/1496 [12:25<00:09,  1.77it/s, batch_loss=0.0040]
Batch:  99%|█████████▉| 1481/1496 [12:25<00:08,  1.71it/s, batch_loss=0.0040]
Batch:  99%|█████████▉| 1481/1496 [12:25<00:08,  1.71it/s, batch_loss=0.0057]
Batch:  99%|█████████▉| 1482/1496 [12:25<00:07,  1.81it/s, batch_loss=0.0057]
Batch:  99%|█████████▉| 1482/1496 [12:26<00:07,  1.81it/s, batch_loss=0.0069]
Batch:  99%|█████████▉| 1483/1496 [12:26<00:07,  1.81it/s, batch_loss=0.0069]
Batch:  99%|█████████▉| 1483/1496 [12:26<00:07,  1.81it/s, batch_loss=0.0057]
Batch:  99%|█████████▉| 1484/1496 [12:26<00:07,  1.71it/s, batch_loss=0.0057]
Batch:  99%|█████████▉| 1484/1496 [12:27<00:07,  1.71it/s, batch_loss=0.0034]
Batch:  99%|█████████▉| 1485/1496 [12:27<00:06,  1.78it/s, batch_loss=0.0034]
Batch:  99%|█████████▉| 1485/1496 [12:27<00:06,  1.78it/s, batch_loss=0.0048]
Batch:  99%|█████████▉| 1486/1496 [12:27<00:05,  1.89it/s, batch_loss=0.0048]
Batch:  99%|█████████▉| 1486/1496 [12:28<00:05,  1.89it/s, batch

Batch:   6%|▌         | 89/1496 [00:52<13:30,  1.74it/s, batch_loss=0.0057]
Batch:   6%|▌         | 90/1496 [00:52<13:11,  1.78it/s, batch_loss=0.0057]
Batch:   6%|▌         | 90/1496 [00:52<13:11,  1.78it/s, batch_loss=0.0030]
Batch:   6%|▌         | 91/1496 [00:52<12:41,  1.85it/s, batch_loss=0.0030]
Batch:   6%|▌         | 91/1496 [00:53<12:41,  1.85it/s, batch_loss=0.0041]
Batch:   6%|▌         | 92/1496 [00:53<12:55,  1.81it/s, batch_loss=0.0041]
Batch:   6%|▌         | 92/1496 [00:54<12:55,  1.81it/s, batch_loss=0.0068]
Batch:   6%|▌         | 93/1496 [00:54<13:23,  1.75it/s, batch_loss=0.0068]
Batch:   6%|▌         | 93/1496 [00:54<13:23,  1.75it/s, batch_loss=0.0041]
Batch:   6%|▋         | 94/1496 [00:54<12:29,  1.87it/s, batch_loss=0.0041]
Batch:   6%|▋         | 94/1496 [00:55<12:29,  1.87it/s, batch_loss=0.0047]
Batch:   6%|▋         | 95/1496 [00:55<13:19,  1.75it/s, batch_loss=0.0047]
Batch:   6%|▋         | 95/1496 [00:55<13:19,  1.75it/s, batch_loss=0.0049]
Batch:   6%|

Batch:  13%|█▎        | 195/1496 [01:58<13:54,  1.56it/s, batch_loss=0.0032]
Batch:  13%|█▎        | 196/1496 [01:58<12:32,  1.73it/s, batch_loss=0.0032]
Batch:  13%|█▎        | 196/1496 [01:59<12:32,  1.73it/s, batch_loss=0.0049]
Batch:  13%|█▎        | 197/1496 [01:59<12:48,  1.69it/s, batch_loss=0.0049]
Batch:  13%|█▎        | 197/1496 [01:59<12:48,  1.69it/s, batch_loss=0.0063]
Batch:  13%|█▎        | 198/1496 [01:59<11:45,  1.84it/s, batch_loss=0.0063]
Batch:  13%|█▎        | 198/1496 [02:00<11:45,  1.84it/s, batch_loss=0.0056]
Batch:  13%|█▎        | 199/1496 [02:00<12:08,  1.78it/s, batch_loss=0.0056]
Batch:  13%|█▎        | 199/1496 [02:00<12:08,  1.78it/s, batch_loss=0.0055]
Batch:  13%|█▎        | 200/1496 [02:00<12:21,  1.75it/s, batch_loss=0.0055]
Batch:  13%|█▎        | 200/1496 [02:01<12:21,  1.75it/s, batch_loss=0.0061]
Batch:  13%|█▎        | 201/1496 [02:01<13:10,  1.64it/s, batch_loss=0.0061]
Batch:  13%|█▎        | 201/1496 [02:02<13:10,  1.64it/s, batch_loss=0.0050]

KeyboardInterrupt: 