# Multitask learning notebook

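This notebook runs toy experiments with a multitask learning model: a one-hot MLP classifier is trained jointly on three datasets (SemEval 2016 sentiment, Davidson et al., and Hoover et al.), with the sentiment dataset as the main task used for the dev and test evaluation.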

In [1]:
import csv
import torch.optim as opt
from torch.nn import CrossEntropyLoss
from mlearn.utils.pipeline import process_and_batch
from mlearn.utils.metrics import Metrics
from mlearn.utils.train import train_mtl_model, run_mtl_model
from mlearn.data.dataset import GeneralDataset
from mlearn.data.clean import Preprocessors, Cleaner
from mlearn.data.loaders import hoover, davidson, garcia, semeval_sentiment
from mlearn.modeling.multitask import EmbeddingLSTMClassifier, OnehotLSTMClassifier, OnehotMLPClassifier

## Load the datasets to be used

In [2]:
# Settings shared by every dataset loader in this cell.
data_path = '../tests/data/'
cleaner = Cleaner(['lower', 'hashtag', 'url', 'user'])
preprocessr = Preprocessors()


def _load_with_vocab(loader):
    """Load one dataset through `loader` and build its vocabularies.

    The loader is called with the shared `cleaner` and `data_path`; the token
    vocabulary and then the label vocabulary are built from `dataset.data`.
    Returns the loaded dataset object.
    """
    dataset = loader(cleaners = cleaner, data_path = data_path)
    dataset.build_token_vocab(dataset.data)
    dataset.build_label_vocab(dataset.data)
    return dataset


# Each dataset is loaded and has its vocabularies built before the next one starts.
mftc = _load_with_vocab(hoover)             # Hoover dataset
off = _load_with_vocab(davidson)            # Davidson dataset
sent = _load_with_vocab(semeval_sentiment)  # Sentiment analysis dataset

Loading Hoover et al. (train): 999it [00:04, 220.93it/s]
Building vocabulary: 100%|██████████| 799/799 [00:00<00:00, 157602.00it/s]
Encoding vocabulary: 100%|██████████| 2821/2821 [00:00<00:00, 506903.08it/s]
Encode label vocab: 100%|██████████| 11/11 [00:00<00:00, 990.11it/s]
Loading Davidson et al. (train): 887it [00:04, 212.44it/s]
Building vocabulary: 100%|██████████| 709/709 [00:00<00:00, 135875.06it/s]
Encoding vocabulary: 100%|██████████| 2913/2913 [00:00<00:00, 379535.52it/s]
Encode label vocab: 100%|██████████| 3/3 [00:00<00:00, 454.96it/s]
Loading Semeval (2016) (train): 999it [00:04, 204.59it/s]
Building vocabulary: 100%|██████████| 799/799 [00:00<00:00, 103484.71it/s]
Encoding vocabulary: 100%|██████████| 3474/3474 [00:00<00:00, 976478.49it/s]
Encode label vocab: 100%|██████████| 3/3 [00:00<00:00, 3405.39it/s]


## Create Batches of train and dev

## Initialize models

In [3]:
# Datasets in task order: sentiment, Davidson, Hoover.
task_datasets = [sent, off, mftc]

# One entry per task, in the order above.
input_dims = [task.vocab_size() for task in task_datasets]
output_dims = [task.label_count() for task in task_datasets]

# Layer sizes and regularisation handed to the model constructor.
shared_dim = 256
hidden_dims = [64, 128, 300]
embedding_dim = 128
dropout = 0.4

print(input_dims)

[3476, 2915, 2823]


In [4]:
# Multitask one-hot MLP built from the dimensions computed in the previous cell.
ohmlp = OnehotMLPClassifier(
    input_dims = input_dims,
    shared_dim = shared_dim,
    hidden_dims = hidden_dims,
    output_dims = output_dims,
    dropout = dropout,
)

# Plain SGD over all model parameters, with a cross-entropy objective.
optim = opt.SGD(ohmlp.parameters(), lr = 0.01)
loss = CrossEntropyLoss()

# Train and dev metric trackers share the display metric and the early-stopping criterion.
metric_options = {'display_metric': 'f1-score', 'early_stop': 'loss'}
m = Metrics(['f1-score'], **metric_options)
dev_m = Metrics(['f1-score', 'accuracy'], **metric_options)

# One batcher per task over that dataset's `data`, in the order sentiment, Davidson, Hoover.
# The third argument (32) is presumably the batch size -- TODO confirm against process_and_batch.
batchers = [process_and_batch(task, task.data, 32) for task in (sent, off, mftc)]

OnehotMLPClassifier(
  (all_parameters): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 64x3476]
      (1): Parameter containing: [torch.FloatTensor of size 64]
      (2): Parameter containing: [torch.FloatTensor of size 64x2915]
      (3): Parameter containing: [torch.FloatTensor of size 64]
      (4): Parameter containing: [torch.FloatTensor of size 64x2823]
      (5): Parameter containing: [torch.FloatTensor of size 64]
      (6): Parameter containing: [torch.FloatTensor of size 128x64]
      (7): Parameter containing: [torch.FloatTensor of size 128]
      (8): Parameter containing: [torch.FloatTensor of size 300x128]
      (9): Parameter containing: [torch.FloatTensor of size 300]
      (10): Parameter containing: [torch.FloatTensor of size 3x300]
      (11): Parameter containing: [torch.FloatTensor of size 3]
      (12): Parameter containing: [torch.FloatTensor of size 3x300]
      (13): Parameter containing: [torch.FloatTensor of size 3]
      (14): Pa

In [5]:
# Open the tab-separated results file and keep a named handle on it: the csv
# writer does not expose the underlying file, so without `results_file` it can
# never be flushed or closed. `newline = ''` is what the csv module asks for on
# files passed to csv.writer (it avoids doubled line endings on some platforms).
# Call `results_file.close()` once the last cell that uses `writer` has run.
results_file = open('testing', 'w', encoding = 'utf-8', newline = '')
writer = csv.writer(results_file, delimiter = '\t')

# Column groups of the results file, concatenated into a single header row.
base = ['timestamp', 'Trained On', 'Predicted On']
model_hdr = ['Epoch', 'Model', 'Input dim', 'Hidden dim', 'Embedding dim', 'Dropout', 'Learning rate',
             'Window sizes', 'Num filters', 'Max feats', 'Output dim']
hyper_hdr = ['# Epochs', 'Learning rate', 'Batch size']

# Train metric names followed by the dev metric names prefixed with 'Dev '.
# The loop variable is deliberately not called `m`, so it cannot be confused
# with the global train `Metrics` object.
metric_hdr = list(m.scores.keys()) + [f'Dev {name}' for name in dev_m.scores]

writer.writerow(base + model_hdr + hyper_hdr + metric_hdr)
results_file.flush()  # push the header to disk now instead of whenever the buffer fills

# Start the train metrics with an empty `tasks` list.
# NOTE(review): presumably read by run_mtl_model -- confirm.
m.tasks = []

# Values lining up with hyper_hdr: number of epochs, learning rate, batch size.
hyper_info = [20, 0.01, 32]

In [6]:
# Dev batcher for the sentiment dataset. The third argument is the size of the
# dev split, presumably so the whole split forms one batch -- TODO confirm.
dev_batcher = process_and_batch(sent, sent.dev, len(sent.dev))

# Train the multitask model on all three task batchers, evaluating on the
# sentiment dev split and logging through the csv `writer`.
run_mtl_model(
    train = True,
    writer = writer,
    epochs = 20,
    model = ohmlp,
    loss_f = loss,
    metrics = m,
    batchers = batchers,
    opt = optim,
    save_path = 'testings',
    dev = dev_batcher,
    dev_metrics = dev_m,
    model_hdr = model_hdr,
    hyper_info = hyper_info,
    main_name = sent.name,
    earlystop = 2,
    batches_per_epoch = 60,
    data_name = 'test',
    metric_hdr = metric_hdr,
    gpu = False,
    store = False,
)

Training model:   0%|          | 0/20 [00:00<?, ?it/s]
Batch:   0%|          | 0/60 [00:00<?, ?it/s][A

60


  'precision', 'predicted', average, warn_for)

Batch:   0%|          | 0/60 [00:00<?, ?it/s, batch_loss=0.0347, diff=0.0422, epoch_loss=0.0347, f1-score=0.0422, task=1][A
Batch:   2%|▏         | 1/60 [00:00<00:12,  4.81it/s, batch_loss=0.0347, diff=0.0422, epoch_loss=0.0347, f1-score=0.0422, task=1][A
Batch:   2%|▏         | 1/60 [00:00<00:12,  4.81it/s, batch_loss=0.0346, diff=0, epoch_loss=0.0346, f1-score=0.0422, task=1]     [A
Batch:   3%|▎         | 2/60 [00:00<00:12,  4.63it/s, batch_loss=0.0346, diff=0, epoch_loss=0.0346, f1-score=0.0422, task=1][A
Batch:   3%|▎         | 2/60 [00:00<00:12,  4.63it/s, batch_loss=0.0340, diff=0.192, epoch_loss=0.0344, f1-score=0.106, task=0][A
Batch:   5%|▌         | 3/60 [00:00<00:12,  4.39it/s, batch_loss=0.0340, diff=0.192, epoch_loss=0.0344, f1-score=0.106, task=0][A
Batch:   5%|▌         | 3/60 [00:00<00:12,  4.39it/s, batch_loss=0.0339, diff=0, epoch_loss=0.0343, f1-score=0.138, task=0]    [A
Batch:   7%|▋         | 4/60 [00:00<00:1

Batch:  50%|█████     | 30/60 [00:07<00:06,  4.90it/s, batch_loss=0.0747, diff=-.62, epoch_loss=0.0441, f1-score=0.252, task=2] [A
Batch:  52%|█████▏    | 31/60 [00:07<00:06,  4.75it/s, batch_loss=0.0747, diff=-.62, epoch_loss=0.0441, f1-score=0.252, task=2][A
Batch:  52%|█████▏    | 31/60 [00:08<00:06,  4.75it/s, batch_loss=0.0331, diff=0.212, epoch_loss=0.0438, f1-score=0.251, task=0][A
Batch:  53%|█████▎    | 32/60 [00:08<00:05,  5.44it/s, batch_loss=0.0331, diff=0.212, epoch_loss=0.0438, f1-score=0.251, task=0][A
Batch:  53%|█████▎    | 32/60 [00:08<00:05,  5.44it/s, batch_loss=0.0746, diff=-.142, epoch_loss=0.0447, f1-score=0.246, task=2][A
Batch:  55%|█████▌    | 33/60 [00:08<00:04,  5.94it/s, batch_loss=0.0746, diff=-.142, epoch_loss=0.0447, f1-score=0.246, task=2][A
Batch:  55%|█████▌    | 33/60 [00:08<00:04,  5.94it/s, batch_loss=0.0746, diff=-.0379, epoch_loss=0.0456, f1-score=0.241, task=2][A
Batch:  57%|█████▋    | 34/60 [00:08<00:04,  6.44it/s, batch_loss=0.0746, di

  'recall', 'true', average, warn_for)
Training model:   5%|▌         | 1/20 [00:12<03:53, 12.31s/it, dev_loss=0.0108, dev_score=0.4409, loss=0.0324]
Batch:   0%|          | 0/60 [00:00<?, ?it/s][A
Batch:   0%|          | 0/60 [00:00<?, ?it/s, batch_loss=0.0299, diff=0.274, epoch_loss=0.0299, f1-score=0.28, task=1][A
Batch:   2%|▏         | 1/60 [00:00<00:08,  7.36it/s, batch_loss=0.0299, diff=0.274, epoch_loss=0.0299, f1-score=0.28, task=1][A

60



Batch:   2%|▏         | 1/60 [00:00<00:08,  7.36it/s, batch_loss=0.0296, diff=0, epoch_loss=0.0298, f1-score=0.287, task=1]   [A
Batch:   3%|▎         | 2/60 [00:00<00:07,  7.28it/s, batch_loss=0.0296, diff=0, epoch_loss=0.0298, f1-score=0.287, task=1][A
Batch:   3%|▎         | 2/60 [00:00<00:07,  7.28it/s, batch_loss=0.0294, diff=0, epoch_loss=0.0296, f1-score=0.294, task=1][A
Batch:   5%|▌         | 3/60 [00:00<00:07,  7.50it/s, batch_loss=0.0294, diff=0, epoch_loss=0.0296, f1-score=0.294, task=1][A
Batch:   5%|▌         | 3/60 [00:00<00:07,  7.50it/s, batch_loss=0.0292, diff=0, epoch_loss=0.0295, f1-score=0.301, task=1][A
Batch:   7%|▋         | 4/60 [00:00<00:07,  7.86it/s, batch_loss=0.0292, diff=0, epoch_loss=0.0295, f1-score=0.301, task=1][A
Batch:   7%|▋         | 4/60 [00:00<00:07,  7.86it/s, batch_loss=0.0743, diff=-.628, epoch_loss=0.0385, f1-score=0.298, task=2][A
Batch:   8%|▊         | 5/60 [00:00<00:07,  7.54it/s, batch_loss=0.0743, diff=-.628, epoch_loss=0.0385,

Batch:  53%|█████▎    | 32/60 [00:04<00:03,  7.87it/s, batch_loss=0.0735, diff=-.0392, epoch_loss=0.0465, f1-score=0.313, task=2][A
Batch:  55%|█████▌    | 33/60 [00:04<00:03,  7.65it/s, batch_loss=0.0735, diff=-.0392, epoch_loss=0.0465, f1-score=0.313, task=2][A
Batch:  55%|█████▌    | 33/60 [00:04<00:03,  7.65it/s, batch_loss=0.0329, diff=0.177, epoch_loss=0.0461, f1-score=0.313, task=0] [A
Batch:  57%|█████▋    | 34/60 [00:04<00:03,  7.75it/s, batch_loss=0.0329, diff=0.177, epoch_loss=0.0461, f1-score=0.313, task=0][A
Batch:  57%|█████▋    | 34/60 [00:04<00:03,  7.75it/s, batch_loss=0.0329, diff=0, epoch_loss=0.0457, f1-score=0.313, task=0]    [A
Batch:  58%|█████▊    | 35/60 [00:04<00:03,  7.44it/s, batch_loss=0.0329, diff=0, epoch_loss=0.0457, f1-score=0.313, task=0][A
Batch:  58%|█████▊    | 35/60 [00:04<00:03,  7.44it/s, batch_loss=0.0328, diff=0.0139, epoch_loss=0.0453, f1-score=0.313, task=0][A
Batch:  60%|██████    | 36/60 [00:04<00:03,  7.59it/s, batch_loss=0.0328, di

60



Batch:   0%|          | 0/60 [00:00<?, ?it/s, batch_loss=0.0738, diff=-.0722, epoch_loss=0.0738, f1-score=0.337, task=2][A
Batch:   2%|▏         | 1/60 [00:00<00:12,  4.85it/s, batch_loss=0.0738, diff=-.0722, epoch_loss=0.0738, f1-score=0.337, task=2][A
Batch:   2%|▏         | 1/60 [00:00<00:12,  4.85it/s, batch_loss=0.0739, diff=0, epoch_loss=0.0739, f1-score=0.334, task=2]     [A
Batch:   3%|▎         | 2/60 [00:00<00:10,  5.37it/s, batch_loss=0.0739, diff=0, epoch_loss=0.0739, f1-score=0.334, task=2][A
Batch:   3%|▎         | 2/60 [00:00<00:10,  5.37it/s, batch_loss=0.0326, diff=0.341, epoch_loss=0.0601, f1-score=0.335, task=0][A
Batch:   5%|▌         | 3/60 [00:00<00:09,  5.78it/s, batch_loss=0.0326, diff=0.341, epoch_loss=0.0601, f1-score=0.335, task=0][A
Batch:   5%|▌         | 3/60 [00:00<00:09,  5.78it/s, batch_loss=0.0256, diff=0.36, epoch_loss=0.0515, f1-score=0.338, task=1] [A
Batch:   7%|▋         | 4/60 [00:00<00:09,  6.20it/s, batch_loss=0.0256, diff=0.36, epoch_l

Batch:  53%|█████▎    | 32/60 [00:04<00:04,  6.27it/s, batch_loss=0.0322, diff=0.308, epoch_loss=0.0489, f1-score=0.331, task=0][A
Batch:  53%|█████▎    | 32/60 [00:04<00:04,  6.27it/s, batch_loss=0.0736, diff=-.344, epoch_loss=0.0496, f1-score=0.329, task=2][A
Batch:  55%|█████▌    | 33/60 [00:04<00:03,  6.92it/s, batch_loss=0.0736, diff=-.344, epoch_loss=0.0496, f1-score=0.329, task=2][A
Batch:  55%|█████▌    | 33/60 [00:04<00:03,  6.92it/s, batch_loss=0.0246, diff=0.703, epoch_loss=0.0489, f1-score=0.332, task=1][A
Batch:  57%|█████▋    | 34/60 [00:04<00:04,  6.45it/s, batch_loss=0.0246, diff=0.703, epoch_loss=0.0489, f1-score=0.332, task=1][A
Batch:  57%|█████▋    | 34/60 [00:04<00:04,  6.45it/s, batch_loss=0.0244, diff=0, epoch_loss=0.0482, f1-score=0.334, task=1]    [A
Batch:  58%|█████▊    | 35/60 [00:04<00:03,  6.85it/s, batch_loss=0.0244, diff=0, epoch_loss=0.0482, f1-score=0.334, task=1][A
Batch:  58%|█████▊    | 35/60 [00:04<00:03,  6.85it/s, batch_loss=0.0243, diff=0

60



Batch:   2%|▏         | 1/60 [00:00<00:08,  7.24it/s, batch_loss=0.0211, diff=0, epoch_loss=0.0212, f1-score=0.347, task=1]    [A
Batch:   3%|▎         | 2/60 [00:00<00:09,  6.21it/s, batch_loss=0.0211, diff=0, epoch_loss=0.0212, f1-score=0.347, task=1][A
Batch:   3%|▎         | 2/60 [00:00<00:09,  6.21it/s, batch_loss=0.0723, diff=-.703, epoch_loss=0.0382, f1-score=0.346, task=2][A
Batch:   5%|▌         | 3/60 [00:00<00:09,  6.10it/s, batch_loss=0.0723, diff=-.703, epoch_loss=0.0382, f1-score=0.346, task=2][A
Batch:   5%|▌         | 3/60 [00:00<00:09,  6.10it/s, batch_loss=0.0355, diff=0.0622, epoch_loss=0.0376, f1-score=0.345, task=0][A
Batch:   7%|▋         | 4/60 [00:00<00:10,  5.48it/s, batch_loss=0.0355, diff=0.0622, epoch_loss=0.0376, f1-score=0.345, task=0][A
Batch:   7%|▋         | 4/60 [00:00<00:10,  5.48it/s, batch_loss=0.0210, diff=0.641, epoch_loss=0.0342, f1-score=0.348, task=1] [A
Batch:   8%|▊         | 5/60 [00:00<00:10,  5.23it/s, batch_loss=0.0210, diff=0.641

Batch:  53%|█████▎    | 32/60 [00:09<00:08,  3.42it/s, batch_loss=0.0717, diff=0.00505, epoch_loss=0.0404, f1-score=0.347, task=2][A
Batch:  55%|█████▌    | 33/60 [00:09<00:07,  3.65it/s, batch_loss=0.0717, diff=0.00505, epoch_loss=0.0404, f1-score=0.347, task=2][A
Batch:  55%|█████▌    | 33/60 [00:10<00:07,  3.65it/s, batch_loss=0.0196, diff=0.693, epoch_loss=0.0398, f1-score=0.349, task=1]  [A
Batch:  57%|█████▋    | 34/60 [00:10<00:07,  3.62it/s, batch_loss=0.0196, diff=0.693, epoch_loss=0.0398, f1-score=0.349, task=1][A
Batch:  57%|█████▋    | 34/60 [00:10<00:07,  3.62it/s, batch_loss=0.0353, diff=-.641, epoch_loss=0.0397, f1-score=0.349, task=0][A
Batch:  58%|█████▊    | 35/60 [00:10<00:06,  3.82it/s, batch_loss=0.0353, diff=-.641, epoch_loss=0.0397, f1-score=0.349, task=0][A
Batch:  58%|█████▊    | 35/60 [00:10<00:06,  3.82it/s, batch_loss=0.0715, diff=-.0493, epoch_loss=0.0406, f1-score=0.348, task=2][A
Batch:  60%|██████    | 36/60 [00:10<00:05,  4.26it/s, batch_loss=0.0

60



Batch:   0%|          | 0/60 [00:00<?, ?it/s, batch_loss=0.0733, diff=-.16, epoch_loss=0.0733, f1-score=0.351, task=2][A
Batch:   2%|▏         | 1/60 [00:00<00:13,  4.40it/s, batch_loss=0.0733, diff=-.16, epoch_loss=0.0733, f1-score=0.351, task=2][A
Batch:   2%|▏         | 1/60 [00:00<00:13,  4.40it/s, batch_loss=0.0317, diff=0.465, epoch_loss=0.0525, f1-score=0.352, task=0][A
Batch:   3%|▎         | 2/60 [00:00<00:12,  4.82it/s, batch_loss=0.0317, diff=0.465, epoch_loss=0.0525, f1-score=0.352, task=0][A
Batch:   3%|▎         | 2/60 [00:00<00:12,  4.82it/s, batch_loss=0.0733, diff=-.465, epoch_loss=0.0594, f1-score=0.351, task=2][A
Batch:   5%|▌         | 3/60 [00:00<00:11,  5.08it/s, batch_loss=0.0733, diff=-.465, epoch_loss=0.0594, f1-score=0.351, task=2][A
Batch:   5%|▌         | 3/60 [00:00<00:11,  5.08it/s, batch_loss=0.0315, diff=0.465, epoch_loss=0.0524, f1-score=0.351, task=0][A
Batch:   7%|▋         | 4/60 [00:00<00:10,  5.19it/s, batch_loss=0.0315, diff=0.465, epoch_l

Batch:  52%|█████▏    | 31/60 [00:05<00:08,  3.38it/s, batch_loss=0.0308, diff=0, epoch_loss=0.0443, f1-score=0.354, task=0]    [A
Batch:  53%|█████▎    | 32/60 [00:05<00:07,  3.57it/s, batch_loss=0.0308, diff=0, epoch_loss=0.0443, f1-score=0.354, task=0][A
Batch:  53%|█████▎    | 32/60 [00:06<00:07,  3.57it/s, batch_loss=0.0727, diff=-.468, epoch_loss=0.0451, f1-score=0.353, task=2][A
Batch:  55%|█████▌    | 33/60 [00:06<00:06,  4.01it/s, batch_loss=0.0727, diff=-.468, epoch_loss=0.0451, f1-score=0.353, task=2][A
Batch:  55%|█████▌    | 33/60 [00:06<00:06,  4.01it/s, batch_loss=0.0727, diff=0.0341, epoch_loss=0.0459, f1-score=0.352, task=2][A
Batch:  57%|█████▋    | 34/60 [00:06<00:05,  4.57it/s, batch_loss=0.0727, diff=0.0341, epoch_loss=0.0459, f1-score=0.352, task=2][A
Batch:  57%|█████▋    | 34/60 [00:06<00:05,  4.57it/s, batch_loss=0.0216, diff=0.639, epoch_loss=0.0452, f1-score=0.353, task=1] [A
Batch:  58%|█████▊    | 35/60 [00:06<00:06,  4.12it/s, batch_loss=0.0216, dif

60


[A
Batch:   2%|▏         | 1/60 [00:00<00:09,  6.35it/s, batch_loss=0.0324, diff=0.259, epoch_loss=0.0533, f1-score=0.351, task=0][A
Batch:   3%|▎         | 2/60 [00:00<00:11,  5.22it/s, batch_loss=0.0324, diff=0.259, epoch_loss=0.0533, f1-score=0.351, task=0][A
Batch:   3%|▎         | 2/60 [00:00<00:11,  5.22it/s, batch_loss=0.0261, diff=0.294, epoch_loss=0.0442, f1-score=0.352, task=1][A
Batch:   5%|▌         | 3/60 [00:00<00:12,  4.56it/s, batch_loss=0.0261, diff=0.294, epoch_loss=0.0442, f1-score=0.352, task=1][A
Batch:   5%|▌         | 3/60 [00:01<00:12,  4.56it/s, batch_loss=0.0324, diff=-.294, epoch_loss=0.0413, f1-score=0.352, task=0][A
Batch:   7%|▋         | 4/60 [00:01<00:14,  3.86it/s, batch_loss=0.0324, diff=-.294, epoch_loss=0.0413, f1-score=0.352, task=0][A
Batch:   7%|▋         | 4/60 [00:01<00:14,  3.86it/s, batch_loss=0.0741, diff=-.259, epoch_loss=0.0478, f1-score=0.351, task=2][A
Batch:   8%|▊         | 5/60 [00:01<00:13,  4.03it/s, batch_loss=0.0741, diff=-

Batch:  53%|█████▎    | 32/60 [00:06<00:06,  4.55it/s, batch_loss=0.0321, diff=-.294, epoch_loss=0.0441, f1-score=0.346, task=0][A
Batch:  55%|█████▌    | 33/60 [00:06<00:06,  4.29it/s, batch_loss=0.0321, diff=-.294, epoch_loss=0.0441, f1-score=0.346, task=0][A
Batch:  55%|█████▌    | 33/60 [00:07<00:06,  4.29it/s, batch_loss=0.0737, diff=-.242, epoch_loss=0.0450, f1-score=0.345, task=2][A
Batch:  57%|█████▋    | 34/60 [00:07<00:05,  4.70it/s, batch_loss=0.0737, diff=-.242, epoch_loss=0.0450, f1-score=0.345, task=2][A
Batch:  57%|█████▋    | 34/60 [00:07<00:05,  4.70it/s, batch_loss=0.0260, diff=0.536, epoch_loss=0.0444, f1-score=0.346, task=1][A
Batch:  58%|█████▊    | 35/60 [00:07<00:05,  4.25it/s, batch_loss=0.0260, diff=0.536, epoch_loss=0.0444, f1-score=0.346, task=1][A
Batch:  58%|█████▊    | 35/60 [00:07<00:05,  4.25it/s, batch_loss=0.0260, diff=0, epoch_loss=0.0439, f1-score=0.346, task=1]    [A
Batch:  60%|██████    | 36/60 [00:07<00:06,  3.52it/s, batch_loss=0.0260, di

60



Batch:   2%|▏         | 1/60 [00:00<00:08,  6.74it/s, batch_loss=0.0257, diff=0, epoch_loss=0.0257, f1-score=0.342, task=1]    [A
Batch:   3%|▎         | 2/60 [00:00<00:08,  6.46it/s, batch_loss=0.0257, diff=0, epoch_loss=0.0257, f1-score=0.342, task=1][A
Batch:   3%|▎         | 2/60 [00:00<00:08,  6.46it/s, batch_loss=0.0255, diff=0, epoch_loss=0.0256, f1-score=0.343, task=1][A
Batch:   5%|▌         | 3/60 [00:00<00:08,  6.60it/s, batch_loss=0.0255, diff=0, epoch_loss=0.0256, f1-score=0.343, task=1][A
Batch:   5%|▌         | 3/60 [00:00<00:08,  6.60it/s, batch_loss=0.0320, diff=-.261, epoch_loss=0.0272, f1-score=0.343, task=0][A
Batch:   7%|▋         | 4/60 [00:00<00:08,  6.83it/s, batch_loss=0.0320, diff=-.261, epoch_loss=0.0272, f1-score=0.343, task=0][A
Batch:   7%|▋         | 4/60 [00:00<00:08,  6.83it/s, batch_loss=0.0255, diff=0.261, epoch_loss=0.0269, f1-score=0.343, task=1][A
Batch:   8%|▊         | 5/60 [00:00<00:07,  6.96it/s, batch_loss=0.0255, diff=0.261, epoch_los

Batch:  53%|█████▎    | 32/60 [00:04<00:03,  7.05it/s, batch_loss=0.0319, diff=0, epoch_loss=0.0402, f1-score=0.341, task=0]   [A
Batch:  55%|█████▌    | 33/60 [00:04<00:03,  6.82it/s, batch_loss=0.0319, diff=0, epoch_loss=0.0402, f1-score=0.341, task=0][A
Batch:  55%|█████▌    | 33/60 [00:04<00:03,  6.82it/s, batch_loss=0.0711, diff=-.256, epoch_loss=0.0411, f1-score=0.34, task=2][A
Batch:  57%|█████▋    | 34/60 [00:04<00:03,  6.81it/s, batch_loss=0.0711, diff=-.256, epoch_loss=0.0411, f1-score=0.34, task=2][A
Batch:  57%|█████▋    | 34/60 [00:05<00:03,  6.81it/s, batch_loss=0.0254, diff=0.517, epoch_loss=0.0406, f1-score=0.341, task=1][A
Batch:  58%|█████▊    | 35/60 [00:05<00:03,  6.67it/s, batch_loss=0.0254, diff=0.517, epoch_loss=0.0406, f1-score=0.341, task=1][A
Batch:  58%|█████▊    | 35/60 [00:05<00:03,  6.67it/s, batch_loss=0.0320, diff=-.261, epoch_loss=0.0404, f1-score=0.341, task=0][A
Batch:  60%|██████    | 36/60 [00:05<00:03,  6.40it/s, batch_loss=0.0320, diff=-.26

60



Batch:   2%|▏         | 1/60 [00:00<00:09,  5.94it/s, batch_loss=0.0236, diff=0.585, epoch_loss=0.0480, f1-score=0.341, task=1][A
Batch:   3%|▎         | 2/60 [00:00<00:10,  5.64it/s, batch_loss=0.0236, diff=0.585, epoch_loss=0.0480, f1-score=0.341, task=1][A
Batch:   3%|▎         | 2/60 [00:00<00:10,  5.64it/s, batch_loss=0.0723, diff=-.585, epoch_loss=0.0561, f1-score=0.34, task=2] [A
Batch:   5%|▌         | 3/60 [00:00<00:10,  5.60it/s, batch_loss=0.0723, diff=-.585, epoch_loss=0.0561, f1-score=0.34, task=2][A
Batch:   5%|▌         | 3/60 [00:00<00:10,  5.60it/s, batch_loss=0.0723, diff=0.000501, epoch_loss=0.0601, f1-score=0.339, task=2][A
Batch:   7%|▋         | 4/60 [00:00<00:10,  5.54it/s, batch_loss=0.0723, diff=0.000501, epoch_loss=0.0601, f1-score=0.339, task=2][A
Batch:   7%|▋         | 4/60 [00:00<00:10,  5.54it/s, batch_loss=0.0329, diff=0.218, epoch_loss=0.0547, f1-score=0.339, task=0]   [A
Batch:   8%|▊         | 5/60 [00:00<00:09,  5.67it/s, batch_loss=0.0329, d

Batch:  53%|█████▎    | 32/60 [00:04<00:03,  7.19it/s, batch_loss=0.0329, diff=0.188, epoch_loss=0.0449, f1-score=0.338, task=0][A
Batch:  55%|█████▌    | 33/60 [00:04<00:03,  6.98it/s, batch_loss=0.0329, diff=0.188, epoch_loss=0.0449, f1-score=0.338, task=0][A
Batch:  55%|█████▌    | 33/60 [00:05<00:03,  6.98it/s, batch_loss=0.0329, diff=0, epoch_loss=0.0446, f1-score=0.337, task=0]    [A
Batch:  57%|█████▋    | 34/60 [00:05<00:03,  6.89it/s, batch_loss=0.0329, diff=0, epoch_loss=0.0446, f1-score=0.337, task=0][A
Batch:  57%|█████▋    | 34/60 [00:05<00:03,  6.89it/s, batch_loss=0.0234, diff=0.366, epoch_loss=0.0440, f1-score=0.338, task=1][A
Batch:  58%|█████▊    | 35/60 [00:05<00:03,  6.74it/s, batch_loss=0.0234, diff=0.366, epoch_loss=0.0440, f1-score=0.338, task=1][A
Batch:  58%|█████▊    | 35/60 [00:05<00:03,  6.74it/s, batch_loss=0.0719, diff=-.513, epoch_loss=0.0448, f1-score=0.337, task=2][A
Batch:  60%|██████    | 36/60 [00:05<00:03,  6.49it/s, batch_loss=0.0719, diff=-

Early stopping: Terminate
Loading weights from epoch 5





In [7]:
# Batcher over the sentiment test split (third argument 100).
test_batcher = process_and_batch(sent, sent.test, 100)

# Run the trained model on the sentiment test split without training.
# NOTE(review): `pred_writer` is the same csv writer as `writer`, so anything
# written through either one ends up in the same file -- confirm this is intended.
run_mtl_model(
    train = False,
    writer = writer,
    pred_writer = writer,
    model = ohmlp,
    loss_f = loss,
    metrics = m,
    gpu = False,
    batchers = test_batcher,
    mtl = 0,
    store = True,
    test = sent.test,
    data = sent.test,
    model_hdr = model_hdr,
    hyper_info = hyper_info,
    main_name = sent.name,
    data_name = 'test',
    metric_hdr = metric_hdr,
    dataset = sent,
    train_field = 'text',
    label_field = 'label',
)

                                                               