In [46]:
import os

from pytorch_pretrained_bert.modeling import BertModel
from torch import nn


class BertModule(nn.Module):
    def __init__(self, bert_model_name, dropout_prob=0.1, cache_dir="./cache/"):
        super().__init__()

        # Create cache directory if not exists
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)

        self.bert_model = BertModel.from_pretrained(
            bert_model_name, cache_dir=cache_dir
        )

    def forward(self, token_ids, token_type_ids=None, attention_mask=None):
        encoded_layers, pooled_output = self.bert_model(
            token_ids, token_type_ids, attention_mask
        )
        return encoded_layers, pooled_output

In [None]:
pip install pytorch_pretrained_bert

Collecting pytorch_pretrained_bert
[?25l  Downloading https://files.pythonhosted.org/packages/d7/e0/c08d5553b89973d9a240605b9c12404bcf8227590de62bae27acbcfe076b/pytorch_pretrained_bert-0.6.2-py3-none-any.whl (123kB)
[K     |████████████████████████████████| 133kB 7.6MB/s 
[?25hCollecting boto3
[?25l  Downloading https://files.pythonhosted.org/packages/1b/61/0b49baae16f482156550ce0b78a7ad265c27188a9d4fe6a1bd741fb43b9d/boto3-1.16.13.tar.gz (97kB)
[K     |████████████████████████████████| 102kB 6.0MB/s 
Collecting botocore<1.20.0,>=1.19.13
[?25l  Downloading https://files.pythonhosted.org/packages/99/40/b5e681d80dc46bafd0dc2e55266190cc432dfd5b72b9e7e1c5743aa6c362/botocore-1.19.13-py2.py3-none-any.whl (6.7MB)
[K     |████████████████████████████████| 6.7MB 9.9MB/s 
[?25hCollecting jmespath<1.0.0,>=0.7.1
  Downloading https://files.pythonhosted.org/packages/07/cb/5f001272b6faeb23c1c9e0acc04d48eaaf5c862c17709d20e3469c6e0139/jmespath-0.10.0-py2.py3-none-any.whl
Collecting s3transfer<0

In [26]:
from torch import nn


class ClassificationModule(nn.Module):
    def __init__(self, feature_dim, class_cardinality):
        super().__init__()

        self.linear = nn.Linear(feature_dim, class_cardinality)

    def forward(self, feature):
        return self.linear.forward(feature)

In [27]:
from torch import nn


class RegressionModule(nn.Module):
    def __init__(self, feature_dim):
        super().__init__()

        self.linear = nn.Linear(feature_dim, 1)

    def forward(self, feature):
        return self.linear.forward(feature)

In [28]:
GLUE_TASK_NAMES = [
    "CoLA",
    "MNLI",
    "MRPC",
    "QNLI",
    "QQP",
    "RTE",
    "SNLI",
    "SST-2",
    "STS-B",
    "WNLI",
]

SPLIT_MAPPING = {
    "CoLA": {"train": "train.tsv", "dev": "dev.tsv", "test": "test.tsv"},
    "MNLI": {
        "train": "train.tsv",
        "dev": "dev_matched.tsv",
        "test": "test_matched.tsv",
    },
    "MNLI_matched": {
        "train": "train.tsv",
        "dev": "dev_matched.tsv",
        "test": "test_matched.tsv",
    },
    "MNLI_unmatched": {
        "train": "train.tsv",
        "dev": "dev_mismatched.tsv",
        "test": "test_mismatched.tsv",
    },
    "MRPC": {"train": "train.tsv", "dev": "dev.tsv", "test": "test.tsv"},
    "QNLI": {"train": "train.tsv", "dev": "dev.tsv", "test": "test.tsv"},
    "QQP": {"train": "train.tsv", "dev": "dev.tsv", "test": "test.tsv"},
    "RTE": {"train": "train.tsv", "dev": "dev.tsv", "test": "test.tsv"},
    "SNLI": {"train": "train.tsv", "dev": "dev.tsv", "test": "test.tsv"},
    "SST-2": {"train": "train.tsv", "dev": "dev.tsv", "test": "test.tsv"},
    "STS-B": {"train": "train.tsv", "dev": "dev.tsv", "test": "test.tsv"},
    "WNLI": {"train": "train.tsv", "dev": "dev.tsv", "test": "test.tsv"},
}

INDEX_MAPPING = {
    # each one contains three values:
    # sentence 1 index, sentence 2 index, label index, -1 means abstain
    "CoLA": {"train": [3, -1, 1], "dev": [3, -1, 1], "test": [1, -1, -1]},
    "MNLI": {"train": [8, 9, 11], "dev": [8, 9, 15], "test": [8, 9, -1]},
    "MNLI_matched": {"train": [8, 9, 11], "dev": [8, 9, 15], "test": [8, 9, -1]},
    "MNLI_unmatched": {"train": [8, 9, 11], "dev": [8, 9, 15], "test": [8, 9, -1]},
    "MRPC": {"train": [3, 4, 0], "dev": [3, 4, 0], "test": [3, 4, -1]},
    "QNLI": {"train": [1, 2, 3], "dev": [1, 2, 3], "test": [1, 2, -1]},
    "QQP": {"train": [3, 4, 5], "dev": [3, 4, 5], "test": [1, 2, -1]},
    "RTE": {"train": [1, 2, 3], "dev": [1, 2, 3], "test": [1, 2, -1]},
    "SNLI": {"train": [7, 8, 10], "dev": [7, 8, 14], "test": [7, 8, 14]},
    "SST-2": {"train": [0, -1, 1], "dev": [0, -1, 1], "test": [1, -1, -1]},
    "STS-B": {"train": [7, 8, 9], "dev": [7, 8, 9], "test": [7, 8, -1]},
    "WNLI": {"train": [1, 2, 3], "dev": [1, 2, 3], "test": [1, 2, -1]},
}

SKIPPING_HEADER_MAPPING = {
    "CoLA": {"train": 0, "dev": 0, "test": 1},
    "MNLI": {"train": 1, "dev": 1, "test": 1},
    "MNLI_matched": {"train": 1, "dev": 1, "test": 1},
    "MNLI_unmatched": {"train": 1, "dev": 1, "test": 1},
    "MRPC": {"train": 1, "dev": 1, "test": 1},
    "QNLI": {"train": 1, "dev": 1, "test": 1},
    "QQP": {"train": 1, "dev": 1, "test": 1},
    "RTE": {"train": 1, "dev": 1, "test": 1},
    "SNLI": {"train": 1, "dev": 1, "test": 1},
    "SST-2": {"train": 1, "dev": 1, "test": 1},
    "STS-B": {"train": 1, "dev": 1, "test": 1},
    "WNLI": {"train": 1, "dev": 1, "test": 1},
}

LABEL_MAPPING = {
    "CoLA": {"1": 1, "0": 2},
    "MNLI": {"entailment": 1, "contradiction": 2, "neutral": 3},
    "MNLI_matched": {"entailment": 1, "contradiction": 2, "neutral": 3},
    "MNLI_unmatched": {"entailment": 1, "contradiction": 2, "neutral": 3},
    "MRPC": {"1": 1, "0": 2},
    "QNLI": {"entailment": 1, "not_entailment": 2},
    "QQP": {"1": 1, "0": 2},
    "RTE": {"entailment": 1, "not_entailment": 2},
    "SNLI": {"entailment": 1, "contradiction": 2, "neutral": 3},
    "SST-2": {"1": 1, "0": 2},
    "STS-B": None,
    "WNLI": {"1": 1, "0": 2},
}

METRIC_MAPPING = {
    "CoLA": ["matthews_correlation"],
    "MNLI": ["accuracy"],
    "MNLI_matched": ["accuracy"],
    "MNLI_unmatched": ["accuracy"],
    "MRPC": ["accuracy_f1"],
    "QNLI": ["accuracy"],
    "QQP": ["accuracy_f1"],
    "RTE": ["accuracy"],
    "SNLI": ["accuracy"],
    "SST-2": ["accuracy"],
    "STS-B": ["pearson_spearman"],
    "WNLI": ["accuracy"],
}

In [43]:
import codecs
import logging
import os

import numpy as np
import torch
from pytorch_pretrained_bert import BertTokenizer
# from task_config import (
#     INDEX_MAPPING,
#     LABEL_MAPPING,
#     SKIPPING_HEADER_MAPPING,
#     SPLIT_MAPPING,
# )

try:
    from IPython import get_ipython

    if "IPKernelApp" not in get_ipython().config:
        raise ImportError("console")
except (AttributeError, ImportError):
    from tqdm import tqdm
else:
    from tqdm import tqdm_notebook as tqdm


logger = logging.getLogger(__name__)

DELIMITER = "\t"


def preprocessor(
    data_dir,
    task_name,
    split,
    bert_model_name="bert-base-uncased",
    max_data_samples=None,
    max_sequence_length=128,
):

    sentences, labels = parse_tsv(data_dir, task_name, split, max_data_samples)

    labels = torch.from_numpy(np.array(labels))

    do_lower_case = "uncased" in bert_model_name

    tokenizer = BertTokenizer.from_pretrained(
        bert_model_name, do_lower_case=do_lower_case
    )

    bert_token_ids = []
    bert_token_masks = []
    bert_token_segments = []

    for sentence in sentences:
        if len(sentence) not in [1, 2]:
            logger.error("Sentence length doesn't match.")

        # Tokenize sentences
        tokenized_sentence = [tokenizer.tokenize(sent) for sent in sentence]
        sent1_tokens = tokenized_sentence[0]
        sent2_tokens = tokenized_sentence[1] if len(tokenized_sentence) == 2 else None

        # One sentence case
        if len(tokenized_sentence) == 1:
            # Remove tokens that exceeds the max_sequence_length
            if len(sent1_tokens) > max_sequence_length - 2:
                # Account for [CLS] and [SEP] with "- 2"
                sent1_tokens = sent1_tokens[: max_sequence_length - 2]
        # Two sentences case
        else:
            # Remove tokens that exceeds the max_sequence_length
            while True:
                total_length = len(sent1_tokens) + len(sent2_tokens)
                # Account for [CLS], [SEP], [SEP] with "- 3"
                if total_length <= max_sequence_length - 3:
                    break
                if len(sent1_tokens) > len(sent2_tokens):
                    sent1_tokens.pop()
                else:
                    sent2_tokens.pop()

        # Convert to BERT manner
        tokens = ["[CLS]"] + sent1_tokens + ["[SEP]"]
        token_segments = [0] * len(tokens)

        if sent2_tokens:
            tokens += sent2_tokens + ["[SEP]"]
            token_segments += [1] * (len(sent2_tokens) + 1)

        token_ids = tokenizer.convert_tokens_to_ids(tokens)

        # Generate mask where 1 for real tokens and 0 for padding tokens
        token_masks = [1] * len(token_ids)

        bert_token_ids.append(torch.LongTensor(token_ids))
        bert_token_masks.append(torch.LongTensor(token_masks))
        bert_token_segments.append(torch.LongTensor(token_segments))

    return bert_token_ids, bert_token_segments, bert_token_masks, labels


def parse_tsv(data_dir, task_name, split, max_data_samples=None):
    sentences = []
    labels = []

    tsv_path = os.path.join(data_dir, task_name, SPLIT_MAPPING[task_name][split])
    with codecs.open(tsv_path, "r", "utf-8") as f:
        # Skip header if needed
        if SKIPPING_HEADER_MAPPING[task_name]:
            f.readline()

        rows = list(enumerate(f))

        # Truncate to max_data_samples
        if max_data_samples:
            rows = rows[:max_data_samples]

        # Calculate the max number of column
        max_cloumns = len(rows[0][1].strip().split(DELIMITER))

        for idx, row in tqdm(rows):
            row = row.strip().split(DELIMITER)

            if len(row) > max_cloumns:
                logger.warning("Row has more columns than expected, skip...")
                continue

            sent1_idx, sent2_idx, label_idx = INDEX_MAPPING[task_name][split]

            if sent1_idx >= len(row) or sent2_idx >= len(row) or label_idx >= len(row):
                logger.warning("Data column doesn't match, skip...")
                continue

            sent1 = row[sent1_idx]
            sent2 = row[sent2_idx] if sent2_idx >= 0 else None

            if label_idx >= 0:
                if LABEL_MAPPING[task_name] is not None:
                    label = LABEL_MAPPING[task_name][row[label_idx]]
                else:
                    label = np.float32(row[label_idx])
            else:
                label = -1

            sentences.append([sent1] if sent2 is None else [sent1, sent2])
            labels.append(label)

    return sentences, labels


# Test purpose
if __name__ == "__main__":
    task_names = [
        "CoLA",
        "MNLI",
        # "MNLI_matched",
        # "MNLI_unmatched",
        "MRPC",
        "QNLI",
        "QQP",
        "RTE",
        "SNLI",
        "SST-2",
        "STS-B",
        "WNLI",
    ]

    splits = ["train", "dev", "test"]

    data_dir = "data"

    for task_name in task_names:
        for split in splits:
            print(task_name, split)
            print(
                preprocessor(
                    data_dir,
                    task_name,
                    split,
                    bert_model_name="bert-base-uncased",
                    max_data_samples=2,
                    max_sequence_length=20,
                )
            )

CoLA train


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  2028,  2062, 18404,  2236,  3989,  1998,  1045,  1005,  1049,
         3228,  2039,  1012,   102]), tensor([  101,  2028,  2062, 18404,  2236,  3989,  2030,  1045,  1005,  1049,
         3228,  2039,  1012,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([1, 1]))
CoLA dev


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  1996, 15871,  2081,  1996,  8164,  7683,  2058,  1996,  4139,
         3240,  1012,   102]), tensor([  101,  1996,  6228, 10658, 23277,  8004, 11533,  2993,  6065,  1012,
          102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([1, 1]))
CoLA test


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  3021, 26265,  2627,  1996,  2160,  1012,   102]), tensor([  101,  1996,  2482, 10189,  8126,  2049,  2126,  2091,  1996,  2346,
         1012,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])], [tensor([1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([-1, -1]))
MNLI train


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101, 17158,  2135,  6949,  8301, 25057,  2038,  2048,  3937,  9646,
          102,  4031,  1998, 10505,  2024,  2054,  2191,  6949,  8301,   102]), tensor([ 101, 2017, 2113, 2076, 1996, 2161, 1998, 1045, 3984, 2012,  102, 2017,
        4558, 1996, 2477, 2000, 1996, 2206, 2504,  102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([3, 1]))
MNLI dev


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  1996,  2047,  2916,  2024,  3835,  2438,   102,  3071,  2428,
         7777,  1996, 14751,  6666,   102]), tensor([ 101, 2023, 2609, 2950, 1037, 2862, 1997, 2035, 2400, 4791,  102, 1996,
        2231, 3237, 4790, 7431, 2006, 1996, 4037,  102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([3, 2]))
MNLI test


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  7632,  2121, 22083,  1010,  2019,  2015, 10819,  2080,  1010,
          102,  7632,  2121, 22083,  2003,  1037,  2171,  4276,  2559,   102]), tensor([  101,  1996,  6698,  1997,  1996, 14260,  3896,  2052, 12530,  1999,
          102,  2116,  2111,  2052,  2022,  2200, 12511,  2000,  6065,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([-1, -1]))
MRPC train


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([ 101, 2572, 3217, 5831, 5496, 2010, 2567, 1010, 3183, 2002,  102, 7727,
        2000, 2032, 2004, 2069, 1000, 1996, 7409,  102]), tensor([  101,  9805,  3540, 11514,  2050,  3079, 11282,  2243,  1005,  1055,
          102,  9805,  3540, 11514,  2050,  4149, 11282,  2243,  1005,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([1, 2]))
MRPC dev


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  2002,  2056,  1996,  9440,  2121,  7903,  2063, 11345,  2449,
          102,  1000,  1996,  9440,  2121,  7903,  2063, 11345,  2449,   102]), tensor([  101, 20201, 22948,  2056, 10958, 19053,  4140,  6283,  1996,  8956,
          102,  2010,  2564,  2056,  2002,  2001,  1000,  2531,  3867,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([1, 2]))
MRPC test


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  7473,  2278,  2860,  1005,  1055,  2708,  4082,  2961,  1010,
          102,  2783,  2708,  4082,  2961,  3505, 14998,  1998,  2177,   102]), tensor([  101,  1996,  2088,  1005,  1055,  2048,  2922,  8285, 12088,  2056,
          102,  4968,  4341,  2012,  2119, 13938,  1998,  2053,  1012,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([-1, -1]))
QNLI train


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  2054,  2003,  1996, 24665, 23052,  2012, 10289,  8214,  1029,
          102,  3202,  2369,  1996, 13546,  2003,  1996, 24665, 23052,   102]), tensor([  101,  2054,  2003,  1996, 24665, 23052,  2012, 10289,  8214,  1029,
          102,  2009,  2003,  1037, 15059,  1997,  1996, 24665, 23052,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([1, 2]))
QNLI dev


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  2029,  5088,  2136,  3421,  1996, 10511,  2012,  3565,  4605,
          102,  1996,  2137,  2374,  3034,  1006, 10511,  1007,  3410,   102]), tensor([  101,  2029,  5088,  2136,  3421,  1996, 10511,  2012,  3565,  4605,
          102,  3565,  4605,  2753,  2001,  2019,  2137,  2374,  2208,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([1, 2]))
QNLI test


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  2054, 15839,  2109,  2744,  1997,  1037,  3131,  1997,  2486,
          102,  2060,  8115,  7231,  3197,  1997,  2486,  2421,  1996,   102]), tensor([  101,  2054, 15839,  2109,  2744,  1997,  1037,  3131,  1997,  2486,
          102,  1996, 11382, 24915,  1011,  2486,  5260,  2000,  2019,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([-1, -1]))
QQP train


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  2129,  2003,  1996,  2166,  1997,  1037,  8785,  3076,  1029,
          102,  2029,  2504,  1997, 17463,  8156,  2003,  2438,  2005,   102]), tensor([ 101, 2129, 2079, 1045, 2491, 2026, 7109, 2100, 6699, 1029,  102, 2129,
        2079, 2017, 2491, 2115, 7109, 9961, 1029,  102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([2, 1]))
QQP dev


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([ 101, 2339, 2024, 3060, 1011, 4841, 2061, 3376, 1029,  102, 2339, 2024,
        6696, 2015, 2061, 3376, 1029,  102]), tensor([ 101, 1045, 2215, 2000, 7323, 8065, 1999, 3274, 2671, 2055,  102, 1045,
        5047, 2591, 2865, 2005, 1037, 2512, 1011,  102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([2, 2]))
QQP test


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  2052,  1996,  2801,  1997,  8398,  1998, 22072,  1999,  2793,
          102,  2079,  2017,  2228,  2008,  2065,  6221,  8398,  2020,   102]), tensor([ 101, 2054, 2024, 1996, 2327, 2702, 7325, 1011, 2000, 1011,  102, 2054,
        2024, 1996, 2327, 2702, 7325, 1011, 2000,  102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([-1, -1]))
RTE train


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([ 101, 2053, 4255, 1997, 3742, 6215, 2179, 1999, 5712, 2664,  102, 4255,
        1997, 3742, 6215, 2179, 1999, 5712, 1012,  102]), tensor([  101,  1037,  2173,  1997, 14038,  1010,  2044,  4831,  2198,  2703,
          102,  4831, 12122, 16855,  2003,  1996,  2047,  3003,  1997,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([2, 1]))
RTE dev


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101, 11271, 20726,  1010,  1996,  7794,  1997,  1996,  3364,  5696,
        20726,  1010,   102,  5696, 20726,  2018,  2019,  4926,  1012,   102]), tensor([  101,  2664,  1010,  2057,  2085,  2024, 13648,  2008, 24479,  2024,
          102, 10327,  2003,  3045,  1996,  2162,  2114, 24479,  1012,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([2, 1]))
RTE test


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  2158, 23296,  2050,  2001, 11908,  2044,  5506, 28600,  6590,
          102, 18454, 26086,  2003,  3141,  2000,  2158, 23296,  2050,   102]), tensor([  101,  4614,  1999,  4380,  2360,  2008,  2062,  2084,  3263,  2111,
          102,  4614,  1999,  4380,  2907,  3263,  2111,  2004, 13446,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([-1, -1]))
SNLI train


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  1037,  2711,  2006,  1037,  3586, 14523,  2058,  1037,  3714,
          102,  1037,  2711,  2003,  2731,  2010,  3586,  2005,  1037,   102]), tensor([  101,  1037,  2711,  2006,  1037,  3586, 14523,  2058,  1037,  3714,
          102,  1037,  2711,  2003,  2012,  1037, 15736,  1010, 13063,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([3, 2]))
SNLI dev


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  2048,  2308,  2024, 23581,  2096,  3173,  2000,  2175, 14555,
          102,  1996,  5208,  2024, 17662,  9119,  2096,  3173,  2000,   102]), tensor([  101,  2048,  2308,  2024, 23581,  2096,  3173,  2000,  2175, 14555,
         1012,   102,  2048,  2450,  2024,  3173, 14555,  1012,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([3, 1]))
SNLI test


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  2023,  2277,  6596, 10955,  2000,  1996, 11678,  2004,  2027,
          102,  1996,  2277,  2038, 15288,  1999,  1996,  5894,  1012,   102]), tensor([  101,  2023,  2277,  6596, 10955,  2000,  1996, 11678,  2004,  2027,
         6170,   102,  1996,  2277,  2003,  3561,  2007,  2299,  1012,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([3, 1]))
SST-2 train


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  5342,  2047,  3595,  8496,  2013,  1996, 18643,  3197,   102]), tensor([  101,  3397,  2053, 15966,  1010,  2069,  4450,  2098, 18201,  2015,
          102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([2, 2]))
SST-2 dev


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  2009,  1005,  1055,  1037, 11951,  1998,  2411, 12473,  4990,
         1012,   102]), tensor([  101,  4895, 10258,  2378,  8450,  2135, 21657,  1998,  7143,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([1, 2]))
SST-2 test


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101, 15491, 28616, 22444,  4095,  1997,  6782,  1998, 11541,  1012,
          102]), tensor([  101,  2023,  2143,  1005,  1055,  3276,  2000,  5025,  6980,  2003,
         1996,  2168,  2004,  2054,  4234,  1011,  3392, 19311,  2075,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([-1, -1]))
STS-B train


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([ 101, 1037, 4946, 2003, 2635, 2125, 1012,  102, 2019, 2250, 4946, 2003,
        2635, 2125, 1012,  102]), tensor([ 101, 1037, 2158, 2003, 2652, 1037, 2312, 8928, 1012,  102, 1037, 2158,
        2003, 2652, 1037, 8928, 1012,  102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([5.0000, 3.8000]))
STS-B dev


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([ 101, 1037, 2158, 2007, 1037, 2524, 6045, 2003, 5613, 1012,  102, 1037,
        2158, 4147, 1037, 2524, 6045, 2003, 5613,  102]), tensor([ 101, 1037, 2402, 2775, 2003, 5559, 1037, 3586, 1012,  102, 1037, 2775,
        2003, 5559, 1037, 3586, 1012,  102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([5.0000, 4.7500]))
STS-B test


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  1037,  2611,  2003, 20724,  2014,  2606,  1012,   102,  1037,
         2611,  2003, 12766,  2014,  2606,  1012,   102]), tensor([ 101, 1037, 2177, 1997, 2273, 2377, 4715, 2006, 1996, 3509,  102, 1037,
        2177, 1997, 3337, 2024, 2652, 4715, 2006,  102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([-1, -1]))
WNLI train


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  1045,  5881,  1037,  9231,  2083,  1037, 25659,  1012,  2043,
         1045,  2766,   102,  1996, 25659,  2018,  1037,  4920,  1012,   102]), tensor([ 101, 2198, 2481, 1005, 1056, 2156, 1996, 2754, 2007, 5006, 1999, 2392,
        1997,  102, 2198, 2003, 2061, 2460, 1012,  102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([1, 1]))
WNLI dev


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101,  1996, 12475,  2003, 18856,  8649,  5999,  2007,  2606,  1012,
         2009,   102,  1996,  2606,  2038,  2000,  2022, 12176,  1012,   102]), tensor([ 101, 4869, 6573, 2006, 6294, 1005, 1055, 2341, 2021, 2016, 2106, 2025,
        3437,  102, 6294, 2106, 2025, 3437, 1012,  102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([2, 1]))
WNLI test


HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


([tensor([  101, 21696,  2063,  1998, 21008,  2018,  2464,  1996,  4499,  8375,
          102,  5194,  2743,  2185,  2043, 21696,  2063,  1998, 21008,   102]), tensor([  101, 21696,  2063,  1998, 21008,  2018,  2464,  1996,  4499,  8375,
          102,  5194,  2743,  2185,  2043,  1996,  4499,  2234,  1999,   102])], [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])], [tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])], tensor([-1, -1]))


In [48]:
from functools import partial

import torch.nn.functional as F
from emmental.scorer import Scorer
from emmental.task import EmmentalTask
from torch import nn
from torch.nn import MSELoss


def ce_loss(task_name, immediate_ouput_dict, Y, active):
    module_name = f"{task_name}_pred_head"
    return F.cross_entropy(
        immediate_ouput_dict[module_name][0][active], (Y.view(-1) - 1)[active]
    )


def mse_loss(task_name, immediate_ouput_dict, Y, active):
    mse = MSELoss()
    module_name = f"{task_name}_pred_head"
    return mse(
        immediate_ouput_dict[module_name][0][active].view(-1), Y[active].view(-1)
    )


def output(task_name, immediate_ouput_dict):
    module_name = f"{task_name}_pred_head"
    return immediate_ouput_dict[module_name][0]


def get_gule_task(task_names, bert_model_name):

    tasks = dict()

    bert_module = BertModule(bert_model_name)
    bert_output_dim = 768 if "base" in bert_model_name else 1024

    for task_name in task_names:
        task_cardinality = (
            len(LABEL_MAPPING[task_name].keys())
            if LABEL_MAPPING[task_name] is not None
            else 1
        )

        metrics = METRIC_MAPPING[task_name]

        if task_name == "STS-B":
            loss_fn = partial(mse_loss, task_name)
        else:
            loss_fn = partial(ce_loss, task_name)

        task = EmmentalTask(
            name=task_name,
            module_pool=nn.ModuleDict(
                {
                    "bert_module": bert_module,
                    f"{task_name}_pred_head": nn.Linear(
                        bert_output_dim, task_cardinality
                    ),
                }
            ),
            task_flow=[
                {
                    "name": "input",
                    "module": "bert_module",
                    "inputs": [
                        ("_input_", "token_ids"),
                        ("_input_", "token_segments"),
                        ("_input_", "token_masks"),
                    ],
                },
                {
                    "name": f"{task_name}_pred_head",
                    "module": f"{task_name}_pred_head",
                    "inputs": [("input", 1)],
                },
            ],
            loss_func=loss_fn,
            output_func=partial(output, task_name),
            scorer=Scorer(metrics=metrics),
        )

        tasks[task_name] = task

    return tasks

In [49]:
import logging

import torch.nn.functional as F
from torch import nn
from torch.nn import MSELoss

import emmental
from emmental import Meta
from emmental.data import EmmentalDataLoader, EmmentalDataset
from emmental.learner import EmmentalLearner
from emmental.model import EmmentalModel
from emmental.scorer import Scorer
from emmental.task import EmmentalTask

In [50]:
logger = logging.getLogger(__name__)

In [51]:
# TASK_NAMES = ["RTE", "STS-B"]
DATA_DIR = "data"
BERT_MODEL_NAME = "bert-base-uncased"
BATCH_SIZE = 16

In [52]:
emmental.init("logs")

[2020-11-07 10:02:45,188][INFO] emmental.meta:107 - Setting logging directory to: logs/2020_11_07/10_02_45/43af8e36
[2020-11-07 10:02:45,224][INFO] emmental.meta:58 - Loading Emmental default config from /usr/local/lib/python3.6/dist-packages/emmental/emmental-default-config.yaml.


In [53]:
datasets = {}

for task_name in GLUE_TASK_NAMES:
    for split in ["train", "dev", "test"]:
        bert_token_ids, bert_token_segments, bert_token_masks, labels = preprocessor(
            data_dir=DATA_DIR,
            task_name=task_name,
            split=split,
            bert_model_name=BERT_MODEL_NAME,
            max_data_samples=1000,
            max_sequence_length=128,
        )
        X_dict = {
            "token_ids": bert_token_ids,
            "token_segments": bert_token_segments,
            "token_masks": bert_token_masks,
        }
        Y_dict = {"labels": labels}

        if task_name not in datasets: datasets[task_name] = {}
        
        datasets[task_name][split] = EmmentalDataset(name="GLUE", X_dict=X_dict, Y_dict=Y_dict)

        logger.info(f"Loaded {split} for {task_name}.")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:02:57,348][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:02:57,597][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:02:57,598][INFO] __main__:24 - Loaded train for CoLA.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:02:58,021][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:02:58,176][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:02:58,176][INFO] __main__:24 - Loaded dev for CoLA.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:02:58,611][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:02:58,825][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:02:58,826][INFO] __main__:24 - Loaded test for CoLA.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:05,594][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:06,215][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:06,216][INFO] __main__:24 - Loaded train for MNLI.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:06,833][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:07,497][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:07,498][INFO] __main__:24 - Loaded dev for MNLI.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:08,110][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:09,020][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:09,022][INFO] __main__:24 - Loaded test for MNLI.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:09,550][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:10,943][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:10,950][INFO] __main__:24 - Loaded train for MRPC.


HBox(children=(FloatProgress(value=0.0, max=408.0), HTML(value='')))




[2020-11-07 10:03:11,465][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:12,042][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:12,048][INFO] __main__:24 - Loaded dev for MRPC.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:12,538][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:13,993][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:14,000][INFO] __main__:24 - Loaded test for MRPC.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:15,748][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:16,549][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:16,550][INFO] __main__:24 - Loaded train for QNLI.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:17,013][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:17,843][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:17,845][INFO] __main__:24 - Loaded dev for QNLI.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:18,344][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:19,474][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:19,475][INFO] __main__:24 - Loaded test for QNLI.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:22,778][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:23,287][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:23,289][INFO] __main__:24 - Loaded train for QQP.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:24,046][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:24,539][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:24,540][INFO] __main__:24 - Loaded dev for QQP.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:27,683][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:28,102][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:28,103][INFO] __main__:24 - Loaded test for QQP.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:28,582][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:29,596][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:29,597][INFO] __main__:24 - Loaded train for RTE.


HBox(children=(FloatProgress(value=0.0, max=277.0), HTML(value='')))




[2020-11-07 10:03:30,033][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:30,389][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:30,390][INFO] __main__:24 - Loaded dev for RTE.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:30,839][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:31,694][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:31,695][INFO] __main__:24 - Loaded test for RTE.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:36,510][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:36,981][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:36,982][INFO] __main__:24 - Loaded train for SNLI.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:37,695][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:38,190][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:38,191][INFO] __main__:24 - Loaded dev for SNLI.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))






[2020-11-07 10:03:38,805][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:39,265][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:39,266][INFO] __main__:24 - Loaded test for SNLI.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:40,028][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:40,268][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:40,269][INFO] __main__:24 - Loaded train for SST-2.


HBox(children=(FloatProgress(value=0.0, max=872.0), HTML(value='')))




[2020-11-07 10:03:40,693][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:41,082][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:41,082][INFO] __main__:24 - Loaded dev for SST-2.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:41,508][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:41,955][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:41,956][INFO] __main__:24 - Loaded test for SST-2.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:42,451][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:42,784][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:42,785][INFO] __main__:24 - Loaded train for STS-B.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:43,218][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:43,719][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:43,720][INFO] __main__:24 - Loaded dev for STS-B.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




[2020-11-07 10:03:44,174][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:44,613][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:44,614][INFO] __main__:24 - Loaded test for STS-B.


HBox(children=(FloatProgress(value=0.0, max=635.0), HTML(value='')))




[2020-11-07 10:03:45,041][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:45,365][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:45,367][INFO] __main__:24 - Loaded train for WNLI.


HBox(children=(FloatProgress(value=0.0, max=71.0), HTML(value='')))




[2020-11-07 10:03:45,828][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:45,908][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:45,909][INFO] __main__:24 - Loaded dev for WNLI.


HBox(children=(FloatProgress(value=0.0, max=146.0), HTML(value='')))




[2020-11-07 10:03:46,317][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
[2020-11-07 10:03:46,492][INFO] emmental.data:57 - Auto generate uids for dataset GLUE under _uids_.
[2020-11-07 10:03:46,493][INFO] __main__:24 - Loaded test for WNLI.


In [54]:
dataloaders = []

for task_name in GLUE_TASK_NAMES:
    for split in ["train", "dev", "test"]:
        dataloaders.append(
            EmmentalDataLoader(
                task_to_label_dict={task_name: "labels"},
                dataset=datasets[task_name][split],
                split=split,
                batch_size=BATCH_SIZE,
                shuffle=True if split == "train" else False,
            )
        )
        logger.info(f"Built dataloader for {task_name} {split} set.")

[2020-11-07 10:04:54,131][INFO] __main__:14 - Built dataloader for CoLA train set.
[2020-11-07 10:04:54,132][INFO] __main__:14 - Built dataloader for CoLA dev set.
[2020-11-07 10:04:54,133][INFO] __main__:14 - Built dataloader for CoLA test set.
[2020-11-07 10:04:54,133][INFO] __main__:14 - Built dataloader for MNLI train set.
[2020-11-07 10:04:54,136][INFO] __main__:14 - Built dataloader for MNLI dev set.
[2020-11-07 10:04:54,137][INFO] __main__:14 - Built dataloader for MNLI test set.
[2020-11-07 10:04:54,139][INFO] __main__:14 - Built dataloader for MRPC train set.
[2020-11-07 10:04:54,148][INFO] __main__:14 - Built dataloader for MRPC dev set.
[2020-11-07 10:04:54,150][INFO] __main__:14 - Built dataloader for MRPC test set.
[2020-11-07 10:04:54,152][INFO] __main__:14 - Built dataloader for QNLI train set.
[2020-11-07 10:04:54,153][INFO] __main__:14 - Built dataloader for QNLI dev set.
[2020-11-07 10:04:54,154][INFO] __main__:14 - Built dataloader for QNLI test set.
[2020-11-07 10:0

In [55]:
tasks = get_gule_task(GLUE_TASK_NAMES, BERT_MODEL_NAME)

[2020-11-07 10:05:07,192][INFO] pytorch_pretrained_bert.file_utils:224 - https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz not found in cache, downloading to /tmp/tmpuozz5nf0
100%|██████████| 407873900/407873900 [00:14<00:00, 27219286.12B/s]
[2020-11-07 10:05:22,769][INFO] pytorch_pretrained_bert.file_utils:237 - copying /tmp/tmpuozz5nf0 to cache at ./cache/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
[2020-11-07 10:05:23,985][INFO] pytorch_pretrained_bert.file_utils:241 - creating metadata file for ./cache/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
[2020-11-07 10:05:23,987][INFO] pytorch_pretrained_bert.file_utils:250 - removing temp file /tmp/tmpuozz5nf0
[2020-11-07 10:05:24,029][INFO] pytorch_pretrained_bert.modeling:580 - loading archive file https://s3.amazonaws.com/models.huggingface.

In [56]:
Meta.update_config(
    config={
        "meta_config": {"device": 1},
        "learner_config": {
            "n_epochs": 3,
            "valid_split": "dev",
            "optimizer_config": {"optimizer": "adam", "lr": 5e-5},
            "lr_scheduler_config": {
                "warmup_steps": 70,
                "warmup_unit": "batch",
                "lr_scheduler": "linear",
            },
        },
        "logging_config": {
            "evaluation_freq": 50,
            "checkpointing": None,
            #             "checkpointer_config": {
            #                 "checkpoint_metric": f"{TASK_NAME}/GLUE/train/accuracy",
            #                 "checkpoint_freq": 10,
            #             },
        },
    }
)


[2020-11-07 10:05:36,052][INFO] emmental.meta:157 - Updating Emmental config from user provided config.


In [60]:
mtl_model = EmmentalModel(name = 'GLUE_multi_task', tasks=tasks.values())

[2020-11-07 10:09:42,843][INFO] emmental.model:76 - No cuda device available. Switch to cpu instead.
[2020-11-07 10:09:42,845][INFO] emmental.model:76 - No cuda device available. Switch to cpu instead.
[2020-11-07 10:09:42,845][INFO] emmental.model:76 - No cuda device available. Switch to cpu instead.
[2020-11-07 10:09:42,846][INFO] emmental.model:76 - No cuda device available. Switch to cpu instead.
[2020-11-07 10:09:42,848][INFO] emmental.model:76 - No cuda device available. Switch to cpu instead.
[2020-11-07 10:09:42,849][INFO] emmental.model:76 - No cuda device available. Switch to cpu instead.
[2020-11-07 10:09:42,850][INFO] emmental.model:76 - No cuda device available. Switch to cpu instead.
[2020-11-07 10:09:42,851][INFO] emmental.model:76 - No cuda device available. Switch to cpu instead.
[2020-11-07 10:09:42,852][INFO] emmental.model:76 - No cuda device available. Switch to cpu instead.
[2020-11-07 10:09:42,853][INFO] emmental.model:76 - No cuda device available. Switch to cpu

In [58]:
emmental_learner = EmmentalLearner()

In [None]:
emmental_learner.learn(mtl_model, dataloaders)

[2020-11-07 10:09:46,227][INFO] emmental.logging.logging_manager:39 - Evaluating every 50 epoch.
[2020-11-07 10:09:46,228][INFO] emmental.logging.logging_manager:60 - No checkpointing.
[2020-11-07 10:09:46,238][INFO] emmental.learner:106 - Using optimizer Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 5e-05
    weight_decay: 0.0
)
[2020-11-07 10:09:46,241][INFO] emmental.learner:226 - Warmup 70 batchs.
[2020-11-07 10:09:46,350][INFO] emmental.learner:193 - Using lr_scheduler <torch.optim.lr_scheduler.LambdaLR object at 0x7f8e5d2844a8> with step every 1 batch.
[2020-11-07 10:09:46,352][INFO] emmental.learner:528 - Start learning...


HBox(children=(FloatProgress(value=0.0, description='Epoch 0:', max=607.0, style=ProgressStyle(description_wid…