In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from metal.mmtl.trainer import MultitaskTrainer
from metal.mmtl.glue.glue_tasks import create_glue_tasks_payloads
from metal.mmtl.metal_model import MetalModel

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [3]:
SEED = 1

### Initialize normal payloads

In [4]:
task_kwargs = {
    "dl_kwargs": {"batch_size": 8},
    "freeze_bert":False,
    "bert_model": 'bert-base-cased',
    "max_len": 200   
}
task_names = ["RTE"]

In [5]:
%%time

# Create tasks and payloads
tasks, payloads = create_glue_tasks_payloads(task_names, **task_kwargs)

Using random seed: 662957
Using soft attention head
Loading RTE Dataset


HBox(children=(IntProgress(value=0, max=2490), HTML(value='')))




HBox(children=(IntProgress(value=0, max=277), HTML(value='')))




HBox(children=(IntProgress(value=0, max=3000), HTML(value='')))


CPU times: user 19.1 s, sys: 2.34 s, total: 21.4 s
Wall time: 27.8 s


In [6]:
tasks, payloads

([ClassificationTask(name=RTE, loss_multiplier=1.0)],
 [Payload(RTE_train: labels_to_tasks=[{'RTE_gold': 'RTE'}], split=train),
  Payload(RTE_valid: labels_to_tasks=[{'RTE_gold': 'RTE'}], split=valid),
  Payload(RTE_test: labels_to_tasks=[{'RTE_gold': 'RTE'}], split=test)])

### Initialize slice payloads

In [7]:
# Create tasks and payloads
task_kwargs.update({"slice_dict": {
    "RTE": ["dash_semicolon", "more_people"]}
})
task_kwargs['attention'] = None

tasks_slice, payloads_slice = create_glue_tasks_payloads(
    task_names, **task_kwargs
)

Using random seed: 734007
Loading RTE Dataset


HBox(children=(IntProgress(value=0, max=2490), HTML(value='')))




HBox(children=(IntProgress(value=0, max=277), HTML(value='')))




HBox(children=(IntProgress(value=0, max=3000), HTML(value='')))


Added label_set with 1003/2490 labels for task RTE_slice:dash_semicolon to payload RTE_train.
Added label_set with 64/2490 labels for task RTE_slice:more_people to payload RTE_train.
Added label_set with 116/277 labels for task RTE_slice:dash_semicolon to payload RTE_valid.
Added label_set with 12/277 labels for task RTE_slice:more_people to payload RTE_valid.
Added label_set with 1103/3000 labels for task RTE_slice:dash_semicolon to payload RTE_test.
Added label_set with 67/3000 labels for task RTE_slice:more_people to payload RTE_test.


In [8]:
tasks_slice, payloads_slice

([ClassificationTask(name=RTE, loss_multiplier=1.0),
  ClassificationTask(name=RTE_slice:dash_semicolon, loss_multiplier=1.0),
  ClassificationTask(name=RTE_slice:more_people, loss_multiplier=1.0),
  ClassificationTask(name=RTE_slice:dash_semicolon, loss_multiplier=1.0),
  ClassificationTask(name=RTE_slice:more_people, loss_multiplier=1.0),
  ClassificationTask(name=RTE_slice:dash_semicolon, loss_multiplier=1.0),
  ClassificationTask(name=RTE_slice:more_people, loss_multiplier=1.0)],
 [Payload(RTE_train: labels_to_tasks=[{'RTE_gold': 'RTE', 'RTE_slice:dash_semicolon': 'RTE_slice:dash_semicolon', 'RTE_slice:more_people': 'RTE_slice:more_people'}], split=train),
  Payload(RTE_valid: labels_to_tasks=[{'RTE_gold': 'RTE', 'RTE_slice:dash_semicolon': 'RTE_slice:dash_semicolon', 'RTE_slice:more_people': 'RTE_slice:more_people'}], split=valid),
  Payload(RTE_test: labels_to_tasks=[{'RTE_gold': 'RTE', 'RTE_slice:dash_semicolon': 'RTE_slice:dash_semicolon', 'RTE_slice:more_people': 'RTE_slice:mo

### Initialize and train baseline model 

In [9]:
model = MetalModel(tasks, seed=SEED, verbose=False)

In [10]:
# trainer = MultitaskTrainer(seed=SEED)
# trainer.train_model(
#     model,
#     payloads,
#     checkpoint_metric="RTE/RTE_valid/accuracy",
#     checkpoint_metric_mode="max",
#     checkoint_best=True,
#     writer="tensorboard",
#     optimizer="adamax",
#     lr=5e-5,
#     l2=1e-3,
#     log_every=0.1, 
#     score_every=0.1,
#     n_epochs=3,
#     progress_bar=True,
#     checkpoint_tasks=True,
#     checkpoint_cleanup=False
# )

### Evaluate baseline slices

In [11]:
import copy
eval_payload = copy.deepcopy(payloads_slice[1])

# NOTE: we need to retarget slices to the original RTE head
for label_name in ['RTE_slice:dash_semicolon', 'RTE_slice:more_people']:
    eval_payload.retarget_labelset(label_name, 'RTE')

label_set RTE_slice:dash_semicolon now points to task RTE (originally, RTE_slice:dash_semicolon).
label_set RTE_slice:more_people now points to task RTE (originally, RTE_slice:more_people).


In [12]:
model.score(eval_payload)

{'RTE/RTE_valid/RTE_gold/accuracy': 0.4729241877256318,
 'RTE/RTE_valid/RTE_slice:dash_semicolon/accuracy': 0.4827586206896552,
 'RTE/RTE_valid/RTE_slice:more_people/accuracy': 0.5833333333333334}

### Initialize and train slice model

In [13]:
model = MetalModel(tasks_slice, seed=SEED, verbose=False)

In [14]:
# trainer.train_model(
#     model,
#     payloads_slice, # NOTE: training on payloads
#     checkpoint_metric="RTE/RTE_valid/accuracy",
#     checkpoint_metric_mode="max",
#     checkoint_best=True,
#     writer="tensorboard",
#     optimizer="adamax",
#     lr=5e-5,
#     l2=1e-3,
#     log_every=0.1, 
#     score_every=0.1,
#     n_epochs=3,
#     progress_bar=True,
#     checkpoint_tasks=True,
#     checkpoint_cleanup=False
# )

#### Did we improve?

In [15]:
from metal.mmtl.metal_model import MetalModel
model.score(payloads_slice[1])

{'RTE/RTE_valid/RTE_gold/accuracy': 0.5306859205776173,
 'RTE_slice:dash_semicolon/RTE_valid/RTE_slice:dash_semicolon/accuracy': 0.5258620689655172,
 'RTE_slice:more_people/RTE_valid/RTE_slice:more_people/accuracy': 0.4166666666666667}