In [1]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

!pip install -U adapter-transformers
!pip install datasets

Collecting adapter-transformers
  Downloading adapter_transformers-2.2.0-py3-none-any.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 8.4 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 37.3 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 35.5 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 41.2 MB/s 
Collecting huggingface-hub>=0.0.17
  Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)
[K     |████████████████████████████████| 61 kB 530 kB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, ada

In [2]:
from datasets import load_dataset, DatasetDict

# Fetch the SciIE dataset (all of its splits) from the Hugging Face Hub.
dataset = load_dataset(path="yxchar/sciie-tlm")
# Cell output: number of rows in each split.
dataset.num_rows

Using custom data configuration yxchar___sciie-tlm-a32f1f2c4e9b5c0d


Downloading and preparing dataset csv/yxchar___sciie-tlm to /root/.cache/huggingface/datasets/csv/yxchar___sciie-tlm-a32f1f2c4e9b5c0d/0.0.0/bf68a4c4aefa545d0712b2fcbb1b327f905bbe2f6425fbc5e8c25234acb9e14a...


  0%|          | 0/3 [00:00<?, ?it/s]

Downloading:   0%|          | 0.00/663k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/199k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/yxchar___sciie-tlm-a32f1f2c4e9b5c0d/0.0.0/bf68a4c4aefa545d0712b2fcbb1b327f905bbe2f6425fbc5e8c25234acb9e14a. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

{'test': 974, 'train': 3219, 'validation': 455}

In [3]:
from transformers import RobertaTokenizer

# Tokenizer matching the "roberta-base" checkpoint that the model is built from.
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

def encode_batch(batch):
  """Tokenize the "text" entries of `batch` with the module-level tokenizer.

  Sequences longer than 512 tokens are truncated and shorter ones are padded
  up to 512, so every encoded example has the same length.
  """
  texts = batch["text"]
  encoded = tokenizer(
      texts,
      max_length=512,
      truncation=True,
      padding="max_length",
  )
  return encoded

# Encode the input data (tokenizes every split, one batch at a time)
dataset = dataset.map(encode_batch, batched=True)
# The transformers model expects the target class column to be named "labels".
# Use the returning `rename_column`: the in-place `rename_column_` variant is
# deprecated and is not available in newer `datasets` releases.
dataset = dataset.rename_column("label", "labels")
# Transform to pytorch tensors and only output the required columns
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  if sys.path[0] == '':


In [4]:
from transformers import RobertaConfig, RobertaModelWithHeads

# Start from the stock "roberta-base" configuration, declaring 7 target labels.
config = RobertaConfig.from_pretrained("roberta-base", num_labels=7)
# Load the pretrained weights into the heads-capable RoBERTa variant; adapters
# and a classification head are attached to this object in the next cell.
model = RobertaModelWithHeads.from_pretrained("roberta-base", config=config)

Downloading:   0%|          | 0.00/478M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and infere

In [5]:
# Add a new adapter
from transformers import HoulsbyConfig

# Houlsby-style adapter configuration with a reduction factor of 3.
config_ad = HoulsbyConfig(reduction_factor=3)
model.add_adapter("sciie", config=config_ad)

# Add a classification head under the same name, sized for the 7 labels.
model.add_classification_head("sciie", num_labels=7)

# Activate the adapter for training.
model.train_adapter("sciie")

In [6]:
import numpy as np
from transformers import TrainingArguments, AdapterTrainer, EvalPrediction

training_args = TrainingArguments(
    # Checkpoints go here; contents of an existing directory may be overwritten.
    output_dir="./training_output",
    overwrite_output_dir=True,
    # Optimisation schedule.
    num_train_epochs=15,
    learning_rate=1e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Report the training loss every 200 steps.
    logging_steps=200,
    # Must stay False so that the dataset's label column is passed through to the model.
    remove_unused_columns=False,
)

def compute_accuracy(p: EvalPrediction):
  """Compute plain accuracy for an evaluation pass.

  The predicted class of each row is the arg-max of `p.predictions` along
  axis 1; the result is `{"acc": <fraction of rows equal to p.label_ids>}`.
  """
  predicted_classes = np.argmax(p.predictions, axis=1)
  is_correct = predicted_classes == p.label_ids
  return {"acc": is_correct.mean()}

# Wire model, hyper-parameters, data splits and the accuracy metric together.
trainer = AdapterTrainer(
    args=training_args,
    model=model,
    compute_metrics=compute_accuracy,
    eval_dataset=dataset["validation"],
    train_dataset=dataset["train"],
)

In [7]:
# Run the training loop configured above; the returned TrainOutput
# (global step, mean training loss, runtime metrics) is shown as the cell result.
trainer.train()

***** Running training *****
  Num examples = 3219
  Num Epochs = 15
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 3030


Step,Training Loss
200,1.4315
400,0.8273
600,0.5003
800,0.3504
1000,0.2665
1200,0.194
1400,0.1734
1600,0.1318
1800,0.0834
2000,0.0882


Saving model checkpoint to ./training_output/checkpoint-500
Configuration saved in ./training_output/checkpoint-500/sciie/adapter_config.json
Module weights saved in ./training_output/checkpoint-500/sciie/pytorch_adapter.bin
Configuration saved in ./training_output/checkpoint-500/sciie/head_config.json
Module weights saved in ./training_output/checkpoint-500/sciie/pytorch_model_head.bin
Configuration saved in ./training_output/checkpoint-500/sciie/head_config.json
Module weights saved in ./training_output/checkpoint-500/sciie/pytorch_model_head.bin
Configuration saved in ./training_output/checkpoint-500/sciie/head_config.json
Module weights saved in ./training_output/checkpoint-500/sciie/pytorch_model_head.bin
Saving model checkpoint to ./training_output/checkpoint-1000
Configuration saved in ./training_output/checkpoint-1000/sciie/adapter_config.json
Module weights saved in ./training_output/checkpoint-1000/sciie/pytorch_adapter.bin
Configuration saved in ./training_output/checkpoint-

TrainOutput(global_step=3030, training_loss=0.2810830300987357, metrics={'train_runtime': 2198.6683, 'train_samples_per_second': 21.961, 'train_steps_per_second': 1.378, 'total_flos': 1.419596829448704e+16, 'train_loss': 0.2810830300987357, 'epoch': 15.0})

In [8]:
# Evaluate on the trainer's eval_dataset (the validation split) and display
# the resulting metrics dict, including the "eval_acc" from compute_accuracy.
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 455
  Batch size = 16


{'epoch': 15.0,
 'eval_acc': 0.9010989010989011,
 'eval_loss': 0.559148907661438,
 'eval_runtime': 9.5522,
 'eval_samples_per_second': 47.633,
 'eval_steps_per_second': 3.036}

In [9]:
from transformers import TextClassificationPipeline

# The pipeline takes an integer device ordinal where -1 selects the CPU.
# `training_args.device.index` is None when the device is the CPU, so map
# that case to -1 instead of passing None through (which breaks on CPU-only
# runtimes); on a GPU runtime the CUDA index is forwarded unchanged.
device_index = training_args.device.index
classifier = TextClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    device=device_index if device_index is not None else -1,
)

# Smoke-test the trained model on a single sentence; the cell displays the
# predicted label and its score.
classifier("This is awesome!")

[{'label': 'LABEL_3', 'score': 0.7297330498695374}]

In [10]:
# Persist the trained "sciie" adapter to ./final_adapter; per the log output
# below, the adapter config/weights and the head config/weights are written.
model.save_adapter("./final_adapter", "sciie")

# List the saved files with human-readable sizes.
!ls -lh final_adapter

Configuration saved in ./final_adapter/adapter_config.json
Module weights saved in ./final_adapter/pytorch_adapter.bin
Configuration saved in ./final_adapter/head_config.json
Module weights saved in ./final_adapter/pytorch_model_head.bin


total 39M
-rw-r--r-- 1 root root  571 Dec 16 01:40 adapter_config.json
-rw-r--r-- 1 root root  477 Dec 16 01:40 head_config.json
-rw-r--r-- 1 root root  37M Dec 16 01:40 pytorch_adapter.bin
-rw-r--r-- 1 root root 2.3M Dec 16 01:40 pytorch_model_head.bin


In [11]:
from google.colab import files
# Recursively archive /content into /content/file.zip.
# NOTE(review): this sweeps up everything under /content (the output below
# shows .config logs being added), not only ./final_adapter — confirm that
# is intended before sharing the archive.
!zip -r /content/file.zip /content
# Hand the archive to the browser for download (Colab-specific helper).
files.download("/content/file.zip")

  adding: content/ (stored 0%)
  adding: content/.config/ (stored 0%)
  adding: content/.config/gce (stored 0%)
  adding: content/.config/active_config (stored 0%)
  adding: content/.config/configurations/ (stored 0%)
  adding: content/.config/configurations/config_default (deflated 15%)
  adding: content/.config/config_sentinel (stored 0%)
  adding: content/.config/.last_survey_prompt.yaml (stored 0%)
  adding: content/.config/.last_update_check.json (deflated 24%)
  adding: content/.config/logs/ (stored 0%)
  adding: content/.config/logs/2021.12.03/ (stored 0%)
  adding: content/.config/logs/2021.12.03/14.32.30.027140.log (deflated 91%)
  adding: content/.config/logs/2021.12.03/14.33.37.701606.log (deflated 53%)
  adding: content/.config/logs/2021.12.03/14.33.36.903459.log (deflated 54%)
  adding: content/.config/logs/2021.12.03/14.33.16.964195.log (deflated 54%)
  adding: content/.config/logs/2021.12.03/14.33.09.955489.log (deflated 86%)
  adding: content/.config/logs/2021.12.03/14.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [12]:
from sklearn.metrics import classification_report

# Score the test split: `pred` holds the per-class scores, `label` the gold
# class ids and `metrics` the trainer's summary metrics.
pred, label, metrics = trainer.predict(dataset["test"])
# Predicted class id = position of the largest score in each row.
pred_label = np.argmax(pred, axis=1)

# Per-class precision / recall / F1 plus overall averages.
print(classification_report(y_true=label, y_pred=pred_label))

***** Running Prediction *****
  Num examples = 974
  Batch size = 16


              precision    recall  f1-score   support

           0       0.91      0.97      0.94       123
           1       0.61      0.59      0.60        59
           2       0.90      0.90      0.90        67
           3       0.94      0.92      0.93       533
           4       0.63      0.62      0.62        63
           5       0.84      0.84      0.84        38
           6       0.82      0.88      0.85        91

    accuracy                           0.88       974
   macro avg       0.81      0.82      0.81       974
weighted avg       0.88      0.88      0.88       974

