# RoBERTa

This is adapted from [PyTorch Hub](https://pytorch.org/hub/pytorch_fairseq_roberta/).

### Requirements

In [1]:
pip install regex requests hydra-core omegaconf bitarray scikit-learn sacrebleu

Collecting hydra-core
  Downloading hydra_core-1.3.2-py3-none-any.whl (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.5/154.5 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting omegaconf
  Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.5/79.5 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitarray
  Obtaining dependency information for bitarray from https://files.pythonhosted.org/packages/03/68/242187758a50bd6602ac4b245252f9a90cc2c43e1049e7d12d47ef62cd60/bitarray-2.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading bitarray-2.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (32 kB)
Collecting sacrebleu
  Downloading sacrebleu-2.3.1-py3-none-any.whl (118 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.9/118.9 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
Collecting antlr4-python3-runt

#### Load RoBERTa

In [2]:
import torch
roberta = torch.hub.load('pytorch/fairseq', 'roberta.large')
roberta.eval()  # disable dropout (or leave in train mode to finetune)

Downloading: "https://github.com/pytorch/fairseq/zipball/main" to /home/jovyan/.cache/torch/hub/main.zip
2023-08-19 07:51:09 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX


Unable to build Cython components. Please make sure Cython is installed if the torch.hub model you are loading depends on it.


2023-08-19 07:51:11 | INFO | fairseq.file_utils | http://dl.fbaipublicfiles.com/fairseq/models/roberta.large.tar.gz not found in cache, downloading to /tmp/tmp6o63dlrn
100%|██████████| 655283069/655283069 [00:16<00:00, 40675494.72B/s]
2023-08-19 07:51:28 | INFO | fairseq.file_utils | copying /tmp/tmp6o63dlrn to cache at /home/jovyan/.cache/torch/pytorch_fairseq/83e3a689e28e5e4696ecb0bbb05a77355444a5c8a3437e0f736d8a564e80035e.c687083d14776c1979f3f71654febb42f2bb3d9a94ff7ebdfe1ac6748dba89d2
2023-08-19 07:51:29 | INFO | fairseq.file_utils | creating metadata file for /home/jovyan/.cache/torch/pytorch_fairseq/83e3a689e28e5e4696ecb0bbb05a77355444a5c8a3437e0f736d8a564e80035e.c687083d14776c1979f3f71654febb42f2bb3d9a94ff7ebdfe1ac6748dba89d2
2023-08-19 07:51:29 | INFO | fairseq.file_utils | removing temp file /tmp/tmp6o63dlrn
2023-08-19 07:51:29 | INFO | fairseq.file_utils | loading archive file http://dl.fbaipublicfiles.com/fairseq/models/roberta.large.tar.gz from cache at /home/jovyan/.cache/

RobertaHubInterface(
  (model): RobertaModel(
    (encoder): RobertaEncoder(
      (sentence_encoder): TransformerEncoder(
        (dropout_module): FairseqDropout()
        (embed_tokens): Embedding(50265, 1024, padding_idx=1)
        (embed_positions): LearnedPositionalEmbedding(514, 1024, padding_idx=1)
        (layernorm_embedding): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (layers): ModuleList(
          (0): TransformerEncoderLayerBase(
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
            (dr

#### Apply Byte-Pair Encoding (BPE) to input text

In [3]:
tokens = roberta.encode('Hello world!')
assert tokens.tolist() == [0, 31414, 232, 328, 2]
assert roberta.decode(tokens) == 'Hello world!'

#### Extract features from RoBERTa

In [4]:
# Extract the last layer's features
last_layer_features = roberta.extract_features(tokens)
assert last_layer_features.size() == torch.Size([1, 5, 1024])

# Extract all layer's features (layer 0 is the embedding layer)
all_layers = roberta.extract_features(tokens, return_all_hiddens=True)
assert len(all_layers) == 25
assert torch.all(all_layers[-1] == last_layer_features)

#### Use RoBERTa for sentence-pair classification tasks

In [5]:
# Download RoBERTa already finetuned for MNLI
roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')
roberta.eval()  # disable dropout for evaluation

with torch.no_grad():
    # Encode a pair of sentences and make a prediction
    tokens = roberta.encode('Roberta is a heavily optimized version of BERT.', 'Roberta is not very optimized.')
    prediction = roberta.predict('mnli', tokens).argmax().item()
    assert prediction == 0  # contradiction

    # Encode another pair of sentences
    tokens = roberta.encode('Roberta is a heavily optimized version of BERT.', 'Roberta is based on BERT.')
    prediction = roberta.predict('mnli', tokens).argmax().item()
    assert prediction == 2  # entailment

Using cache found in /home/jovyan/.cache/torch/hub/pytorch_fairseq_main


Unable to build Cython components. Please make sure Cython is installed if the torch.hub model you are loading depends on it.


2023-08-19 07:52:50 | INFO | fairseq.file_utils | http://dl.fbaipublicfiles.com/fairseq/models/roberta.large.mnli.tar.gz not found in cache, downloading to /tmp/tmp1r795b8z
100%|██████████| 751652118/751652118 [00:16<00:00, 45244053.97B/s]
2023-08-19 07:53:06 | INFO | fairseq.file_utils | copying /tmp/tmp1r795b8z to cache at /home/jovyan/.cache/torch/pytorch_fairseq/7685ba8546f9a5ce1a00c7a6d7d44f7e748d22681172f0f391c3d48f487c801c.74e37d47306b3cc51c5f8d335022a392c29f1906c8cd9e9cd3446d7422cf55d8
2023-08-19 07:53:09 | INFO | fairseq.file_utils | creating metadata file for /home/jovyan/.cache/torch/pytorch_fairseq/7685ba8546f9a5ce1a00c7a6d7d44f7e748d22681172f0f391c3d48f487c801c.74e37d47306b3cc51c5f8d335022a392c29f1906c8cd9e9cd3446d7422cf55d8
2023-08-19 07:53:09 | INFO | fairseq.file_utils | removing temp file /tmp/tmp1r795b8z
2023-08-19 07:53:09 | INFO | fairseq.file_utils | loading archive file http://dl.fbaipublicfiles.com/fairseq/models/roberta.large.mnli.tar.gz from cache at /home/jovy

#### Register a new (randomly initialized) classification head

In [6]:
roberta.register_classification_head('new_task', num_classes=3)
logprobs = roberta.predict('new_task', tokens)  # tensor([[-1.1050, -1.0672, -1.1245]], grad_fn=<LogSoftmaxBackward>)

In [7]:
logprobs

tensor([[-0.9561, -1.1261, -1.2334]], grad_fn=<LogSoftmaxBackward0>)