In [14]:
import numpy as np
import torch
from transformers import AutoModel
from transformers import AutoTokenizer

# Keep tensor reprs short for the notebook: two decimals, two edge items
# on each side of an elided dimension, and lines wrapped at 100 characters.
torch.set_printoptions(
    precision=2,
    edgeitems=2,
    linewidth=100,
)

In [9]:
# Name of the pretrained checkpoint whose tokenizer is loaded below.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# AutoTokenizer selects the tokenizer that belongs to this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [10]:
# Two example sentences of different lengths, tokenized together as one batch.
raw_inputs = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!",
]

# padding=True pads the shorter sentence up to the longest one (the trailing
# zeros in the displayed input_ids / attention_mask); return_tensors="pt"
# asks for PyTorch tensors.
inputs = tokenizer(
    raw_inputs,
    padding=True,
    truncation=True,
    return_tensors="pt",
)
inputs

{'input_ids': tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607,  2026,  2878,
          2166,  1012,   102],
        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,     0,     0,     0,
             0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}

In [12]:
# Dimensions and element type of the token-id tensor.
(inputs["input_ids"].shape, inputs["input_ids"].dtype)

(torch.Size([2, 16]), torch.int64)

In [13]:
# Dimensions and element type of the attention mask.
(inputs["attention_mask"].shape, inputs["attention_mask"].dtype)

(torch.Size([2, 16]), torch.int64)

### `from transformers import AutoModel`

We can download our pretrained model the same way we did with our tokenizer. 🤗 Transformers provides an `AutoModel` class which also has a `from_pretrained()` method:

In [16]:
# Same checkpoint name as the one given to the tokenizer.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# AutoModel instantiates the base model (DistilBertModel for this checkpoint);
# the checkpoint's classifier weights are not used, which is what the warning
# printed below reports.
model = AutoModel.from_pretrained(checkpoint)

Some weights of the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing DistilBertModel: ['pre_classifier.weight', 'classifier.weight', 'pre_classifier.bias', 'classifier.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [18]:
# Unpack the tokenizer result (input_ids, attention_mask) as keyword arguments.
# The call is made with autograd enabled, so the displayed tensor carries a grad_fn.
outputs = model(**inputs)

# Show the full output object together with the shape of its last hidden state.
(outputs, outputs.last_hidden_state.shape)

(BaseModelOutput(last_hidden_state=tensor([[[-0.18,  0.23,  ...,  0.50,  0.15],
          [ 0.28,  0.65,  ...,  0.51,  0.13],
          ...,
          [ 0.75,  0.05,  ...,  0.00, -0.61],
          [ 0.05,  0.37,  ...,  0.65, -0.39]],
 
         [[-0.29,  0.73,  ..., -1.02, -0.04],
          [-0.22,  0.94,  ..., -0.66,  0.24],
          ...,
          [-0.33,  0.97,  ..., -0.82, -0.06],
          [-0.35,  0.88,  ..., -0.83, -0.11]]], grad_fn=<NativeLayerNormBackward0>), hidden_states=None, attentions=None),
 torch.Size([2, 16, 768]))