In [1]:
# 1) Install / import
!pip install transformers        # run once in Colab or your venv
from transformers import pipeline, DistilBertTokenizer

# 2) Example A: sentiment analysis with the Hugging Face pipeline API
# Pin the checkpoint explicitly: DistilBERT fine-tuned on SST-2 (binary sentiment).
SENTIMENT_CHECKPOINT = "distilbert-base-uncased-finetuned-sst-2-english"
sentiment_pipe = pipeline("sentiment-analysis", model=SENTIMENT_CHECKPOINT)

# The pipeline accepts a list of texts and returns one {label, score} dict per text.
samples = [
    "This movie was absolutely fantastic!",
    "I really hated how it dragged on and on‚Ä¶",
]
preds = sentiment_pipe(samples)
print(f"pipeline output:\n {preds}")



Device set to use mps:0


pipeline output:
 [{'label': 'POSITIVE', 'score': 0.999874472618103}, {'label': 'NEGATIVE', 'score': 0.9996241331100464}]


In [2]:
# 3) Example B: raw DistilBERT tokenization + introspection

# Load the tokenizer for the uncased DistilBERT checkpoint.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

sentence = "Tokenize this sentence using DistilBERT."
tokens = tokenizer(sentence, return_tensors="pt")

# Pull out IDs & attention mask as plain Python lists.
# Index the single batch row explicitly instead of calling .squeeze():
# squeeze() drops *every* size-1 dimension, so it would also collapse the
# sequence axis if the encoding ever had length 1.
input_ids = tokens["input_ids"][0].tolist()
attention_mask = tokens["attention_mask"][0].tolist()

print(f"\nOriginal sentence:\n{sentence}\n")
print(f"Input IDs:\n{input_ids}\n")
print(f"Attention mask:\n{attention_mask}\n")

# Map each ID back to its vocabulary token. convert_ids_to_tokens is the
# direct ID -> token lookup and handles the whole list in one call; decode()
# is meant for turning IDs back into text.
# NOTE(review): the separator in the f-string below looks like a mis-encoded
# arrow; it is left untouched so the printed output does not change.
print("Decoded tokens:")
for idx, token in zip(input_ids, tokenizer.convert_ids_to_tokens(input_ids)):
    print(f"{idx:6d} ‚Üí {token}")


Original sentence:
Tokenize this sentence using DistilBERT.

Input IDs:
[101, 19204, 4697, 2023, 6251, 2478, 4487, 16643, 23373, 1012, 102]

Attention mask:
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

Decoded tokens:
   101 ‚Üí [CLS]
 19204 ‚Üí token
  4697 ‚Üí ##ize
  2023 ‚Üí this
  6251 ‚Üí sentence
  2478 ‚Üí using
  4487 ‚Üí di
 16643 ‚Üí ##sti
 23373 ‚Üí ##lbert
  1012 ‚Üí .
   102 ‚Üí [SEP]
