# Workshop 8: Sentiment Analysis with BERT
Perform sentiment analysis on short movie reviews using a pretrained BERT model fine-tuned on IMDB (`textattack/bert-base-uncased-imdb`), loaded with Hugging Face Transformers.

## Setup
Install required libraries.

In [1]:

!pip install transformers torch --quiet


## Load Model and Tokenizer

In [2]:

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Hugging Face Hub identifier of the checkpoint used throughout the notebook.
model_name = "textattack/bert-base-uncased-imdb"

# Fetch the tokenizer that matches the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the classifier and switch it to evaluation mode in one step
# (`eval()` returns the module itself, so the result can be assigned directly).
model = AutoModelForSequenceClassification.from_pretrained(model_name).eval()

# Display the model architecture as the cell output.
model


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/511 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

## Tokenize Sample Texts

In [3]:

# Sample reviews: one clearly positive, one mixed, one clearly negative.
texts = [
    "I love this movie! It was fantastic and thrilling.",
    "The movie was okay, not bad but not great.",
    "I hated the movie, it was a waste of time.",
]

# Encode the whole batch at once as PyTorch tensors; shorter sequences are
# padded so every row has the same length, and truncation is enabled.
inputs = tokenizer(
    texts,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

# Show the encoded batch (input ids, token type ids, attention mask).
inputs


{'input_ids': tensor([[  101,  1045,  2293,  2023,  3185,   999,  2009,  2001, 10392,  1998,
         26162,  1012,   102,     0],
        [  101,  1996,  3185,  2001,  3100,  1010,  2025,  2919,  2021,  2025,
          2307,  1012,   102,     0],
        [  101,  1045,  6283,  1996,  3185,  1010,  2009,  2001,  1037,  5949,
          1997,  2051,  1012,   102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

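## Predict Sentiments

The code below treats class index 1 as "Positive" and every other prediction as "Negative", so there is no neutral label: a mixed review is assigned to whichever class scores slightly higher, typically with a confidence close to 0.5.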

In [4]:

# Run the forward pass without gradient tracking; this is inference only.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits

# Turn the logits into per-class probabilities and take the most likely class
# for each text (dim=1 is the class dimension).
probabilities = logits.softmax(dim=1)
predicted_classes = probabilities.argmax(dim=1)

# Report the predicted label and the probability assigned to it for each text.
for idx, text in enumerate(texts):
    prob = probabilities[idx]
    pred = predicted_classes[idx]
    # Class index 1 is reported as positive; anything else as negative.
    if pred == 1:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    print(f"Text: {text}\nSentiment: {sentiment} (Confidence: {prob[pred]:.4f})\n")


Text: I love this movie! It was fantastic and thrilling.
Sentiment: Positive (Confidence: 0.9844)

Text: The movie was okay, not bad but not great.
Sentiment: Negative (Confidence: 0.5018)

Text: I hated the movie, it was a waste of time.
Sentiment: Negative (Confidence: 0.9989)

