In [None]:
# Import required libraries
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from datasets import load_dataset


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


# Sentiment Analysis with DistilBERT Fine-tuning

This notebook demonstrates how to fine-tune a DistilBERT model for sentiment analysis using the IMDB dataset. We'll go through the following steps:

1. Setting up the required packages and environment
2. Loading and preprocessing the IMDB dataset
3. Initializing the DistilBERT model and tokenizer
4. Training the model
5. Evaluating the model's performance
6. Testing the model with custom examples

In [None]:
# Pick the compute device: CUDA when PyTorch reports one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Fetch the "imdb" dataset through the datasets library; the result is indexed by split name later on
dataset = load_dataset(path="imdb")
print("Dataset loaded successfully.")


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
# Load the pretrained tokenizer and a two-label classification model from the same checkpoint
model_name = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_name)

# Build the classifier and place it on the selected device in one step
model = DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=2).to(device)

print("Model and tokenizer initialized successfully.")


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize the ``"text"`` field of a batch of examples.

    Args:
        examples: Mapping with a ``"text"`` entry holding the raw strings to encode.

    Returns:
        Whatever the module-level ``tokenizer`` returns when called with
        ``padding="max_length"`` and ``truncation=True``.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, padding="max_length", truncation=True)
    return encoded

# Apply the tokenizer to the dataset in batches
tokenized_datasets = dataset.map(tokenize_function, batched=True)
print("Dataset tokenized successfully.")

# Drop the raw text, rename the target column to "labels" (the key the model
# call later receives via **batch), and have the dataset return torch tensors
tokenized_datasets = tokenized_datasets.remove_columns(["text"]).rename_column("label", "labels")
tokenized_datasets.set_format("torch")

In [None]:
# Create data loaders
from torch.utils.data import DataLoader

batch_size = 16

# Training batches are reshuffled; the evaluation loader keeps the DataLoader default order
train_dataloader = DataLoader(tokenized_datasets["train"], batch_size=batch_size, shuffle=True)
eval_dataloader = DataLoader(tokenized_datasets["test"], batch_size=batch_size)

print(f"Created data loaders with batch size: {batch_size}")

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 8
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2
    })
})

In [None]:
# Training setup
from torch.optim import AdamW
from tqdm.notebook import tqdm

# Hyperparameters are named once so the optimizer and the log lines below
# cannot drift apart (the learning rate used to be duplicated as a literal
# inside the printed string).
learning_rate = 5e-5
num_epochs = 3

optimizer = AdamW(model.parameters(), lr=learning_rate)

print("Starting training...")
print(f"Number of epochs: {num_epochs}")
print(f"Learning rate: {learning_rate}")

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    progress_bar = tqdm(train_dataloader, desc=f"Epoch {epoch + 1}")

    # `step` counts the batches processed so far (1-based) so the progress bar
    # can show a true running average of the loss.
    for step, batch in enumerate(progress_bar, start=1):
        # Move every tensor of the batch to the same device as the model
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        # Clear gradients so they do not accumulate into the next batch
        optimizer.zero_grad()

        total_loss += loss.item()
        # Divide by the batches seen so far, not by the total number of batches;
        # dividing by len(train_dataloader) understates the loss until the final step.
        progress_bar.set_postfix({"loss": total_loss / step})

In [None]:
# Evaluation
# Switch to inference behaviour before scoring the evaluation loader
model.eval()
total_correct = 0
total_samples = 0

print("\nEvaluating the model...")

# Gradients are not needed for scoring
with torch.no_grad():
    for batch in tqdm(eval_dataloader):
        batch = {name: tensor.to(device) for name, tensor in batch.items()}
        outputs = model(**batch)
        # Predicted class = index of the largest logit along the last dimension
        predictions = outputs.logits.argmax(dim=-1)
        total_correct += predictions.eq(batch["labels"]).sum().item()
        total_samples += batch["labels"].size(0)

accuracy = total_correct / total_samples
print(f"\nTest Accuracy: {accuracy:.4f}")

In [32]:
!pip install "accelerate>=0.26.0" "transformers[torch]" --quiet


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [22]:
!pip install "transformers[torch]" --quiet



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [19]:
!pip install "accelerate>=0.26.0" --quiet



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [16]:
# Debug: show TrainingArguments signature
import inspect

# Imported in this cell so it also runs on a fresh kernel; no other visible
# cell brings TrainingArguments into scope.
from transformers import TrainingArguments

print(inspect.signature(TrainingArguments))
# __doc__ can be None (e.g. when docstrings are stripped), so fall back to ""
print((TrainingArguments.__doc__ or "")[:1000])



    TrainingArguments is the subset of the arguments we use in our example scripts **which relate to the training loop
    itself**.

    Using [`HfArgumentParser`] we can turn this class into
    [argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
    command line.

    Parameters:
        output_dir (`str`, *optional*, defaults to `"trainer_output"`):
            The output directory where the model predictions and checkpoints will be written.
        overwrite_output_dir (`bool`, *optional*, defaults to `False`):
            If `True`, overwrite the content of the output directory. Use this to continue training if `output_dir`
            points to a checkpoint directory.
        do_train (`bool`, *optional*, defaults to `False`):
            Whether to run training or not. This argument is not directly used by [`Trainer`], it's intended to be used
            by your training/evaluation scripts instead. See the [example
   

In [17]:
import inspect

# Imported in this cell so it also runs on a fresh kernel; no other visible
# cell brings TrainingArguments into scope.
from transformers import TrainingArguments

# Check which keyword arguments the installed TrainingArguments accepts
params = inspect.signature(TrainingArguments).parameters
print('evaluation_strategy' in params)
print('logging_steps' in params)


False
True


In [40]:
# Test with custom examples
def predict_sentiment(text):
    """Classify a single review with the module-level ``model`` and ``tokenizer``.

    The model is switched to eval mode first, so the prediction does not depend
    on whether a training cell (which calls ``model.train()``) ran last.

    Args:
        text: The review to classify, as one string.

    Returns:
        A ``(sentiment, confidence)`` tuple. ``sentiment`` is ``"Positive"`` when
        the predicted class index is 1 and ``"Negative"`` otherwise;
        ``confidence`` is the softmax probability of the predicted class as a float.
    """
    # Inference must not run with train-mode behaviour still enabled
    model.eval()

    # Tokenize input and move the encoded tensors to the model's device
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)

    # Get prediction without tracking gradients
    with torch.no_grad():
        logits = model(**inputs).logits

    # Only the first row is read, matching the single-text contract
    probabilities = torch.softmax(logits, dim=-1)[0]
    predicted_class = int(torch.argmax(probabilities))
    confidence = probabilities[predicted_class].item()

    sentiment = "Positive" if predicted_class == 1 else "Negative"
    return sentiment, confidence

# Hand-written reviews used as a quick sanity check of the classifier
test_texts = [
    "This movie was absolutely amazing! I loved every minute of it.",
    "What a terrible waste of time. I wouldn't recommend this to anyone.",
    "The film was okay, but nothing special."
]

print("Testing model with example reviews:\n")
# map() is lazy, so each review is still classified right before it is printed
for text, (sentiment, confidence) in zip(test_texts, map(predict_sentiment, test_texts)):
    print(f"Text: {text}")
    print(f"Sentiment: {sentiment} (Confidence: {confidence:.4f})\n")

Epoch 1: Average Loss = 0.6935, Accuracy = 0.50
Epoch 2: Average Loss = 0.6316, Accuracy = 1.00
Epoch 2: Average Loss = 0.6316, Accuracy = 1.00
Epoch 3: Average Loss = 0.6110, Accuracy = 1.00
Epoch 3: Average Loss = 0.6110, Accuracy = 1.00


In [41]:
# 💬 Cell 8 — Test Predictions
test_texts = [
    "I really enjoyed this!",
    "This was disappointing and boring."
]

# Encode both texts as one padded batch and move it to the model's device;
# leaving the inputs on the CPU fails with a device mismatch when the model is on CUDA.
inputs = tokenizer(test_texts, return_tensors="pt", padding=True, truncation=True).to(device)

# Inference only: eval mode, and no gradient tracking
model.eval()
with torch.no_grad():
    outputs = model(**inputs)

predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
labels = torch.argmax(predictions, dim=1)

for i, (text, label) in enumerate(zip(test_texts, labels)):
    # show per-sample confidence for the chosen label
    confidence = predictions[i][label].item()
    print(f"{text} → {'Positive' if label == 1 else 'Negative'} ({confidence:.2f} confidence)")

I really enjoyed this! → Positive (0.60 confidence)
This was disappointing and boring. → Negative (0.51 confidence)


# 📊 Cell 9 — Reflection (Markdown)

**Reflection**

Through this project, I learned how to fine-tune a pre-trained transformer model (DistilBERT) for a custom classification task.
It was interesting to see how powerful transfer learning can be, even with a small dataset.
The main challenge was setting up the tokenizer and ensuring the inputs had the correct shapes for training.