Install necessary libraries

In [1]:
# Install the notebook's dependencies into the environment of the running kernel.
%pip install shap transformers torch

Note: you may need to restart the kernel to use updated packages.


In [2]:
from pathlib import Path

import pandas as pd
import shap
import torch
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast

Load the fine-tuned model downloaded from Colab

In [3]:
# Folder holding the fine-tuned DistilBERT model and tokenizer downloaded from Colab.
# NOTE: this is a machine-specific absolute path; edit it to the folder location on your PC.
model_path = r"C:\Users\USER\Downloads\distilbert_sentiment_model"

# Fail early with a readable message. A string that is not an existing directory
# may otherwise be treated by from_pretrained as a Hub model id, which produces
# an error that does not point at the real problem (a wrong local path).
if not Path(model_path).is_dir():
    raise FileNotFoundError(
        f"Model folder not found: {model_path!r}. "
        "Point model_path at the directory containing the saved model and tokenizer files."
    )

# Load the fine-tuned DistilBERT model and its tokenizer from that folder.
tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)
model = DistilBertForSequenceClassification.from_pretrained(model_path)

# Switch to evaluation mode for inference. eval() returns the model itself, so as
# the last expression of the cell it also displays the model architecture.
model.eval()

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [4]:
# Load the test reviews into a DataFrame. The CSV path is relative, so the file
# must sit in the notebook's working directory.
test_df = pd.read_csv("test_reviews.csv")  

In [5]:
# Draw five reviews from the "Clean_Review" column to explain: missing entries
# are dropped, every value is cast to str, and the fixed random_state makes the
# same five reviews come back on every run.
sample_texts = (
    test_df["Clean_Review"].dropna().astype(str)
    .sample(n=5, random_state=42)
    .tolist()
)

In [6]:
def predict_logits(texts, max_length=64):
    """Return the classifier's raw logits for one or more texts.

    This is the prediction function handed to the SHAP explainer: it tokenises
    the texts with the notebook-level ``tokenizer`` and runs them through the
    notebook-level ``model`` without tracking gradients.

    Args:
        texts: A single string or an iterable of items; every item is converted
            with ``str()`` before tokenisation.
        max_length: Maximum number of tokens kept per text. Longer texts are
            truncated, so tokens past this limit cannot influence the logits.
            Defaults to 64, the value this notebook used originally.

    Returns:
        A NumPy array of logits with one row per input text and one column per
        class (``model.config.num_labels`` columns). An empty input yields an
        array with zero rows.
    """
    # Accept a bare string as a batch of one.
    if isinstance(texts, str):
        texts = [texts]

    # Coerce every element to a plain str so array-like inputs (presumably what
    # the SHAP masker supplies) are accepted by the tokenizer.
    texts = [str(t) for t in texts]

    # Nothing to score: return a correctly shaped empty result instead of
    # letting the tokenizer fail on an empty batch.
    if not texts:
        return torch.empty((0, model.config.num_labels)).numpy()

    inputs = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt"
    )

    # Keep the inputs on the same device as the model so the function also
    # works when the model has been moved to a GPU.
    inputs = inputs.to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)

    # Bring the logits back to the CPU before converting to NumPy.
    return outputs.logits.detach().cpu().numpy()

In [7]:
# Build the SHAP explainer around predict_logits. The Text masker reuses the
# model's tokenizer to decide how each review is split and masked.
explainer = shap.Explainer(
    predict_logits,
    masker=shap.maskers.Text(tokenizer),
)

In [8]:
# Generate SHAP values for the sampled reviews.
# This is the slow step of the notebook: in the recorded run it took a little
# over two minutes for the five reviews.
shap_values = explainer(sample_texts)

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  20%|██        | 1/5 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  60%|██████    | 3/5 [01:06<00:34, 17.49s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  80%|████████  | 4/5 [01:35<00:22, 22.07s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 100%|██████████| 5/5 [01:52<00:00, 20.38s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 6it [02:16, 27.29s/it]                       


Visualization

In [9]:
# Render the token-level explanation for the first sampled review.
shap.plots.text(shap_values[0])