In [1]:
import torch
import pandas as pd
import numpy as np
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# https://pytorch.org/tutorials/beginner/introyt/captumyt.html

In [None]:
# https://captum.ai/tutorials/Bert_SQUAD_Interpret
# https://github.com/pytorch/captum/issues/373

In [2]:
# Load the saved classifier predictions from Drive and preview the first rows.
# NOTE: the variable name is misspelled ("predicitons"); it is kept as-is
# because the sampling cell below refers to it by this exact name.
predictions_csv = "/content/drive/My Drive/LIN371/predictions/predictions_classification_only.csv"
predicitons = pd.read_csv(predictions_csv)
predicitons.head()

Unnamed: 0.1,Unnamed: 0,text,label,new_label,prediction
0,0,Those pussy lips need more cleaning with my to...,1,explicit_source_has_explicit_words,1
1,1,"I have choices for you. Choice seating, at that",1,explicit_source_no_explicit_words,1
2,2,I want to finish.,1,explicit_source_no_explicit_words,0
3,3,"Oh it Will, one way or an other 😉",1,explicit_source_no_explicit_words,1
4,4,"No need to thank me, thank you so much for sha...",1,explicit_source_no_explicit_words,0


In [3]:
# Draw five examples that the classifier predicted as class 1.
# The fixed random_state makes the same five rows come back on every run.
is_predicted_1 = predicitons['prediction'] == 1
pred_1 = predicitons.loc[is_predicted_1].sample(n=5, random_state=5)
texts = list(pred_1['text'])
texts

['Looks wet and ready',
 'Come here you cuddly little rascal',
 'Exactly what I need after a long day at work.',
 'man i love a Bush! u are stunning',
 'Mmmmm mmmm mmm\n']

In [4]:
import transformers
from transformers import BertTokenizer, BertForSequenceClassification

# Hub id of the checkpoint used for both the model and its tokenizer.
model_path = 'dxhf100/lin371_classification_only'

# Prefer the GPU when one is visible, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Tokenizer loaded from the same checkpoint as the model.
tokenizer = BertTokenizer.from_pretrained(model_path)

# Load the classifier, move it to the chosen device, switch to eval mode
# and clear any gradients before attribution.
model = BertForSequenceClassification.from_pretrained(model_path)
model = model.to(device)
model.eval()
model.zero_grad()

# model = BertForSequenceClassification.from_pretrained('/content/drive/My Drive/LIN371/bert-base-uncased-mlm-classifier')
# tokenizer = BertTokenizer.from_pretrained('/content/drive/My Drive/LIN371/bert-base-uncased-mlm-classifier')
# model.to(device)
# model.eval()
# model.zero_grad()

In [5]:
def predict(inputs, token_type_ids=None, position_ids=None, attention_mask=None):
    """Run the module-level ``model`` on ``inputs`` and return its logits.

    Args:
        inputs: Token ids passed to the model as its first positional argument.
        token_type_ids: Forwarded to the model unchanged (optional).
        position_ids: Forwarded to the model unchanged (optional).
        attention_mask: Forwarded to the model unchanged (optional).

    Returns:
        The ``logits`` attribute of the model output.
    """
    forward_kwargs = {
        "token_type_ids": token_type_ids,
        "position_ids": position_ids,
        "attention_mask": attention_mask,
    }
    # NOTE: `model` is looked up in the notebook's global namespace at call time.
    return model(inputs, **forward_kwargs).logits


In [6]:
def class_forward_func(inputs, attention_mask=None):
    """Return the logit of class index 1 for every item in the batch.

    Thin wrapper around ``predict`` whose scalar-per-example output is what
    the attribution method differentiates.
    """
    logits = predict(inputs, attention_mask=attention_mask)
    # Column 1 holds the score of class 1.
    return logits[:, 1]

In [8]:
!pip install captum

Collecting captum
  Downloading captum-0.7.0-py3-none-any.whl.metadata (26 kB)
Downloading captum-0.7.0-py3-none-any.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: captum
Successfully installed captum-0.7.0


In [9]:
from captum.attr import LayerConductance, LayerIntegratedGradients

In [10]:
def integrated_grad(model, tokenizer, text, target_class=1, n_steps=50):
    """Compute Layer Integrated Gradients over the BERT embedding layer for one text.

    The forward pass and the hooked layer both come from the ``model`` argument,
    so the result does not depend on which model happens to be bound to the
    notebook-level ``model`` name.

    Args:
        model: Sequence-classification model exposing ``model.bert.embeddings``
            and returning an output with a ``logits`` attribute.
        tokenizer: Tokenizer matching ``model``; must define ``pad_token_id``.
        text: The raw string to explain.
        target_class: Index of the logit to attribute (defaults to 1, the class
            this notebook analyses).
        n_steps: Number of integration steps passed to Captum (50 is Captum's
            own default).

    Returns:
        ``(attributions, delta)`` as returned by
        ``LayerIntegratedGradients.attribute(..., return_convergence_delta=True)``.
        Attributions are presumably shaped (1, seq_len, hidden_size) -- confirm
        against ``summarize_attributions`` which sums over the last axis.
    """
    # Run everything on the device the supplied model already lives on.
    model_device = next(model.parameters()).device

    encoding = tokenizer(
        [text],
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    input_ids = encoding["input_ids"].to(model_device)
    attention_mask = encoding["attention_mask"].to(model_device)

    # Baseline: the same sequence length filled entirely with [PAD] token ids.
    baseline_ids = torch.full_like(input_ids, tokenizer.pad_token_id)

    def _target_logit(ids, mask=None):
        # Scalar-per-example output that Captum differentiates.
        return model(ids, attention_mask=mask).logits[:, target_class]

    lig = LayerIntegratedGradients(_target_logit, model.bert.embeddings)

    attributions, delta = lig.attribute(
        inputs=input_ids,
        baselines=baseline_ids,
        # Explicit one-element tuple: forwarded positionally after `inputs`.
        additional_forward_args=(attention_mask,),
        n_steps=n_steps,
        return_convergence_delta=True,
    )

    return attributions, delta


In [11]:
def summarize_attributions(attributions):
    """Collapse embedding-level attributions to one L2-normalised score per token.

    Args:
        attributions: Tensor whose last axis is summed away; a leading axis of
            size 1 (the batch axis) is then squeezed out.

    Returns:
        The per-token scores divided by their L2 norm. If every score is zero
        the (all-zero) scores are returned unchanged instead of NaNs.
    """
    token_scores = attributions.sum(dim=-1).squeeze(0)
    norm = torch.norm(token_scores)
    # Guard against 0/0 -> NaN when there is no attribution signal at all.
    if norm == 0:
        return token_scores
    return token_scores / norm

In [12]:
def summarize_integrated_grad(model, tokenizer, text):
    """Return normalised per-token attribution scores for ``text``.

    Runs ``integrated_grad`` and feeds its attributions through
    ``summarize_attributions``; the convergence delta is discarded.
    """
    token_attributions, _ = integrated_grad(model, tokenizer, text)
    return summarize_attributions(token_attributions)

In [13]:
# Attribution scores of the classification-only model for the five sampled texts.
(
    text0_attributions,
    text1_attributions,
    text2_attributions,
    text3_attributions,
    text4_attributions,
) = [summarize_integrated_grad(model, tokenizer, texts[i]) for i in range(5)]

In [16]:
# Print each example's word-piece tokens followed by its per-token attributions.
classifier_attributions = [
    text0_attributions,
    text1_attributions,
    text2_attributions,
    text3_attributions,
    text4_attributions,
]
for attribution, text in zip(classifier_attributions, texts):
    encoded = tokenizer(text, return_tensors="pt")
    # Take the single sequence in the batch and map its ids back to token strings.
    text_tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0].tolist())
    print(text_tokens, attribution, sep="\n")

['[CLS]', 'looks', 'wet', 'and', 'ready', '[SEP]']
tensor([ 0.0895, -0.0678,  0.6737,  0.1338,  0.5200,  0.4951],
       dtype=torch.float64)
['[CLS]', 'come', 'here', 'you', 'cu', '##dd', '##ly', 'little', 'ras', '##cal', '[SEP]']
tensor([-0.1430,  0.3008,  0.1181,  0.3007,  0.0157,  0.6022,  0.0781,  0.0916,
        -0.0965, -0.1662, -0.6086], dtype=torch.float64)
['[CLS]', 'exactly', 'what', 'i', 'need', 'after', 'a', 'long', 'day', 'at', 'work', '.', '[SEP]']
tensor([ 0.0470,  0.3485,  0.5126,  0.0418,  0.1618,  0.1911,  0.1741,  0.2510,
         0.1260,  0.2882,  0.4625, -0.0783,  0.3699], dtype=torch.float64)
['[CLS]', 'man', 'i', 'love', 'a', 'bush', '!', 'u', 'are', 'stunning', '[SEP]']
tensor([ 0.0307, -0.0743,  0.0638,  0.3680,  0.1936,  0.2205,  0.1317,  0.2406,
         0.0433,  0.2329,  0.7979], dtype=torch.float64)
['[CLS]', 'mmm', '##mm', 'mmm', '##m', 'mmm', '[SEP]']
tensor([0.0613, 0.1811, 0.1546, 0.2442, 0.1794, 0.5576, 0.7327],
       dtype=torch.float64)


In [17]:
# Swap in the MLM-tuned classifier.
# The notebook-level names `model` and `tokenizer` are rebound on purpose:
# the cells below pass them along, and predict() reads the global `model`.
mlm_model_path = 'dxhf100/lin371_mlm_tuned_classification_model'

tokenizer = BertTokenizer.from_pretrained(mlm_model_path)
model = BertForSequenceClassification.from_pretrained(mlm_model_path).to(device)
model.eval()
model.zero_grad()


In [18]:
# Attribution scores of the MLM-tuned model for the same five sampled texts.
(
    mlm0_attributions,
    mlm1_attributions,
    mlm2_attributions,
    mlm3_attributions,
    mlm4_attributions,
) = [summarize_integrated_grad(model, tokenizer, texts[i]) for i in range(5)]


In [19]:
# Print each example's word-piece tokens followed by the MLM-tuned model's attributions.
mlm_attributions = [
    mlm0_attributions,
    mlm1_attributions,
    mlm2_attributions,
    mlm3_attributions,
    mlm4_attributions,
]
for attribution, text in zip(mlm_attributions, texts):
    encoded = tokenizer(text, return_tensors="pt")
    # Take the single sequence in the batch and map its ids back to token strings.
    text_tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0].tolist())
    print(text_tokens, attribution, sep="\n")

['[CLS]', 'looks', 'wet', 'and', 'ready', '[SEP]']
tensor([ 0.0836,  0.0394,  0.4431,  0.3749,  0.8067, -0.0621],
       dtype=torch.float64)
['[CLS]', 'come', 'here', 'you', 'cu', '##dd', '##ly', 'little', 'ras', '##cal', '[SEP]']
tensor([-0.0585,  0.3938, -0.0605,  0.6081, -0.0402,  0.1627,  0.1148,  0.5357,
        -0.2296,  0.0804,  0.2839], dtype=torch.float64)
['[CLS]', 'exactly', 'what', 'i', 'need', 'after', 'a', 'long', 'day', 'at', 'work', '.', '[SEP]']
tensor([-0.0943,  0.1563,  0.1696, -0.1734,  0.3216,  0.3031,  0.2491,  0.2333,
         0.2119,  0.1527,  0.6794,  0.1266, -0.2244], dtype=torch.float64)
['[CLS]', 'man', 'i', 'love', 'a', 'bush', '!', 'u', 'are', 'stunning', '[SEP]']
tensor([ 0.0126, -0.0797,  0.0348,  0.3555,  0.1795,  0.0608,  0.2363,  0.3933,
        -0.0416,  0.5714,  0.5396], dtype=torch.float64)
['[CLS]', 'mmm', '##mm', 'mmm', '##m', 'mmm', '[SEP]']
tensor([0.0045, 0.3401, 0.3311, 0.4415, 0.3949, 0.6061, 0.2374],
       dtype=torch.float64)
