In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Setup

In [3]:
!pip install captum

Collecting captum
  Downloading captum-0.7.0-py3-none-any.whl.metadata (26 kB)
Downloading captum-0.7.0-py3-none-any.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: captum
Successfully installed captum-0.7.0


In [4]:
!pip install transformers



In [5]:
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from captum.attr import LayerIntegratedGradients
from captum.attr import visualization as viz

In [29]:
model_name = 'CAMeL-Lab/bert-base-arabic-camelbert-da-sentiment'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/86.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/841 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/305k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

# Construct Original and Baseline Input

In [30]:
def construct_input_and_baseline(text):

    max_length = 510
    baseline_token_id = tokenizer.pad_token_id 
    sep_token_id = tokenizer.sep_token_id 
    cls_token_id = tokenizer.cls_token_id 

    text_ids = tokenizer.encode(text, max_length=max_length, truncation=True, add_special_tokens=False)
   
    input_ids = [cls_token_id] + text_ids + [sep_token_id]
    token_list = tokenizer.convert_ids_to_tokens(input_ids)
  

    baseline_input_ids = [cls_token_id] + [baseline_token_id] * len(text_ids) + [sep_token_id]
    return torch.tensor([input_ids], device='cpu'), torch.tensor([baseline_input_ids], device='cpu'), token_list

# Define Model Input and Output

In [32]:
# Define model output
def model_output(inputs):
    return model(inputs)[0]

# Define model input
model_input = model.bert.embeddings

# Instantiate Integrated Gradients Method

In [33]:
lig = LayerIntegratedGradients(model_output, model_input)

In [85]:
text = 'لم أحب الطعام الذي تناولته بالأمس' # true negative
input_ids, baseline_input_ids, all_tokens = construct_input_and_baseline(text)
print(f'original text: {input_ids}')
print(f'baseline text: {baseline_input_ids}')

original text: tensor([[    2,  2043,  7699,  6437,  2130, 22578,  1028,  3876,  2632,     3]])
baseline text: tensor([[2, 0, 0, 0, 0, 0, 0, 0, 0, 3]])


# Compute Attributions

In [86]:
attributions, delta = lig.attribute(input_ids, baseline_input_ids, target=true_class, return_convergence_delta=True )
print(attributions.size())

torch.Size([1, 10, 768])


# Compute Attribution for Each Token

In [87]:
def summarize_attributions(attributions):

    attributions = attributions.sum(dim=-1).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    
    return attributions

attributions_sum = summarize_attributions(attributions)
print(attributions_sum.size())

torch.Size([10])


In [89]:
# 0: Negative, 1: Neutral, 2: Positive 
score_vis = viz.VisualizationDataRecord(
                        word_attributions = attributions_sum,
                        pred_prob = torch.max(model(input_ids)[0]),
                        pred_class = torch.argmax(model(input_ids)[0]).numpy(),
                        true_class = 0,
                        attr_class = text,
                        attr_score = attributions_sum.sum(),       
                        raw_input_ids = all_tokens,
                        convergence_score = delta)

viz.visualize_text([score_vis])

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,0 (0.84),لم أحب الطعام الذي تناولته بالأمس,0.62,[CLS] لم أحب الطعام الذي تناولت ##ه بالأ ##مس [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,0 (0.84),لم أحب الطعام الذي تناولته بالأمس,0.62,[CLS] لم أحب الطعام الذي تناولت ##ه بالأ ##مس [SEP]
,,,,
