In [1]:
import sys, time, pickle
import torch

sys.path.insert(0, '../../Utils')
from global_constants import gpu_device

sys.path.insert(0, '../')
from BERT_models import BERT_SST2_MODEL

from _utils import sample_random_glue_sst2, get_continuation_mapping, \
                    get_continuous_attributions, get_continuous_raw_inputs, \
                    collect_info_for_metric, save_info, download_HTML

In [2]:
sst2_data_raw, targets, idxs = sample_random_glue_sst2()

Reusing dataset glue (/home/user/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/user/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-3b24abff24d1d8c0.arrow
Loading cached processed dataset at /home/user/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-5960909ab3834668.arrow


In [3]:
model = BERT_SST2_MODEL()
tokenizer = model.tokenizer

In [4]:
model_out_list, raw_attr_list, conti_attr_list, raw_input_list = [], [], [], []

In [5]:
from captum.attr import KernelShap
from captum.attr import visualization 

In [6]:
ks = KernelShap(model)

In [7]:
def generate_record(raw_review, target):
    #tokenizer operations
    tokenized = tokenizer(raw_review, truncation=True, return_offsets_mapping=True)
    offset_mapping = tokenized['offset_mapping']
    conti_map = get_continuation_mapping(offset_mapping)
    input_ids = torch.tensor(tokenized['input_ids']).unsqueeze(0).to(gpu_device)
    detokenized = [t.replace('#', '') for t in tokenizer.convert_ids_to_tokens(input_ids[0])]
    
    #feeding input forward 
    input_emb = model.get_embeddings(input_ids)
    pred_prob = model(input_emb).item()
    
    #categorizing results
    pred_class = 'Pos' if pred_prob > 0.5 else 'Neg' 
    true_class = 'Pos' if target > 0.5 else 'Neg' 
    
    #attribution algorithm working
    attribution = ks.attribute(input_emb, n_samples=2000, perturbations_per_eval=200, show_progress=True)
    word_attributions = attribution.squeeze(0).sum(dim=1)
    word_attributions /= torch.norm(word_attributions)
#     word_attributions = attr_normalizing_func(word_attributions)
    attr_score = torch.sum(word_attributions)
    attr_class = 'Pos' if attr_score > 0.5 else 'Neg'
    convergence_score = None
    
    
    #re-organizing tensors and arrays because words get split down
    conti_attr = get_continuous_attributions(conti_map, word_attributions)
    raw_input = get_continuous_raw_inputs(conti_map, detokenized)

#     print(f'word attributions {word_attributions}')
#     print(f'pred_prob {pred_prob}')
#     print(f'pred_class {pred_class}')
#     print(f'true_class {true_class}')
#     print(f'attribution {attribution}')
#     print(f'attr_class {attr_class}')
#     print(f'attr_score {attr_score}')
#     print(f'raw_input {raw_input}')

        
#     collect info for metrics later
    collect_info_for_metric(model_out_list, pred_prob, raw_attr_list, attribution, conti_attr_list, conti_attr, raw_input_list, raw_input)
        
    
    visual_record = visualization.VisualizationDataRecord(word_attributions=conti_attr,
                                                         pred_prob=pred_prob,
                                                         pred_class=pred_class,
                                                         true_class=true_class,
                                                         attr_class=attr_class,
                                                         attr_score=attr_score,
                                                         raw_input_ids=raw_input,
                                                         convergence_score=convergence_score)
        
        
    return visual_record
      
    

In [8]:
for i, (datum_raw, target) in enumerate(zip(sst2_data_raw, targets), start=1):
    print(f'Raw review: {datum_raw}')
    print(f'GT target: {target}')
    visual_record=generate_record(datum_raw, target)
    print(visualization.visualize_text([visual_record]))


Raw review: its oscar nomination 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 15.24it/s]


word attr tensor([ 0.3326, -0.7398,  0.0989,  0.5315, -0.2232], device='cuda:0')
conti attr [tensor(0.3326, device='cuda:0'), tensor(-0.7398, device='cuda:0'), tensor(0.0989, device='cuda:0'), tensor(0.5315, device='cuda:0'), tensor(-0.2232, device='cuda:0')]
detokenized ['[CLS]', 'its', 'oscar', 'nomination', '[SEP]']
len conti_raw 5
conti_raw ['[CLS]', 'its', 'oscar', 'nomination', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.96),Neg,0.0,[CLS] its oscar nomination [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: shenanigans and slapstick 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 11.51it/s]


word attr tensor([-0.1049, -0.4902, -0.1817,  0.4962,  0.0640, -0.4695, -0.0862,  0.4502,
         0.1868], device='cuda:0')
conti attr [tensor(-0.1049, device='cuda:0'), tensor(0.0721, device='cuda:0'), tensor(-0.4695, device='cuda:0'), tensor(0.1820, device='cuda:0'), tensor(0.1868, device='cuda:0')]
detokenized ['[CLS]', 'shen', 'ani', 'gan', 's', 'and', 'slap', 'stick', '[SEP]']
len conti_raw 5
conti_raw ['[CLS]', 'shenanigans', 'and', 'slapstick', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Neg (0.03),Neg,-0.14,[CLS] shenanigans and slapstick [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: an unsettling sight , 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 12.65it/s]


word attr tensor([ 0.7390, -0.4312,  0.2279, -0.1670, -0.1000, -0.1417, -0.3374,  0.2103],
       device='cuda:0')
conti attr [tensor(0.7390, device='cuda:0'), tensor(-0.4312, device='cuda:0'), tensor(-0.0348, device='cuda:0'), tensor(-0.1417, device='cuda:0'), tensor(-0.3374, device='cuda:0'), tensor(0.2103, device='cuda:0')]
detokenized ['[CLS]', 'an', 'un', 'sett', 'ling', 'sight', ',', '[SEP]']
len conti_raw 6
conti_raw ['[CLS]', 'an', 'unsettling', 'sight', ',', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Pos (0.93),Neg,-0.0,"[CLS] an unsettling sight , [SEP]"
,,,,


<IPython.core.display.HTML object>
Raw review: the climactic hourlong cricket match 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 11.29it/s]


word attr tensor([-2.6536e-01, -3.5579e-01, -8.0240e-02,  2.5442e-01,  2.5774e-01,
         4.0406e-01,  6.6838e-01, -6.1052e-04, -4.4255e-02, -2.3117e-01],
       device='cuda:0')
conti attr [tensor(-0.2654, device='cuda:0'), tensor(-0.3558, device='cuda:0'), tensor(0.1724, device='cuda:0'), tensor(0.5362, device='cuda:0'), tensor(-0.0006, device='cuda:0'), tensor(-0.0443, device='cuda:0'), tensor(-0.2312, device='cuda:0')]
detokenized ['[CLS]', 'the', 'cl', 'ima', 'ctic', 'hour', 'long', 'cricket', 'match', '[SEP]']
len conti_raw 7
conti_raw ['[CLS]', 'the', 'climactic', 'hourlong', 'cricket', 'match', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.60),Pos,0.61,[CLS] the climactic hourlong cricket match [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: alternating between facetious comic parody and pulp melodrama , this smart-aleck movie ... tosses around some intriguing questions about the difference between human and android life 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00,  4.75it/s]


word attr tensor([ 0.1451, -0.1695,  0.0501,  0.1400, -0.1298,  0.1503,  0.4115,  0.1220,
         0.1210,  0.2512,  0.1019,  0.2528, -0.0822,  0.2141, -0.0008,  0.2444,
         0.2349,  0.2293, -0.0148,  0.0067,  0.1043,  0.3112,  0.1624,  0.1112,
         0.3164,  0.0090, -0.0686,  0.0008,  0.1792,  0.0504, -0.0493,  0.0848,
         0.1538,  0.0248,  0.0873, -0.0169,  0.0648], device='cuda:0')
conti attr [tensor(0.1451, device='cuda:0'), tensor(-0.1695, device='cuda:0'), tensor(0.0501, device='cuda:0'), tensor(0.0051, device='cuda:0'), tensor(0.1503, device='cuda:0'), tensor(0.4115, device='cuda:0'), tensor(0.1220, device='cuda:0'), tensor(0.1210, device='cuda:0'), tensor(0.2147, device='cuda:0'), tensor(-0.0822, device='cuda:0'), tensor(0.2141, device='cuda:0'), tensor(0.2038, device='cuda:0'), tensor(-0.0148, device='cuda:0'), tensor(0.1833, device='cuda:0'), tensor(0.1368, device='cuda:0'), tensor(0.3164, device='cuda:0'), tensor(0.0090, device='cuda:0'), tensor(-0.0686, device=

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.93),Pos,3.8,"[CLS] alternating between facetious comic parody and pulp melodrama , this smart-aleck movie ... tosses around some intriguing questions about the difference between human and android life [SEP]"
,,,,


<IPython.core.display.HTML object>
Raw review: to be a part of that elusive adult world 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 10.90it/s]


word attr tensor([ 0.1292,  0.1135,  0.1927,  0.8598,  0.2565, -0.0644,  0.1480,  0.0362,
         0.0113,  0.2581, -0.1846], device='cuda:0')
conti attr [tensor(0.1292, device='cuda:0'), tensor(0.1135, device='cuda:0'), tensor(0.1927, device='cuda:0'), tensor(0.8598, device='cuda:0'), tensor(0.2565, device='cuda:0'), tensor(-0.0644, device='cuda:0'), tensor(0.1480, device='cuda:0'), tensor(0.0362, device='cuda:0'), tensor(0.0113, device='cuda:0'), tensor(0.2581, device='cuda:0'), tensor(-0.1846, device='cuda:0')]
detokenized ['[CLS]', 'to', 'be', 'a', 'part', 'of', 'that', 'elusive', 'adult', 'world', '[SEP]']
len conti_raw 11
conti_raw ['[CLS]', 'to', 'be', 'a', 'part', 'of', 'that', 'elusive', 'adult', 'world', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.96),Pos,1.76,[CLS] to be a part of that elusive adult world [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: emotional power 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 15.78it/s]


word attr tensor([-0.5807,  0.4241, -0.4068,  0.5634], device='cuda:0')
conti attr [tensor(-0.5807, device='cuda:0'), tensor(0.4241, device='cuda:0'), tensor(-0.4068, device='cuda:0'), tensor(0.5634, device='cuda:0')]
detokenized ['[CLS]', 'emotional', 'power', '[SEP]']
len conti_raw 4
conti_raw ['[CLS]', 'emotional', 'power', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.98),Neg,0.0,[CLS] emotional power [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: reminds you of why animation is such a perfect medium for children , because of the way it allows the mind to enter and accept another world 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  6.04it/s]


word attr tensor([ 0.7433, -0.1922, -0.0622, -0.1250, -0.0260, -0.2024, -0.0613,  0.1184,
         0.2343,  0.1448, -0.1484,  0.0040, -0.0597, -0.0799,  0.1555, -0.1340,
         0.1744,  0.1302,  0.0618,  0.0566, -0.1422, -0.3057, -0.0531, -0.0648,
        -0.0232, -0.0338, -0.0166, -0.0595, -0.0330], device='cuda:0')
conti attr [tensor(0.7433, device='cuda:0'), tensor(-0.1922, device='cuda:0'), tensor(-0.0622, device='cuda:0'), tensor(-0.1250, device='cuda:0'), tensor(-0.0260, device='cuda:0'), tensor(-0.2024, device='cuda:0'), tensor(-0.0613, device='cuda:0'), tensor(0.1184, device='cuda:0'), tensor(0.2343, device='cuda:0'), tensor(0.1448, device='cuda:0'), tensor(-0.1484, device='cuda:0'), tensor(0.0040, device='cuda:0'), tensor(-0.0597, device='cuda:0'), tensor(-0.0799, device='cuda:0'), tensor(0.1555, device='cuda:0'), tensor(-0.1340, device='cuda:0'), tensor(0.1744, device='cuda:0'), tensor(0.1302, device='cuda:0'), tensor(0.0618, device='cuda:0'), tensor(0.0566, device='cuda:0'

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.97),Neg,0.0,"[CLS] reminds you of why animation is such a perfect medium for children , because of the way it allows the mind to enter and accept another world [SEP]"
,,,,


<IPython.core.display.HTML object>
Raw review: unparalleled proportions , writer-director parker 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 10.58it/s]


word attr tensor([-0.0794, -0.0851,  0.4910,  0.2646,  0.6184,  0.1582,  0.0274,  0.0917,
        -0.2353, -0.2457,  0.3761,  0.0392], device='cuda:0')
conti attr [tensor(-0.0794, device='cuda:0'), tensor(0.4261, device='cuda:0'), tensor(0.1582, device='cuda:0'), tensor(0.0274, device='cuda:0'), tensor(-0.1588, device='cuda:0'), tensor(0.3761, device='cuda:0'), tensor(0.0392, device='cuda:0')]
detokenized ['[CLS]', 'un', 'para', 'lle', 'led', 'proportions', ',', 'writer', '-', 'director', 'parker', '[SEP]']
len conti_raw 7
conti_raw ['[CLS]', 'unparalleled', 'proportions', ',', 'writer-director', 'parker', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.88),Pos,1.42,"[CLS] unparalleled proportions , writer-director parker [SEP]"
,,,,


<IPython.core.display.HTML object>
Raw review: this surprisingly decent flick 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 13.25it/s]


word attr tensor([-0.1055,  0.4394,  0.0479, -0.3466,  0.5623, -0.5976], device='cuda:0')
conti attr [tensor(-0.1055, device='cuda:0'), tensor(0.4394, device='cuda:0'), tensor(0.0479, device='cuda:0'), tensor(-0.3466, device='cuda:0'), tensor(0.5623, device='cuda:0'), tensor(-0.5976, device='cuda:0')]
detokenized ['[CLS]', 'this', 'surprisingly', 'decent', 'flick', '[SEP]']
len conti_raw 6
conti_raw ['[CLS]', 'this', 'surprisingly', 'decent', 'flick', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.97),Neg,0.0,[CLS] this surprisingly decent flick [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: about the best thing you could say about narc is that it 's a rock-solid little genre picture . 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  6.69it/s]


word attr tensor([ 0.8805,  0.0200, -0.0245, -0.0649, -0.0273,  0.0251, -0.1198,  0.1387,
        -0.0812, -0.2530, -0.0149,  0.1445, -0.0340, -0.0854, -0.0128, -0.2325,
         0.0045, -0.0676,  0.0112,  0.0789, -0.0666, -0.0562, -0.0884, -0.0137,
        -0.0608], device='cuda:0')
conti attr [tensor(0.8805, device='cuda:0'), tensor(0.0200, device='cuda:0'), tensor(-0.0245, device='cuda:0'), tensor(-0.0649, device='cuda:0'), tensor(-0.0273, device='cuda:0'), tensor(0.0251, device='cuda:0'), tensor(-0.1198, device='cuda:0'), tensor(0.1387, device='cuda:0'), tensor(-0.0812, device='cuda:0'), tensor(-0.1340, device='cuda:0'), tensor(0.1445, device='cuda:0'), tensor(-0.0340, device='cuda:0'), tensor(-0.0854, device='cuda:0'), tensor(-0.1226, device='cuda:0'), tensor(0.0045, device='cuda:0'), tensor(0.0254, device='cuda:0'), tensor(-0.0666, device='cuda:0'), tensor(-0.0562, device='cuda:0'), tensor(-0.0884, device='cuda:0'), tensor(-0.0137, device='cuda:0'), tensor(-0.0608, device='cuda:0

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.95),Neg,0.0,[CLS] about the best thing you could say about narc is that it 's a rock-solid little genre picture . [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: the very best 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 12.98it/s]


word attr tensor([ 0.8739, -0.1230, -0.3194, -0.1016, -0.3299], device='cuda:0')
conti attr [tensor(0.8739, device='cuda:0'), tensor(-0.1230, device='cuda:0'), tensor(-0.3194, device='cuda:0'), tensor(-0.1016, device='cuda:0'), tensor(-0.3299, device='cuda:0')]
detokenized ['[CLS]', 'the', 'very', 'best', '[SEP]']
len conti_raw 5
conti_raw ['[CLS]', 'the', 'very', 'best', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.97),Neg,0.0,[CLS] the very best [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: been modeled on the worst revenge-of-the-nerds clichés the filmmakers could dredge up 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  6.93it/s]


word attr tensor([ 0.4211,  0.0913,  0.1179, -0.3123, -0.1223,  0.1247, -0.3730,  0.1266,
        -0.2008,  0.0352,  0.0295, -0.2281, -0.1392, -0.2733,  0.3969,  0.0081,
         0.0157,  0.2179, -0.0101,  0.0547,  0.2544,  0.0112, -0.0105, -0.2355],
       device='cuda:0')
conti attr [tensor(0.4211, device='cuda:0'), tensor(0.0913, device='cuda:0'), tensor(0.1179, device='cuda:0'), tensor(-0.3123, device='cuda:0'), tensor(-0.1223, device='cuda:0'), tensor(0.1247, device='cuda:0'), tensor(-0.2021, device='cuda:0'), tensor(0.1091, device='cuda:0'), tensor(0.2179, device='cuda:0'), tensor(-0.0101, device='cuda:0'), tensor(0.0547, device='cuda:0'), tensor(0.1328, device='cuda:0'), tensor(-0.0105, device='cuda:0'), tensor(-0.2355, device='cuda:0')]
detokenized ['[CLS]', 'been', 'modeled', 'on', 'the', 'worst', 'revenge', '-', 'of', '-', 'the', '-', 'ne', 'rds', 'cl', 'iche', 's', 'the', 'filmmakers', 'could', 'dr', 'edge', 'up', '[SEP]']
len conti_raw 14
conti_raw ['[CLS]', 'been', 'modele

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.03),Neg,-0.0,[CLS] been modeled on the worst revenge-of-the-nerds cliches the filmmakers could dredge up [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: tell you 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 15.94it/s]


word attr tensor([ 0.3197, -0.6898, -0.2353,  0.6054], device='cuda:0')
conti attr [tensor(0.3197, device='cuda:0'), tensor(-0.6898, device='cuda:0'), tensor(-0.2353, device='cuda:0'), tensor(0.6054, device='cuda:0')]
detokenized ['[CLS]', 'tell', 'you', '[SEP]']
len conti_raw 4
conti_raw ['[CLS]', 'tell', 'you', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.90),Neg,-0.0,[CLS] tell you [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: utterly absorbing 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 16.69it/s]


word attr tensor([-0.4526, -0.4183,  0.7825,  0.0883], device='cuda:0')
conti attr [tensor(-0.4526, device='cuda:0'), tensor(-0.4183, device='cuda:0'), tensor(0.7825, device='cuda:0'), tensor(0.0883, device='cuda:0')]
detokenized ['[CLS]', 'utterly', 'absorbing', '[SEP]']
len conti_raw 4
conti_raw ['[CLS]', 'utterly', 'absorbing', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.97),Neg,0.0,[CLS] utterly absorbing [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: restate 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 16.05it/s]


word attr tensor([ 0.0540,  0.5929,  0.1436, -0.7905], device='cuda:0')
conti attr [tensor(0.0540, device='cuda:0'), tensor(0.3683, device='cuda:0'), tensor(-0.7905, device='cuda:0')]
detokenized ['[CLS]', 'rest', 'ate', '[SEP]']
len conti_raw 3
conti_raw ['[CLS]', 'restate', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Pos (0.55),Neg,0.0,[CLS] restate [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: bears about as much resemblance to the experiences of most battered women as spider-man 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  8.18it/s]


word attr tensor([ 0.3110,  0.0367, -0.0359,  0.0170, -0.1098,  0.0040,  0.1547,  0.0384,
        -0.2739,  0.3828,  0.2265, -0.5864,  0.0457, -0.1851,  0.0932,  0.1176,
         0.1640, -0.4005], device='cuda:0')
conti attr [tensor(0.3110, device='cuda:0'), tensor(0.0367, device='cuda:0'), tensor(-0.0359, device='cuda:0'), tensor(0.0170, device='cuda:0'), tensor(-0.1098, device='cuda:0'), tensor(0.0040, device='cuda:0'), tensor(0.1547, device='cuda:0'), tensor(0.0384, device='cuda:0'), tensor(-0.2739, device='cuda:0'), tensor(0.3828, device='cuda:0'), tensor(0.2265, device='cuda:0'), tensor(-0.5864, device='cuda:0'), tensor(0.0457, device='cuda:0'), tensor(-0.1851, device='cuda:0'), tensor(0.1347, device='cuda:0'), tensor(-0.4005, device='cuda:0')]
detokenized ['[CLS]', 'bears', 'about', 'as', 'much', 'resemblance', 'to', 'the', 'experiences', 'of', 'most', 'battered', 'women', 'as', 'spider', '-', 'man', '[SEP]']
len conti_raw 16
conti_raw ['[CLS]', 'bears', 'about', 'as', 'much', 'r

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Pos (0.71),Neg,0.0,[CLS] bears about as much resemblance to the experiences of most battered women as spider-man [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: expressively performed 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 15.15it/s]


word attr tensor([ 0.2992, -0.8253,  0.2077,  0.4196, -0.1012], device='cuda:0')
conti attr [tensor(0.2992, device='cuda:0'), tensor(-0.3088, device='cuda:0'), tensor(0.4196, device='cuda:0'), tensor(-0.1012, device='cuda:0')]
detokenized ['[CLS]', 'expressive', 'ly', 'performed', '[SEP]']
len conti_raw 4
conti_raw ['[CLS]', 'expressively', 'performed', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.98),Neg,-0.0,[CLS] expressively performed [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: the acting is amateurish , the cinematography is atrocious 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  9.58it/s]


word attr tensor([-0.2121,  0.1039,  0.1689,  0.2544, -0.1005, -0.1136,  0.2507, -0.3212,
        -0.0012,  0.4168, -0.5198, -0.2944,  0.2951, -0.2102], device='cuda:0')
conti attr [tensor(-0.2121, device='cuda:0'), tensor(0.1039, device='cuda:0'), tensor(0.1689, device='cuda:0'), tensor(0.2544, device='cuda:0'), tensor(-0.1071, device='cuda:0'), tensor(0.2507, device='cuda:0'), tensor(-0.3212, device='cuda:0'), tensor(-0.0012, device='cuda:0'), tensor(0.4168, device='cuda:0'), tensor(-0.0560, device='cuda:0'), tensor(-0.2102, device='cuda:0')]
detokenized ['[CLS]', 'the', 'acting', 'is', 'amateur', 'ish', ',', 'the', 'cinematography', 'is', 'at', 'ro', 'cious', '[SEP]']
len conti_raw 11
conti_raw ['[CLS]', 'the', 'acting', 'is', 'amateurish', ',', 'the', 'cinematography', 'is', 'atrocious', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.03),Neg,-0.28,"[CLS] the acting is amateurish , the cinematography is atrocious [SEP]"
,,,,


<IPython.core.display.HTML object>
Raw review: solidly constructed 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 15.48it/s]


word attr tensor([ 0.8459, -0.0138, -0.0884, -0.3701, -0.3735], device='cuda:0')
conti attr [tensor(0.8459, device='cuda:0'), tensor(-0.0511, device='cuda:0'), tensor(-0.3701, device='cuda:0'), tensor(-0.3735, device='cuda:0')]
detokenized ['[CLS]', 'solid', 'ly', 'constructed', '[SEP]']
len conti_raw 4
conti_raw ['[CLS]', 'solidly', 'constructed', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.97),Neg,-0.0,[CLS] solidly constructed [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: are undermined by the movie 's presentation , which is way too stagy 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  7.49it/s]


word attr tensor([-0.0990, -0.3927,  0.1616, -0.0124,  0.1128,  0.0275,  0.4562,  0.0481,
         0.0016,  0.0926, -0.0508,  0.1004, -0.4334, -0.0079,  0.3881, -0.2185,
        -0.1315, -0.4012,  0.0051], device='cuda:0')
conti attr [tensor(-0.0990, device='cuda:0'), tensor(-0.3927, device='cuda:0'), tensor(0.0746, device='cuda:0'), tensor(0.1128, device='cuda:0'), tensor(0.0275, device='cuda:0'), tensor(0.4562, device='cuda:0'), tensor(0.0248, device='cuda:0'), tensor(0.0926, device='cuda:0'), tensor(-0.0508, device='cuda:0'), tensor(0.1004, device='cuda:0'), tensor(-0.4334, device='cuda:0'), tensor(-0.0079, device='cuda:0'), tensor(0.3881, device='cuda:0'), tensor(-0.2881, device='cuda:0'), tensor(0.0051, device='cuda:0')]
detokenized ['[CLS]', 'are', 'undermine', 'd', 'by', 'the', 'movie', "'", 's', 'presentation', ',', 'which', 'is', 'way', 'too', 'st', 'ag', 'y', '[SEP]']
len conti_raw 15
conti_raw ['[CLS]', 'are', 'undermined', 'by', 'the', 'movie', "'s", 'presentation', ',', 'w

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.04),Neg,-0.35,"[CLS] are undermined by the movie 's presentation , which is way too stagy [SEP]"
,,,,


<IPython.core.display.HTML object>
Raw review: a great film 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 13.68it/s]


word attr tensor([-0.2777, -0.4970, -0.2901,  0.4212,  0.6436], device='cuda:0')
conti attr [tensor(-0.2777, device='cuda:0'), tensor(-0.4970, device='cuda:0'), tensor(-0.2901, device='cuda:0'), tensor(0.4212, device='cuda:0'), tensor(0.6436, device='cuda:0')]
detokenized ['[CLS]', 'a', 'great', 'film', '[SEP]']
len conti_raw 5
conti_raw ['[CLS]', 'a', 'great', 'film', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.97),Neg,0.0,[CLS] a great film [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: charm 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 17.77it/s]


word attr tensor([ 0.7026, -0.7116,  0.0090], device='cuda:0')
conti attr [tensor(0.7026, device='cuda:0'), tensor(-0.7116, device='cuda:0'), tensor(0.0090, device='cuda:0')]
detokenized ['[CLS]', 'charm', '[SEP]']
len conti_raw 3
conti_raw ['[CLS]', 'charm', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.97),Neg,0.0,[CLS] charm [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: this new jangle of noise , mayhem and stupidity 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  9.84it/s]


word attr tensor([-0.6357,  0.0989, -0.1168, -0.2163,  0.2615, -0.2071,  0.2146, -0.3145,
         0.1303,  0.3897,  0.3037,  0.0918], device='cuda:0')
conti attr [tensor(-0.6357, device='cuda:0'), tensor(0.0989, device='cuda:0'), tensor(-0.1168, device='cuda:0'), tensor(0.0226, device='cuda:0'), tensor(-0.2071, device='cuda:0'), tensor(0.2146, device='cuda:0'), tensor(-0.3145, device='cuda:0'), tensor(0.1303, device='cuda:0'), tensor(0.3897, device='cuda:0'), tensor(0.3037, device='cuda:0'), tensor(0.0918, device='cuda:0')]
detokenized ['[CLS]', 'this', 'new', 'jang', 'le', 'of', 'noise', ',', 'mayhem', 'and', 'stupidity', '[SEP]']
len conti_raw 11
conti_raw ['[CLS]', 'this', 'new', 'jangle', 'of', 'noise', ',', 'mayhem', 'and', 'stupidity', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.09),Neg,-0.0,"[CLS] this new jangle of noise , mayhem and stupidity [SEP]"
,,,,


<IPython.core.display.HTML object>
Raw review: sustains it 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 15.66it/s]


word attr tensor([ 0.3879,  0.4213, -0.3740, -0.6853,  0.2501], device='cuda:0')
conti attr [tensor(0.3879, device='cuda:0'), tensor(0.0236, device='cuda:0'), tensor(-0.6853, device='cuda:0'), tensor(0.2501, device='cuda:0')]
detokenized ['[CLS]', 'sustain', 's', 'it', '[SEP]']
len conti_raw 4
conti_raw ['[CLS]', 'sustains', 'it', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.97),Neg,-0.0,[CLS] sustains it [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: is so deadly dull that watching the proverbial paint dry would be a welcome improvement . 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  7.56it/s]


word attr tensor([ 0.2036, -0.1039,  0.4590, -0.1060,  0.1163,  0.1166, -0.0231,  0.1173,
         0.0438, -0.1768, -0.1123, -0.0370,  0.0470, -0.5497,  0.2209,  0.3041,
         0.2508, -0.0519, -0.0206,  0.3543], device='cuda:0')
conti attr [tensor(0.2036, device='cuda:0'), tensor(-0.1039, device='cuda:0'), tensor(0.4590, device='cuda:0'), tensor(-0.1060, device='cuda:0'), tensor(0.1163, device='cuda:0'), tensor(0.1166, device='cuda:0'), tensor(-0.0231, device='cuda:0'), tensor(0.1173, device='cuda:0'), tensor(-0.0894, device='cuda:0'), tensor(-0.0370, device='cuda:0'), tensor(0.0470, device='cuda:0'), tensor(-0.5497, device='cuda:0'), tensor(0.2209, device='cuda:0'), tensor(0.3041, device='cuda:0'), tensor(0.2508, device='cuda:0'), tensor(-0.0519, device='cuda:0'), tensor(-0.0206, device='cuda:0'), tensor(0.3543, device='cuda:0')]
detokenized ['[CLS]', 'is', 'so', 'deadly', 'dull', 'that', 'watching', 'the', 'prove', 'rb', 'ial', 'paint', 'dry', 'would', 'be', 'a', 'welcome', 'impro

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.42),Pos,1.05,[CLS] is so deadly dull that watching the proverbial paint dry would be a welcome improvement . [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: to accomplish 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 16.37it/s]


word attr tensor([ 0.8128,  0.0059, -0.3638, -0.4549], device='cuda:0')
conti attr [tensor(0.8128, device='cuda:0'), tensor(0.0059, device='cuda:0'), tensor(-0.3638, device='cuda:0'), tensor(-0.4549, device='cuda:0')]
detokenized ['[CLS]', 'to', 'accomplish', '[SEP]']
len conti_raw 4
conti_raw ['[CLS]', 'to', 'accomplish', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.97),Neg,-0.0,[CLS] to accomplish [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: does n't work . 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 12.89it/s]


word attr tensor([ 0.4084, -0.1373, -0.0938,  0.1541, -0.0369, -0.5328,  0.6030, -0.3647],
       device='cuda:0')
conti attr [tensor(0.4084, device='cuda:0'), tensor(-0.1373, device='cuda:0'), tensor(-0.0034, device='cuda:0'), tensor(-0.5328, device='cuda:0'), tensor(0.6030, device='cuda:0'), tensor(-0.3647, device='cuda:0')]
detokenized ['[CLS]', 'does', 'n', "'", 't', 'work', '.', '[SEP]']
len conti_raw 6
conti_raw ['[CLS]', 'does', "n't", 'work', '.', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.04),Neg,-0.0,[CLS] does n't work . [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: given it a one-star rating 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 11.50it/s]


word attr tensor([-0.7041,  0.0735,  0.1402,  0.3322, -0.2397,  0.4284,  0.2734, -0.0929,
        -0.2110], device='cuda:0')
conti attr [tensor(-0.7041, device='cuda:0'), tensor(0.0735, device='cuda:0'), tensor(0.1402, device='cuda:0'), tensor(0.3322, device='cuda:0'), tensor(0.1839, device='cuda:0'), tensor(-0.0929, device='cuda:0'), tensor(-0.2110, device='cuda:0')]
detokenized ['[CLS]', 'given', 'it', 'a', 'one', '-', 'star', 'rating', '[SEP]']
len conti_raw 7
conti_raw ['[CLS]', 'given', 'it', 'a', 'one-star', 'rating', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Pos (0.73),Neg,0.0,[CLS] given it a one-star rating [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: ice cube is n't quite out of ripe screwball ideas , but friday after next spreads them pretty thin 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  7.03it/s]


word attr tensor([-0.2063, -0.4812, -0.0132,  0.1630, -0.0054, -0.0031, -0.0774, -0.0728,
        -0.2791, -0.1714,  0.1918,  0.3215,  0.2927, -0.1793,  0.1315,  0.1108,
         0.2194, -0.0964,  0.1740,  0.1320, -0.1432, -0.3230, -0.2498,  0.0245],
       device='cuda:0')
conti attr [tensor(-0.2063, device='cuda:0'), tensor(-0.4812, device='cuda:0'), tensor(-0.0132, device='cuda:0'), tensor(0.1630, device='cuda:0'), tensor(-0.0408, device='cuda:0'), tensor(-0.0728, device='cuda:0'), tensor(-0.2791, device='cuda:0'), tensor(-0.1714, device='cuda:0'), tensor(0.1918, device='cuda:0'), tensor(0.3071, device='cuda:0'), tensor(-0.1793, device='cuda:0'), tensor(0.1315, device='cuda:0'), tensor(0.1108, device='cuda:0'), tensor(0.2194, device='cuda:0'), tensor(-0.0964, device='cuda:0'), tensor(0.1740, device='cuda:0'), tensor(0.1320, device='cuda:0'), tensor(-0.1432, device='cuda:0'), tensor(-0.3230, device='cuda:0'), tensor(-0.2498, device='cuda:0'), tensor(0.0245, device='cuda:0')]
detokeni

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.04),Neg,-0.54,"[CLS] ice cube is n't quite out of ripe screwball ideas , but friday after next spreads them pretty thin [SEP]"
,,,,


<IPython.core.display.HTML object>
Raw review: been trying to forget 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 14.61it/s]


word attr tensor([ 0.7933, -0.1453, -0.1265, -0.5727, -0.0206,  0.0718], device='cuda:0')
conti attr [tensor(0.7933, device='cuda:0'), tensor(-0.1453, device='cuda:0'), tensor(-0.1265, device='cuda:0'), tensor(-0.5727, device='cuda:0'), tensor(-0.0206, device='cuda:0'), tensor(0.0718, device='cuda:0')]
detokenized ['[CLS]', 'been', 'trying', 'to', 'forget', '[SEP]']
len conti_raw 6
conti_raw ['[CLS]', 'been', 'trying', 'to', 'forget', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.07),Neg,-0.0,[CLS] been trying to forget [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: big stars and 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 15.22it/s]


word attr tensor([ 0.7062, -0.4011, -0.5442,  0.2082,  0.0309], device='cuda:0')
conti attr [tensor(0.7062, device='cuda:0'), tensor(-0.4011, device='cuda:0'), tensor(-0.5442, device='cuda:0'), tensor(0.2082, device='cuda:0'), tensor(0.0309, device='cuda:0')]
detokenized ['[CLS]', 'big', 'stars', 'and', '[SEP]']
len conti_raw 5
conti_raw ['[CLS]', 'big', 'stars', 'and', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.97),Neg,0.0,[CLS] big stars and [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: of cheesy dialogue 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 12.92it/s]


word attr tensor([ 0.6418,  0.1822, -0.3411,  0.0398, -0.0580, -0.6358,  0.1712],
       device='cuda:0')
conti attr [tensor(0.6418, device='cuda:0'), tensor(0.1822, device='cuda:0'), tensor(-0.1043, device='cuda:0'), tensor(-0.6358, device='cuda:0'), tensor(0.1712, device='cuda:0')]
detokenized ['[CLS]', 'of', 'che', 'es', 'y', 'dialogue', '[SEP]']
len conti_raw 5
conti_raw ['[CLS]', 'of', 'cheesy', 'dialogue', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.02),Neg,0.0,[CLS] of cheesy dialogue [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: the château is never quite able to overcome the cultural moat surrounding its ludicrous and contrived plot . ' 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  6.70it/s]


word attr tensor([-0.0501,  0.1095,  0.1668, -0.0148, -0.2483,  0.1835, -0.0111, -0.2491,
        -0.0426, -0.3468, -0.1017, -0.0433,  0.1754,  0.1610, -0.0461,  0.2792,
        -0.3373,  0.0512,  0.4461, -0.2485, -0.1387, -0.3136,  0.1728,  0.0259,
         0.0607], device='cuda:0')
conti attr [tensor(-0.0501, device='cuda:0'), tensor(0.1095, device='cuda:0'), tensor(0.1668, device='cuda:0'), tensor(-0.0148, device='cuda:0'), tensor(-0.2483, device='cuda:0'), tensor(0.1835, device='cuda:0'), tensor(-0.0111, device='cuda:0'), tensor(-0.2491, device='cuda:0'), tensor(-0.0426, device='cuda:0'), tensor(-0.3468, device='cuda:0'), tensor(-0.1017, device='cuda:0'), tensor(-0.0433, device='cuda:0'), tensor(0.1754, device='cuda:0'), tensor(0.1610, device='cuda:0'), tensor(-0.1104, device='cuda:0'), tensor(0.0512, device='cuda:0'), tensor(-0.0199, device='cuda:0'), tensor(-0.3136, device='cuda:0'), tensor(0.1728, device='cuda:0'), tensor(0.0259, device='cuda:0'), tensor(0.0607, device='cuda:0')

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.06),Neg,-0.36,[CLS] the chateau is never quite able to overcome the cultural moat surrounding its ludicrous and contrived plot . ' [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: principled 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 15.51it/s]


word attr tensor([ 0.3217, -0.0544,  0.5213, -0.7886], device='cuda:0')
conti attr [tensor(0.3217, device='cuda:0'), tensor(0.2334, device='cuda:0'), tensor(-0.7886, device='cuda:0')]
detokenized ['[CLS]', 'principle', 'd', '[SEP]']
len conti_raw 3
conti_raw ['[CLS]', 'principled', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.97),Neg,-0.0,[CLS] principled [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: end it all by stuffing himself into an electric pencil sharpener 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  8.30it/s]


word attr tensor([ 0.8886,  0.0435,  0.0801, -0.0292, -0.2766, -0.0287, -0.1871, -0.0153,
        -0.0641, -0.1691,  0.0785, -0.0175, -0.1089, -0.1941], device='cuda:0')
conti attr [tensor(0.8886, device='cuda:0'), tensor(0.0435, device='cuda:0'), tensor(0.0801, device='cuda:0'), tensor(-0.0292, device='cuda:0'), tensor(-0.2766, device='cuda:0'), tensor(-0.0287, device='cuda:0'), tensor(-0.1871, device='cuda:0'), tensor(-0.0153, device='cuda:0'), tensor(-0.0641, device='cuda:0'), tensor(-0.1691, device='cuda:0'), tensor(0.0785, device='cuda:0'), tensor(-0.0632, device='cuda:0'), tensor(-0.1941, device='cuda:0')]
detokenized ['[CLS]', 'end', 'it', 'all', 'by', 'stuffing', 'himself', 'into', 'an', 'electric', 'pencil', 'sharpe', 'ner', '[SEP]']
len conti_raw 13
conti_raw ['[CLS]', 'end', 'it', 'all', 'by', 'stuffing', 'himself', 'into', 'an', 'electric', 'pencil', 'sharpener', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.06),Neg,-0.0,[CLS] end it all by stuffing himself into an electric pencil sharpener [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: funniest idea 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  9.31it/s]


word attr tensor([-0.4874,  0.5387,  0.0616,  0.1887,  0.2894, -0.5909], device='cuda:0')
conti attr [tensor(-0.4874, device='cuda:0'), tensor(0.2444, device='cuda:0'), tensor(0.2894, device='cuda:0'), tensor(-0.5909, device='cuda:0')]
detokenized ['[CLS]', 'fun', 'nies', 't', 'idea', '[SEP]']
len conti_raw 4
conti_raw ['[CLS]', 'funniest', 'idea', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.96),Neg,0.0,[CLS] funniest idea [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: silly and tedious 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 14.23it/s]


word attr tensor([-0.5663, -0.2550,  0.7700, -0.0979,  0.0572,  0.0920], device='cuda:0')
conti attr [tensor(-0.5663, device='cuda:0'), tensor(-0.2550, device='cuda:0'), tensor(0.7700, device='cuda:0'), tensor(-0.0204, device='cuda:0'), tensor(0.0920, device='cuda:0')]
detokenized ['[CLS]', 'silly', 'and', 'ted', 'ious', '[SEP]']
len conti_raw 5
conti_raw ['[CLS]', 'silly', 'and', 'tedious', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.04),Neg,0.0,[CLS] silly and tedious [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: two surefire , beloved genres 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 13.07it/s]


word attr tensor([-0.7157,  0.2344,  0.1770,  0.2156, -0.0388,  0.4310,  0.0949, -0.3984],
       device='cuda:0')
conti attr [tensor(-0.7157, device='cuda:0'), tensor(0.2344, device='cuda:0'), tensor(0.1963, device='cuda:0'), tensor(-0.0388, device='cuda:0'), tensor(0.4310, device='cuda:0'), tensor(0.0949, device='cuda:0'), tensor(-0.3984, device='cuda:0')]
detokenized ['[CLS]', 'two', 'sure', 'fire', ',', 'beloved', 'genres', '[SEP]']
len conti_raw 7
conti_raw ['[CLS]', 'two', 'surefire', ',', 'beloved', 'genres', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.98),Neg,0.0,"[CLS] two surefire , beloved genres [SEP]"
,,,,


<IPython.core.display.HTML object>
Raw review: most charmless 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 10.21it/s]


word attr tensor([-0.4875,  0.3123,  0.1727, -0.5623,  0.5647], device='cuda:0')
conti attr [tensor(-0.4875, device='cuda:0'), tensor(0.3123, device='cuda:0'), tensor(-0.1948, device='cuda:0'), tensor(0.5647, device='cuda:0')]
detokenized ['[CLS]', 'most', 'charm', 'less', '[SEP]']
len conti_raw 4
conti_raw ['[CLS]', 'most', 'charmless', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Pos (0.97),Neg,-0.0,[CLS] most charmless [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: offers us the sense that on some elemental level , lilia deeply wants to break free of her old life . 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  6.88it/s]


word attr tensor([ 0.2248,  0.1939,  0.3009,  0.0104,  0.1405, -0.0317, -0.0775, -0.2094,
         0.2983,  0.3004, -0.1615, -0.0870,  0.1727,  0.0203,  0.2732, -0.0950,
         0.1855,  0.3175,  0.1519,  0.2952,  0.3340,  0.1184,  0.1793,  0.1437],
       device='cuda:0')
conti attr [tensor(0.2248, device='cuda:0'), tensor(0.1939, device='cuda:0'), tensor(0.3009, device='cuda:0'), tensor(0.0104, device='cuda:0'), tensor(0.1405, device='cuda:0'), tensor(-0.0317, device='cuda:0'), tensor(-0.0775, device='cuda:0'), tensor(-0.2094, device='cuda:0'), tensor(0.2983, device='cuda:0'), tensor(0.3004, device='cuda:0'), tensor(-0.1615, device='cuda:0'), tensor(0.0428, device='cuda:0'), tensor(0.0203, device='cuda:0'), tensor(0.2732, device='cuda:0'), tensor(-0.0950, device='cuda:0'), tensor(0.1855, device='cuda:0'), tensor(0.3175, device='cuda:0'), tensor(0.1519, device='cuda:0'), tensor(0.2952, device='cuda:0'), tensor(0.3340, device='cuda:0'), tensor(0.1184, device='cuda:0'), tensor(0.1793, 

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.96),Pos,3.0,"[CLS] offers us the sense that on some elemental level , lilia deeply wants to break free of her old life . [SEP]"
,,,,


<IPython.core.display.HTML object>
Raw review: might be best forgotten 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 13.61it/s]


word attr tensor([ 0.8426,  0.1311, -0.3086, -0.1810, -0.1244, -0.3597], device='cuda:0')
conti attr [tensor(0.8426, device='cuda:0'), tensor(0.1311, device='cuda:0'), tensor(-0.3086, device='cuda:0'), tensor(-0.1810, device='cuda:0'), tensor(-0.1244, device='cuda:0'), tensor(-0.3597, device='cuda:0')]
detokenized ['[CLS]', 'might', 'be', 'best', 'forgotten', '[SEP]']
len conti_raw 6
conti_raw ['[CLS]', 'might', 'be', 'best', 'forgotten', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.07),Neg,0.0,[CLS] might be best forgotten [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: a substantial arc of change that does n't produce any real transformation 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  8.97it/s]


word attr tensor([-0.2711,  0.0711, -0.0034,  0.0835,  0.0884, -0.0485, -0.3474, -0.2208,
         0.5955,  0.1691, -0.4476,  0.0936, -0.2256, -0.0106, -0.0392,  0.3000],
       device='cuda:0')
conti attr [tensor(-0.2711, device='cuda:0'), tensor(0.0711, device='cuda:0'), tensor(-0.0034, device='cuda:0'), tensor(0.0835, device='cuda:0'), tensor(0.0884, device='cuda:0'), tensor(-0.0485, device='cuda:0'), tensor(-0.3474, device='cuda:0'), tensor(-0.2208, device='cuda:0'), tensor(-0.0326, device='cuda:0'), tensor(0.0936, device='cuda:0'), tensor(-0.2256, device='cuda:0'), tensor(-0.0106, device='cuda:0'), tensor(-0.0392, device='cuda:0'), tensor(0.3000, device='cuda:0')]
detokenized ['[CLS]', 'a', 'substantial', 'arc', 'of', 'change', 'that', 'does', 'n', "'", 't', 'produce', 'any', 'real', 'transformation', '[SEP]']
len conti_raw 14
conti_raw ['[CLS]', 'a', 'substantial', 'arc', 'of', 'change', 'that', 'does', "n't", 'produce', 'any', 'real', 'transformation', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.04),Neg,-0.21,[CLS] a substantial arc of change that does n't produce any real transformation [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: unadorned 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 15.11it/s]


word attr tensor([-8.6352e-01,  8.1038e-04,  2.9264e-01,  3.4074e-01,  2.2933e-01],
       device='cuda:0')
conti attr [tensor(-0.8635, device='cuda:0'), tensor(0.2437, device='cuda:0'), tensor(0.2293, device='cuda:0')]
detokenized ['[CLS]', 'una', 'dor', 'ned', '[SEP]']
len conti_raw 3
conti_raw ['[CLS]', 'unadorned', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.06),Neg,0.0,[CLS] unadorned [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: this is the kind of movie that you only need to watch for about thirty seconds before you say to yourself , ` ah , yes , here we have a bad , bad , bad movie . ' 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00,  4.49it/s]


word attr tensor([ 0.2091,  0.0083, -0.1521,  0.0018, -0.2665, -0.1561,  0.0346,  0.0381,
         0.2276,  0.0772, -0.0652, -0.0588,  0.1040, -0.0516,  0.0293, -0.1113,
         0.0329,  0.0694,  0.0472,  0.0258, -0.1787,  0.1736,  0.0627,  0.0536,
        -0.1430, -0.2884,  0.0383, -0.3550, -0.0553, -0.0167,  0.2365, -0.0527,
         0.3283,  0.1910, -0.0869, -0.2162, -0.0053, -0.0676, -0.2507, -0.2099,
         0.2488], device='cuda:0')
conti attr [tensor(0.2091, device='cuda:0'), tensor(0.0083, device='cuda:0'), tensor(-0.1521, device='cuda:0'), tensor(0.0018, device='cuda:0'), tensor(-0.2665, device='cuda:0'), tensor(-0.1561, device='cuda:0'), tensor(0.0346, device='cuda:0'), tensor(0.0381, device='cuda:0'), tensor(0.2276, device='cuda:0'), tensor(0.0772, device='cuda:0'), tensor(-0.0652, device='cuda:0'), tensor(-0.0588, device='cuda:0'), tensor(0.1040, device='cuda:0'), tensor(-0.0516, device='cuda:0'), tensor(0.0293, device='cuda:0'), tensor(-0.1113, device='cuda:0'), tensor(0

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.07),Neg,-0.55,"[CLS] this is the kind of movie that you only need to watch for about thirty seconds before you say to yourself , ` ah , yes , here we have a bad , bad , bad movie . ' [SEP]"
,,,,


<IPython.core.display.HTML object>
Raw review: feel sanitised and stagey 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 10.60it/s]


word attr tensor([-0.2562, -0.7082, -0.1079,  0.1549,  0.5160,  0.0356, -0.0705, -0.0507,
         0.3494], device='cuda:0')
conti attr [tensor(-0.2562, device='cuda:0'), tensor(-0.7082, device='cuda:0'), tensor(0.2698, device='cuda:0'), tensor(0.0356, device='cuda:0'), tensor(-0.0606, device='cuda:0'), tensor(0.3494, device='cuda:0')]
detokenized ['[CLS]', 'feel', 'san', 'itis', 'ed', 'and', 'stage', 'y', '[SEP]']
len conti_raw 6
conti_raw ['[CLS]', 'feel', 'sanitised', 'and', 'stagey', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.03),Neg,-0.14,[CLS] feel sanitised and stagey [SEP]
,,,,




<IPython.core.display.HTML object>
Raw review: replete with stereotypical familial quandaries 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  9.02it/s]


word attr tensor([ 0.8313, -0.1048,  0.1184, -0.0543, -0.0492, -0.4081, -0.1582, -0.0224,
        -0.1420, -0.1898,  0.1124,  0.1043, -0.0741,  0.0364], device='cuda:0')
conti attr [tensor(0.8313, device='cuda:0'), tensor(0.0068, device='cuda:0'), tensor(-0.0543, device='cuda:0'), tensor(-0.2286, device='cuda:0'), tensor(-0.1530, device='cuda:0'), tensor(0.0171, device='cuda:0'), tensor(0.0364, device='cuda:0')]
detokenized ['[CLS]', 'rep', 'lete', 'with', 'stereo', 'typical', 'fa', 'mi', 'lia', 'l', 'quan', 'dar', 'ies', '[SEP]']
len conti_raw 7
conti_raw ['[CLS]', 'replete', 'with', 'stereotypical', 'familial', 'quandaries', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.08),Neg,-0.0,[CLS] replete with stereotypical familial quandaries [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: quirky comedy 
GT target: 1


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  9.65it/s]


word attr tensor([ 0.4416, -0.6267,  0.0487, -0.3794,  0.5158], device='cuda:0')
conti attr [tensor(0.4416, device='cuda:0'), tensor(-0.2890, device='cuda:0'), tensor(-0.3794, device='cuda:0'), tensor(0.5158, device='cuda:0')]
detokenized ['[CLS]', 'qui', 'rky', 'comedy', '[SEP]']
len conti_raw 4
conti_raw ['[CLS]', 'quirky', 'comedy', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.97),Neg,-0.0,[CLS] quirky comedy [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: unrewarding 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 11.45it/s]


word attr tensor([-0.8901,  0.1814,  0.2651,  0.2774,  0.1661], device='cuda:0')
conti attr [tensor(-0.8901, device='cuda:0'), tensor(0.2503, device='cuda:0'), tensor(0.1661, device='cuda:0')]
detokenized ['[CLS]', 'un', 'rew', 'arding', '[SEP]']
len conti_raw 3
conti_raw ['[CLS]', 'unrewarding', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.14),Neg,0.0,[CLS] unrewarding [SEP]
,,,,


<IPython.core.display.HTML object>
Raw review: old-hat 
GT target: 0


Kernel Shap attribution: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  9.35it/s]


word attr tensor([ 0.2989, -0.8318, -0.0988,  0.2472,  0.3845], device='cuda:0')
conti attr [tensor(0.2989, device='cuda:0'), tensor(-0.1091, device='cuda:0'), tensor(0.3845, device='cuda:0')]
detokenized ['[CLS]', 'old', '-', 'hat', '[SEP]']
len conti_raw 3
conti_raw ['[CLS]', 'old-hat', '[SEP]']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.05),Neg,0.0,[CLS] old-hat [SEP]
,,,,


<IPython.core.display.HTML object>


In [9]:
file_name_base = 'Kernel_SHAP'

In [10]:
save_info(idxs, sst2_data_raw, targets, model_out_list, raw_attr_list, conti_attr_list, raw_input_list, 
          fname=f'../MethodOutputs/{file_name_base}_out.pkl')

{'indices': [27798,
  28244,
  48083,
  8795,
  91,
  20682,
  59890,
  41763,
  19795,
  9957,
  28334,
  55664,
  16438,
  18031,
  9297,
  66913,
  9289,
  6789,
  25294,
  33294,
  29699,
  41960,
  6554,
  8772,
  17921,
  8027,
  49091,
  54186,
  15080,
  22382,
  23856,
  9464,
  23831,
  59434,
  17862,
  24584,
  26216,
  63094,
  55787,
  3993,
  25463,
  17540,
  51128,
  46224,
  32656,
  30105,
  28646,
  17011,
  7812,
  48236],
 'raw_data': ['its oscar nomination ',
  'shenanigans and slapstick ',
  'an unsettling sight , ',
  'the climactic hourlong cricket match ',
  'alternating between facetious comic parody and pulp melodrama , this smart-aleck movie ... tosses around some intriguing questions about the difference between human and android life ',
  'to be a part of that elusive adult world ',
  'emotional power ',
  'reminds you of why animation is such a perfect medium for children , because of the way it allows the mind to enter and accept another world ',
  'un

In [11]:
download_HTML(cur_file_name=f'{file_name_base}_SST2_BERT.ipynb',
              out_file_name=f'{file_name_base}_SST2_BERT.html')

In [12]:
torch.cuda.get_device_name()

'NVIDIA GeForce RTX 3090'