In [1]:
import sys, time, pickle, torch
sys.path.insert(0, '../../Models')
sys.path.insert(0, '../../Utils')
sys.path.insert(0, '../../Preprocess')
import numpy as np
import pandas as pd
from preload_models import get_sst2_tok_n_model
from _utils import sample_random_glue_sst2, get_continuation_mapping, \
                    get_continuous_attributions, get_continuous_raw_inputs, \
                    attr_normalizing_func, \
                    collect_info_for_metric, save_info

In [2]:
sst2_data_raw, targets, idxs = sample_random_glue_sst2()

Reusing dataset glue (/home/user/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/user/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-3b24abff24d1d8c0.arrow
Loading cached processed dataset at /home/user/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-5960909ab3834668.arrow


In [3]:
tokenizer, model = get_sst2_tok_n_model()

In [4]:
#define some containers to save some info
model_out_list, raw_attr_list, conti_attr_list, raw_input_list = [], [], [], []

In [5]:
from captum.attr import KernelShap
from captum.attr import visualization 

In [7]:
# lime = Lime(forward_func=model.forward)
ks   = KernelShap(model)

In [8]:
def generate_record(raw_review, target):
    #tokenizer operations
    tokenized = tokenizer(raw_review, truncation=True, return_offsets_mapping=True)
    offset_mapping = tokenized['offset_mapping']
    conti_map = get_continuation_mapping(offset_mapping)
    input_ids = torch.tensor(tokenized['input_ids']).unsqueeze(0)
    detokenized = [t.replace('Ġ', '') for t in tokenizer.convert_ids_to_tokens(input_ids[0])]
    
    #feeding input forward 
    input_emb = model.get_embeddings(input_ids)
    pred_prob = model(input_emb).item()
    
    #categorizing results
    pred_class = 'Pos' if pred_prob > 0.5 else 'Neg' 
    true_class = 'Pos' if target > 0.5 else 'Neg' 
    
    #attribution algorithm working
    attribution = ks.attribute(input_emb, n_samples=2000, perturbations_per_eval=200, show_progress=True)
    word_attributions = attribution.squeeze(0).sum(dim=1)
    word_attributions /= torch.norm(word_attributions)
    word_attributions = attr_normalizing_func(word_attributions)
    attr_score = torch.sum(word_attributions)
    attr_class = 'Pos' if attr_score > 0.5 else 'Neg'
    convergence_score = None
    
    
    #re-organizing tensors and arrays because words get split down
    conti_attr = get_continuous_attributions(conti_map, word_attributions)
    raw_input = get_continuous_raw_inputs(conti_map, detokenized)

#     print(f'word attributions {word_attributions}')
#     print(f'pred_prob {pred_prob}')
#     print(f'pred_class {pred_class}')
#     print(f'true_class {true_class}')
#     print(f'attribution {attribution}')
#     print(f'attr_class {attr_class}')
#     print(f'attr_score {attr_score}')
#     print(f'raw_input {raw_input}')

        
#     collect info for metrics later
    collect_info_for_metric(model_out_list, pred_prob, raw_attr_list, attribution, conti_attr_list, conti_attr, raw_input_list, raw_input)
        
    
    visual_record = visualization.VisualizationDataRecord(word_attributions=conti_attr,
                                                         pred_prob=pred_prob,
                                                         pred_class=pred_class,
                                                         true_class=true_class,
                                                         attr_class=attr_class,
                                                         attr_score=attr_score,
                                                         raw_input=raw_input,
                                                         convergence_score=convergence_score)
        
        
    return visual_record
      
    

In [9]:
for i, (datum_raw, target) in enumerate(zip(sst2_data_raw, targets), start=1):
    print(f'Raw review: {datum_raw}')
    print(f'GT target: {target}')
    visual_record=generate_record(datum_raw, target)
    print(visualization.visualize_text([visual_record]))
   

Raw review: its oscar nomination 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:10<00:00,  1.02s/it]


attr dtype torch.float32
word attr tensor([ 1.0000,  0.2784, -0.6545, -1.0000, -0.7009, -0.6343,  0.2675])
conti attr [tensor(1.), tensor(0.2784), tensor(-1.6545), tensor(-0.7009), tensor(-0.6343), tensor(0.2675)]
detokenized ['<s>', 'its', 'o', 'scar', 'nomination', '', '</s>']
len conti_raw 6
conti_raw ['<s>', 'its', 'oscar', 'nomination', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,-1.44,#s its oscar nomination #/s
,,,,




<IPython.core.display.HTML object>
Raw review: shenanigans and slapstick 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:14<00:00,  1.43s/it]


attr dtype torch.float32
word attr tensor([ 0.3274,  1.0000, -0.1998, -0.5437, -0.3201,  0.2680, -0.3575, -1.0000,
         0.6524,  0.2469])
conti attr [tensor(0.3274), tensor(-0.0636), tensor(0.2680), tensor(-1.3575), tensor(0.6524), tensor(0.2469)]
detokenized ['<s>', 's', 'hen', 'an', 'igans', 'and', 'slap', 'stick', '', '</s>']
len conti_raw 6
conti_raw ['<s>', 'shenanigans', 'and', 'slapstick', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,0.07,#s shenanigans and slapstick #/s
,,,,


<IPython.core.display.HTML object>
Raw review: an unsettling sight , 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.02it/s]


attr dtype torch.float32
word attr tensor([ 0.5727, -0.1701, -0.0722, -0.4545,  0.3485,  1.0000, -1.0000])
conti attr [tensor(0.5727), tensor(-0.1701), tensor(-0.0722), tensor(-0.4545), tensor(0.3485), tensor(1.), tensor(-1.)]
detokenized ['<s>', 'an', 'unsettling', 'sight', ',', '', '</s>']
len conti_raw 7
conti_raw ['<s>', 'an', 'unsettling', 'sight', ',', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.01),Neg,0.22,"#s an unsettling sight , #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: the climactic hourlong cricket match 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:14<00:00,  1.44s/it]


attr dtype torch.float32
word attr tensor([ 0.1660,  0.6161, -1.0000,  1.0000, -0.0201,  0.7464, -0.6447, -0.7888,
        -0.5329,  0.8344])
conti attr [tensor(0.1660), tensor(0.6161), tensor(0.), tensor(0.7263), tensor(-0.6447), tensor(-0.7888), tensor(-0.5329), tensor(0.8344)]
detokenized ['<s>', 'the', 'clim', 'actic', 'hour', 'long', 'cricket', 'match', '', '</s>']
len conti_raw 8
conti_raw ['<s>', 'the', 'climactic', 'hourlong', 'cricket', 'match', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,0.38,#s the climactic hourlong cricket match #/s
,,,,




<IPython.core.display.HTML object>
Raw review: alternating between facetious comic parody and pulp melodrama , this smart-aleck movie ... tosses around some intriguing questions about the difference between human and android life 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:55<00:00,  5.56s/it]


attr dtype torch.float32
word attr tensor([ 1.0000, -0.5082, -0.3429, -0.3895, -0.4550, -0.9932, -0.3514, -0.4625,
        -0.4907,  0.0046,  0.0164, -1.0000, -0.4243, -0.6827, -0.6346, -0.4877,
         0.0134,  0.0846, -0.9600,  0.0183, -0.3586, -0.4504, -0.2494, -0.8272,
         0.0010,  0.0707, -0.6796, -0.0363,  0.0688,  0.0593, -0.5592,  0.0328,
         0.0541, -0.3760, -0.4554, -0.6743, -0.1327])
conti attr [tensor(1.), tensor(-0.8510), tensor(-0.3895), tensor(-1.4483), tensor(-0.3514), tensor(-0.4625), tensor(-0.4907), tensor(0.0046), tensor(-1.4079), tensor(-0.6827), tensor(-0.6346), tensor(-1.3497), tensor(0.0183), tensor(-0.3586), tensor(-0.6999), tensor(-0.8272), tensor(0.0010), tensor(0.0707), tensor(-0.6796), tensor(-0.0363), tensor(0.0688), tensor(0.0593), tensor(-0.5592), tensor(0.0328), tensor(0.0541), tensor(-0.3760), tensor(-0.4554), tensor(-0.6743), tensor(-0.1327)]
detokenized ['<s>', 'altern', 'ating', 'between', 'facet', 'ious', 'comic', 'parody', 'and', 'pulp'

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,-11.56,"#s alternating between facetious comic parody and pulp melodrama , this smart-aleck movie ... tosses around some intriguing questions about the difference between human and android life #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: to be a part of that elusive adult world 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:16<00:00,  1.66s/it]


attr dtype torch.float32
word attr tensor([-1.0000,  0.3724,  0.5226, -0.1462,  0.3356,  0.5951, -0.6349,  0.4762,
         0.2690,  0.6604,  1.0000, -0.0533])
conti attr [tensor(-1.), tensor(0.3724), tensor(0.5226), tensor(-0.1462), tensor(0.3356), tensor(0.5951), tensor(-0.6349), tensor(0.4762), tensor(0.2690), tensor(0.6604), tensor(1.), tensor(-0.0533)]
detokenized ['<s>', 'to', 'be', 'a', 'part', 'of', 'that', 'elusive', 'adult', 'world', '', '</s>']
len conti_raw 12
conti_raw ['<s>', 'to', 'be', 'a', 'part', 'of', 'that', 'elusive', 'adult', 'world', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Pos,2.4,#s to be a part of that elusive adult world #/s
,,,,


<IPython.core.display.HTML object>
Raw review: emotional power 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.17it/s]


attr dtype torch.float32
word attr tensor([-1.0000,  1.0000, -0.1323, -0.3114, -0.7308,  0.6924])
conti attr [tensor(-1.), tensor(0.8677), tensor(-0.3114), tensor(-0.7308), tensor(0.6924)]
detokenized ['<s>', 'em', 'otional', 'power', '', '</s>']
len conti_raw 5
conti_raw ['<s>', 'emotional', 'power', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,-0.48,#s emotional power #/s
,,,,




<IPython.core.display.HTML object>
Raw review: reminds you of why animation is such a perfect medium for children , because of the way it allows the mind to enter and accept another world 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:47<00:00,  4.79s/it]


attr dtype torch.float32
word attr tensor([ 0.2683, -0.2372,  0.3891, -0.3485, -0.1214, -0.5584,  0.6380, -0.9796,
         0.1722,  0.5153,  0.2854, -0.3466, -0.7322,  1.0000,  0.4542, -0.1678,
         0.2246, -0.4732, -0.2011,  0.3632,  0.0736, -0.0257,  0.1866, -1.0000,
        -0.5275,  0.4589,  0.4325,  0.2833,  0.2892, -0.1364, -0.7283,  0.4037])
conti attr [tensor(0.2683), tensor(-0.1967), tensor(-0.1214), tensor(-0.5584), tensor(0.6380), tensor(-0.9796), tensor(0.1722), tensor(0.5153), tensor(0.2854), tensor(-0.3466), tensor(-0.7322), tensor(1.), tensor(0.4542), tensor(-0.1678), tensor(0.2246), tensor(-0.4732), tensor(-0.2011), tensor(0.3632), tensor(0.0736), tensor(-0.0257), tensor(0.1866), tensor(-1.), tensor(-0.5275), tensor(0.4589), tensor(0.4325), tensor(0.2833), tensor(0.2892), tensor(-0.1364), tensor(-0.7283), tensor(0.4037)]
detokenized ['<s>', 'rem', 'ind', 's', 'you', 'of', 'why', 'animation', 'is', 'such', 'a', 'perfect', 'medium', 'for', 'children', ',', 'because',

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,-0.15,"#s reminds you of why animation is such a perfect medium for children , because of the way it allows the mind to enter and accept another world #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: unparalleled proportions , writer-director parker 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:16<00:00,  1.69s/it]


attr dtype torch.float32
word attr tensor([ 0.3437, -0.0747,  0.0877,  1.0000, -1.0000,  0.6869, -0.3787, -0.6043,
        -0.2512, -0.0725,  0.5389,  0.2061])
conti attr [tensor(0.3437), tensor(0.0130), tensor(1.), tensor(-1.), tensor(-0.2961), tensor(-0.3237), tensor(0.5389), tensor(0.2061)]
detokenized ['<s>', 'un', 'paralleled', 'proportions', ',', 'writer', '-', 'director', 'park', 'er', '', '</s>']
len conti_raw 8
conti_raw ['<s>', 'unparalleled', 'proportions', ',', 'writer-director', 'parker', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,0.48,"#s unparalleled proportions , writer-director parker #/s"
,,,,


<IPython.core.display.HTML object>
Raw review: this surprisingly decent flick 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.03it/s]


attr dtype torch.float32
word attr tensor([ 0.4010,  1.0000, -1.0000, -0.9665, -0.4801, -0.8841,  0.2490])
conti attr [tensor(0.4010), tensor(1.), tensor(-1.), tensor(-0.9665), tensor(-0.4801), tensor(-0.8841), tensor(0.2490)]
detokenized ['<s>', 'this', 'surprisingly', 'decent', 'flick', '', '</s>']
len conti_raw 7
conti_raw ['<s>', 'this', 'surprisingly', 'decent', 'flick', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,-1.68,#s this surprisingly decent flick #/s
,,,,




<IPython.core.display.HTML object>
Raw review: about the best thing you could say about narc is that it 's a rock-solid little genre picture . 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:37<00:00,  3.72s/it]


attr dtype torch.float32
word attr tensor([-0.0193,  0.0319,  0.1049, -0.3754,  1.0000, -1.0000, -0.3529,  0.7383,
         0.5017, -0.5345, -0.2777, -0.2025,  0.6378,  0.0518,  0.9739,  0.8634,
         0.5468,  0.3591,  0.0300,  0.1292, -0.2095,  0.0902, -0.4821, -0.6404,
         0.7070])
conti attr [tensor(-0.0193), tensor(0.0319), tensor(0.1049), tensor(-0.3754), tensor(1.), tensor(-1.), tensor(-0.3529), tensor(0.7383), tensor(0.5017), tensor(-0.5345), tensor(-0.2777), tensor(-0.2025), tensor(0.6378), tensor(1.0257), tensor(0.8634), tensor(0.9360), tensor(0.1292), tensor(-0.2095), tensor(0.0902), tensor(-0.4821), tensor(-0.6404), tensor(0.7070)]
detokenized ['<s>', 'about', 'the', 'best', 'thing', 'you', 'could', 'say', 'about', 'narc', 'is', 'that', 'it', "'", 's', 'a', 'rock', '-', 'solid', 'little', 'genre', 'picture', '.', '', '</s>']
len conti_raw 22
conti_raw ['<s>', 'about', 'the', 'best', 'thing', 'you', 'could', 'say', 'about', 'narc', 'is', 'that', 'it', "'s", 'a', 'rock

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Pos,2.67,#s about the best thing you could say about narc is that it 's a rock-solid little genre picture . #/s
,,,,


<IPython.core.display.HTML object>
Raw review: the very best 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.19it/s]


attr dtype torch.float32
word attr tensor([ 1.0000, -1.0000, -0.4282,  0.5683,  0.4327,  0.3508])
conti attr [tensor(1.), tensor(-1.), tensor(-0.4282), tensor(0.5683), tensor(0.4327), tensor(0.3508)]
detokenized ['<s>', 'the', 'very', 'best', '', '</s>']
len conti_raw 6
conti_raw ['<s>', 'the', 'very', 'best', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Pos,0.92,#s the very best #/s
,,,,




<IPython.core.display.HTML object>
Raw review: been modeled on the worst revenge-of-the-nerds clichés the filmmakers could dredge up 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:35<00:00,  3.54s/it]


attr dtype torch.float32
word attr tensor([-0.7265, -0.4582, -0.5482, -0.4506, -0.0760, -0.1792, -1.0000, -0.3327,
        -0.1289, -0.6816,  0.1205, -0.4033, -0.3253, -0.2258, -0.2803, -0.1896,
        -0.5255,  0.3509, -0.5931, -0.2857, -0.4782,  1.0000, -0.8297, -0.3993])
conti attr [tensor(-0.7265), tensor(-0.4582), tensor(-0.5482), tensor(-0.4506), tensor(-0.0760), tensor(-0.1792), tensor(-2.9771), tensor(-0.4700), tensor(-0.5255), tensor(0.3509), tensor(-0.5931), tensor(-0.7639), tensor(1.), tensor(-0.8297), tensor(-0.3993)]
detokenized ['<s>', 'been', 'modeled', 'on', 'the', 'worst', 'revenge', '-', 'of', '-', 'the', '-', 'ner', 'ds', 'clich', 'Ã©s', 'the', 'filmmakers', 'could', 'dred', 'ge', 'up', '', '</s>']
len conti_raw 15
conti_raw ['<s>', 'been', 'modeled', 'on', 'the', 'worst', 'revenge-of-the-nerds', 'clichÃ©s', 'the', 'filmmakers', 'could', 'dredge', 'up', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-7.65,#s been modeled on the worst revenge-of-the-nerds clichÃ©s the filmmakers could dredge up #/s
,,,,


<IPython.core.display.HTML object>
Raw review: tell you 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:07<00:00,  1.43it/s]


attr dtype torch.float32
word attr tensor([ 0.1462,  1.0000, -0.6980,  0.1513, -1.0000])
conti attr [tensor(0.1462), tensor(1.), tensor(-0.6980), tensor(0.1513), tensor(-1.)]
detokenized ['<s>', 'tell', 'you', '', '</s>']
len conti_raw 5
conti_raw ['<s>', 'tell', 'you', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.93),Neg,-0.4,#s tell you #/s
,,,,


<IPython.core.display.HTML object>
Raw review: utterly absorbing 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.17it/s]


attr dtype torch.float32
word attr tensor([ 1.0000, -0.0578,  0.4449, -0.3522,  0.1492, -1.0000])
conti attr [tensor(1.), tensor(0.3871), tensor(-0.3522), tensor(0.1492), tensor(-1.)]
detokenized ['<s>', 'utter', 'ly', 'absorbing', '', '</s>']
len conti_raw 5
conti_raw ['<s>', 'utterly', 'absorbing', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,0.18,#s utterly absorbing #/s
,,,,


<IPython.core.display.HTML object>
Raw review: restate 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:07<00:00,  1.38it/s]


attr dtype torch.float32
word attr tensor([-0.0081, -1.0000,  0.4220, -0.1164,  1.0000])
conti attr [tensor(-0.0081), tensor(-0.5780), tensor(-0.1164), tensor(1.)]
detokenized ['<s>', 'rest', 'ate', '', '</s>']
len conti_raw 4
conti_raw ['<s>', 'restate', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.05),Neg,0.3,#s restate #/s
,,,,




<IPython.core.display.HTML object>
Raw review: bears about as much resemblance to the experiences of most battered women as spider-man 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:29<00:00,  2.95s/it]


attr dtype torch.float32
word attr tensor([-0.2661,  0.4456, -0.5483, -0.3658,  0.4520, -1.0000, -0.3875, -0.3787,
        -0.8646, -0.5617,  1.0000, -0.0476, -0.4859, -0.2032, -0.6937, -0.3215,
        -0.7128,  0.9225, -0.3222, -0.3193])
conti attr [tensor(-0.2661), tensor(-0.1028), tensor(-0.3658), tensor(0.4520), tensor(-1.), tensor(-0.3875), tensor(-0.3787), tensor(-0.8646), tensor(-0.5617), tensor(1.), tensor(-0.0476), tensor(-0.4859), tensor(-0.2032), tensor(-0.6937), tensor(-0.1118), tensor(-0.3222), tensor(-0.3193)]
detokenized ['<s>', 'b', 'ears', 'about', 'as', 'much', 'resemblance', 'to', 'the', 'experiences', 'of', 'most', 'battered', 'women', 'as', 'spider', '-', 'man', '', '</s>']
len conti_raw 17
conti_raw ['<s>', 'bears', 'about', 'as', 'much', 'resemblance', 'to', 'the', 'experiences', 'of', 'most', 'battered', 'women', 'as', 'spider-man', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-4.66,#s bears about as much resemblance to the experiences of most battered women as spider-man #/s
,,,,


<IPython.core.display.HTML object>
Raw review: expressively performed 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.20it/s]


attr dtype torch.float32
word attr tensor([-1.0000, -0.1612,  0.9828,  1.0000,  0.7861, -0.0726])
conti attr [tensor(-1.), tensor(0.8216), tensor(1.), tensor(0.7861), tensor(-0.0726)]
detokenized ['<s>', 'express', 'ively', 'performed', '', '</s>']
len conti_raw 5
conti_raw ['<s>', 'expressively', 'performed', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Pos,1.54,#s expressively performed #/s
,,,,




<IPython.core.display.HTML object>
Raw review: the acting is amateurish , the cinematography is atrocious 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:21<00:00,  2.17s/it]


attr dtype torch.float32
word attr tensor([-1.0000e+00,  1.0000e+00,  9.2378e-01,  8.6230e-01,  2.6556e-01,
         8.1396e-01,  3.7552e-01,  4.4780e-01,  9.0640e-01,  2.2590e-01,
        -3.6108e-04,  6.1212e-01,  3.2440e-01,  4.0477e-01,  1.6803e-01])
conti attr [tensor(-1.), tensor(1.), tensor(0.9238), tensor(0.8623), tensor(1.0795), tensor(0.3755), tensor(0.4478), tensor(1.1323), tensor(-0.0004), tensor(0.9365), tensor(0.4048), tensor(0.1680)]
detokenized ['<s>', 'the', 'acting', 'is', 'amateur', 'ish', ',', 'the', 'cinem', 'atography', 'is', 'atro', 'cious', '', '</s>']
len conti_raw 12
conti_raw ['<s>', 'the', 'acting', 'is', 'amateurish', ',', 'the', 'cinematography', 'is', 'atrocious', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Pos,6.33,"#s the acting is amateurish , the cinematography is atrocious #/s"
,,,,


<IPython.core.display.HTML object>
Raw review: solidly constructed 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.19it/s]


attr dtype torch.float32
word attr tensor([-1.0000,  1.0000, -0.8826, -0.1446,  0.8447,  0.5026])
conti attr [tensor(-1.), tensor(0.1174), tensor(-0.1446), tensor(0.8447), tensor(0.5026)]
detokenized ['<s>', 'solid', 'ly', 'constructed', '', '</s>']
len conti_raw 5
conti_raw ['<s>', 'solidly', 'constructed', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,0.32,#s solidly constructed #/s
,,,,




<IPython.core.display.HTML object>
Raw review: are undermined by the movie 's presentation , which is way too stagy 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:25<00:00,  2.59s/it]


attr dtype torch.float32
word attr tensor([ 0.4469, -1.0000, -0.0821,  0.5635,  0.1307,  0.1910, -0.5156,  0.6196,
         1.0000,  0.0160, -0.1458,  0.1770, -0.5625,  0.1852,  0.7991, -0.9728,
        -0.0758, -0.4474])
conti attr [tensor(0.4469), tensor(-1.), tensor(-0.0821), tensor(0.5635), tensor(0.1307), tensor(0.1910), tensor(0.1039), tensor(1.), tensor(0.0160), tensor(-0.1458), tensor(0.1770), tensor(-0.5625), tensor(0.1852), tensor(-0.1737), tensor(-0.0758), tensor(-0.4474)]
detokenized ['<s>', 'are', 'undermined', 'by', 'the', 'movie', "'", 's', 'presentation', ',', 'which', 'is', 'way', 'too', 'stag', 'y', '', '</s>']
len conti_raw 16
conti_raw ['<s>', 'are', 'undermined', 'by', 'the', 'movie', "'s", 'presentation', ',', 'which', 'is', 'way', 'too', 'stagy', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,0.33,"#s are undermined by the movie 's presentation , which is way too stagy #/s"
,,,,


<IPython.core.display.HTML object>
Raw review: a great film 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.18it/s]


attr dtype torch.float32
word attr tensor([ 1.0000, -0.2470, -0.3284, -1.0000,  0.3682, -0.8242])
conti attr [tensor(1.), tensor(-0.2470), tensor(-0.3284), tensor(-1.), tensor(0.3682), tensor(-0.8242)]
detokenized ['<s>', 'a', 'great', 'film', '', '</s>']
len conti_raw 6
conti_raw ['<s>', 'a', 'great', 'film', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,-1.03,#s a great film #/s
,,,,


<IPython.core.display.HTML object>
Raw review: charm 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:07<00:00,  1.38it/s]


attr dtype torch.float32
word attr tensor([ 1.0000, -0.6612, -0.3243,  0.8962, -1.0000])
conti attr [tensor(1.), tensor(-0.9855), tensor(0.8962), tensor(-1.)]
detokenized ['<s>', 'ch', 'arm', '', '</s>']
len conti_raw 4
conti_raw ['<s>', 'charm', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,-0.09,#s charm #/s
,,,,




<IPython.core.display.HTML object>
Raw review: this new jangle of noise , mayhem and stupidity 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:18<00:00,  1.83s/it]


attr dtype torch.float32
word attr tensor([-1.0000, -0.1781,  0.2631, -0.5244, -0.2693, -0.0845, -0.6526, -0.0094,
        -0.8384, -0.5183, -0.5302, -0.4488,  1.0000])
conti attr [tensor(-1.), tensor(-0.1781), tensor(0.2631), tensor(-0.7937), tensor(-0.0845), tensor(-0.6526), tensor(-0.0094), tensor(-0.8384), tensor(-0.5183), tensor(-0.5302), tensor(-0.4488), tensor(1.)]
detokenized ['<s>', 'this', 'new', 'j', 'angle', 'of', 'noise', ',', 'mayhem', 'and', 'stupidity', '', '</s>']
len conti_raw 12
conti_raw ['<s>', 'this', 'new', 'jangle', 'of', 'noise', ',', 'mayhem', 'and', 'stupidity', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-3.79,"#s this new jangle of noise , mayhem and stupidity #/s"
,,,,


<IPython.core.display.HTML object>
Raw review: sustains it 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.02it/s]


attr dtype torch.float32
word attr tensor([ 0.0118, -0.5686, -0.4574,  0.5786, -1.0000,  1.0000,  0.4488])
conti attr [tensor(0.0118), tensor(-0.4474), tensor(-1.), tensor(1.), tensor(0.4488)]
detokenized ['<s>', 's', 'ust', 'ains', 'it', '', '</s>']
len conti_raw 5
conti_raw ['<s>', 'sustains', 'it', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,0.01,#s sustains it #/s
,,,,




<IPython.core.display.HTML object>
Raw review: is so deadly dull that watching the proverbial paint dry would be a welcome improvement . 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:28<00:00,  2.80s/it]


attr dtype torch.float32
word attr tensor([-0.9710, -0.4860, -0.4656, -0.6303, -1.0000, -0.1873,  0.6883, -0.0946,
        -0.4777, -0.4002,  0.2970, -0.6943, -0.4494, -0.1424, -0.7258,  1.0000,
        -0.9360, -0.3511, -0.1878])
conti attr [tensor(-0.9710), tensor(-0.4860), tensor(-0.4656), tensor(-0.6303), tensor(-1.), tensor(-0.1873), tensor(0.6883), tensor(-0.0946), tensor(-0.4777), tensor(-0.4002), tensor(0.2970), tensor(-0.6943), tensor(-0.4494), tensor(-0.1424), tensor(-0.7258), tensor(1.), tensor(-0.9360), tensor(-0.3511), tensor(-0.1878)]
detokenized ['<s>', 'is', 'so', 'deadly', 'dull', 'that', 'watching', 'the', 'proverbial', 'paint', 'dry', 'would', 'be', 'a', 'welcome', 'improvement', '.', '', '</s>']
len conti_raw 19
conti_raw ['<s>', 'is', 'so', 'deadly', 'dull', 'that', 'watching', 'the', 'proverbial', 'paint', 'dry', 'would', 'be', 'a', 'welcome', 'improvement', '.', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-6.21,#s is so deadly dull that watching the proverbial paint dry would be a welcome improvement . #/s
,,,,


<IPython.core.display.HTML object>
Raw review: to accomplish 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:07<00:00,  1.34it/s]


attr dtype torch.float32
word attr tensor([ 1.0000, -0.0947, -0.0293,  0.4548, -1.0000])
conti attr [tensor(1.), tensor(-0.0947), tensor(-0.0293), tensor(0.4548), tensor(-1.)]
detokenized ['<s>', 'to', 'accomplish', '', '</s>']
len conti_raw 5
conti_raw ['<s>', 'to', 'accomplish', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.99),Neg,0.33,#s to accomplish #/s
,,,,


<IPython.core.display.HTML object>
Raw review: does n't work . 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:10<00:00,  1.10s/it]


attr dtype torch.float32
word attr tensor([ 0.0339, -0.0334,  0.3015,  1.0000, -0.2309, -0.1541, -1.0000,  0.1786])
conti attr [tensor(0.0339), tensor(-0.0334), tensor(1.3015), tensor(-0.2309), tensor(-0.1541), tensor(-1.), tensor(0.1786)]
detokenized ['<s>', 'does', 'n', "'t", 'work', '.', '', '</s>']
len conti_raw 7
conti_raw ['<s>', 'does', "n't", 'work', '.', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,0.1,#s does n't work . #/s
,,,,




<IPython.core.display.HTML object>
Raw review: given it a one-star rating 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:14<00:00,  1.41s/it]


attr dtype torch.float32
word attr tensor([-0.3572, -0.1806, -1.0000, -0.0803,  1.0000, -0.4377, -0.2284, -0.2971,
         0.3388,  0.4117])
conti attr [tensor(-0.3572), tensor(-0.1806), tensor(-1.), tensor(-0.0803), tensor(0.3339), tensor(-0.2971), tensor(0.3388), tensor(0.4117)]
detokenized ['<s>', 'given', 'it', 'a', 'one', '-', 'star', 'rating', '', '</s>']
len conti_raw 8
conti_raw ['<s>', 'given', 'it', 'a', 'one-star', 'rating', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-0.83,#s given it a one-star rating #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ice cube is n't quite out of ripe screwball ideas , but friday after next spreads them pretty thin 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:37<00:00,  3.77s/it]


attr dtype torch.float32
word attr tensor([ 0.6924, -0.2138, -0.2505,  0.2884, -0.7924, -0.3839, -0.0883, -0.3653,
        -0.5188,  0.8935,  0.2602, -0.8725,  0.0915,  0.6206,  0.1044, -0.7408,
         0.2366,  0.4069,  0.4151,  0.3788,  0.3577,  1.0000, -0.3170,  0.0042,
        -1.0000])
conti attr [tensor(0.6924), tensor(-0.2138), tensor(-0.2505), tensor(0.2884), tensor(-1.1762), tensor(-0.0883), tensor(-0.3653), tensor(-0.5188), tensor(0.8935), tensor(-0.6123), tensor(0.0915), tensor(0.6206), tensor(0.1044), tensor(-0.5042), tensor(0.4069), tensor(0.4151), tensor(0.3788), tensor(0.3577), tensor(1.), tensor(-0.3170), tensor(0.0042), tensor(-1.)]
detokenized ['<s>', 'ice', 'cube', 'is', 'n', "'t", 'quite', 'out', 'of', 'ripe', 'screw', 'ball', 'ideas', ',', 'but', 'fr', 'iday', 'after', 'next', 'spreads', 'them', 'pretty', 'thin', '', '</s>']
len conti_raw 22
conti_raw ['<s>', 'ice', 'cube', 'is', "n't", 'quite', 'out', 'of', 'ripe', 'screwball', 'ideas', ',', 'but', 'friday', 'aft

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,0.21,"#s ice cube is n't quite out of ripe screwball ideas , but friday after next spreads them pretty thin #/s"
,,,,


<IPython.core.display.HTML object>
Raw review: been trying to forget 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.02it/s]


attr dtype torch.float32
word attr tensor([-1.0000,  0.5158,  0.9535, -0.0480,  0.0745,  0.7206,  1.0000])
conti attr [tensor(-1.), tensor(0.5158), tensor(0.9535), tensor(-0.0480), tensor(0.0745), tensor(0.7206), tensor(1.)]
detokenized ['<s>', 'been', 'trying', 'to', 'forget', '', '</s>']
len conti_raw 7
conti_raw ['<s>', 'been', 'trying', 'to', 'forget', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.01),Pos,2.22,#s been trying to forget #/s
,,,,


<IPython.core.display.HTML object>
Raw review: big stars and 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.20it/s]


attr dtype torch.float32
word attr tensor([ 0.4053,  1.0000, -1.0000,  0.1715, -0.5310,  0.1530])
conti attr [tensor(0.4053), tensor(1.), tensor(-1.), tensor(0.1715), tensor(-0.5310), tensor(0.1530)]
detokenized ['<s>', 'big', 'stars', 'and', '', '</s>']
len conti_raw 6
conti_raw ['<s>', 'big', 'stars', 'and', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,0.2,#s big stars and #/s
,,,,


<IPython.core.display.HTML object>
Raw review: of cheesy dialogue 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.18it/s]


attr dtype torch.float32
word attr tensor([-0.6597,  0.7339,  0.7563, -1.0000, -0.0793,  1.0000])
conti attr [tensor(-0.6597), tensor(0.7339), tensor(0.7563), tensor(-1.), tensor(-0.0793), tensor(1.)]
detokenized ['<s>', 'of', 'cheesy', 'dialogue', '', '</s>']
len conti_raw 6
conti_raw ['<s>', 'of', 'cheesy', 'dialogue', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Pos,0.75,#s of cheesy dialogue #/s
,,,,




<IPython.core.display.HTML object>
Raw review: the château is never quite able to overcome the cultural moat surrounding its ludicrous and contrived plot . ' 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:40<00:00,  4.07s/it]


attr dtype torch.float32
word attr tensor([ 0.9362, -0.3854, -0.3337, -0.0698,  0.3780,  0.2141, -0.5537, -0.2624,
        -0.0035,  0.2771, -0.0646, -0.2555,  0.0706,  0.0038,  0.1399, -0.6354,
         0.1603,  0.5924, -0.4567, -1.0000, -0.2972,  1.0000, -0.6699, -0.0035,
        -0.4162, -0.1425,  0.0364])
conti attr [tensor(0.9362), tensor(-0.3854), tensor(0.1886), tensor(-0.5537), tensor(-0.2624), tensor(-0.0035), tensor(0.2771), tensor(-0.0646), tensor(-0.2555), tensor(0.0706), tensor(0.0038), tensor(-0.4956), tensor(0.1603), tensor(0.5924), tensor(-0.4567), tensor(-1.), tensor(0.7028), tensor(-0.6699), tensor(-0.0035), tensor(-0.4162), tensor(-0.1425), tensor(0.0364)]
detokenized ['<s>', 'the', 'ch', 'Ã¢', 'te', 'au', 'is', 'never', 'quite', 'able', 'to', 'overcome', 'the', 'cultural', 'mo', 'at', 'surrounding', 'its', 'ludicrous', 'and', 'cont', 'rived', 'plot', '.', "'", '', '</s>']
len conti_raw 22
conti_raw ['<s>', 'the', 'chÃ¢teau', 'is', 'never', 'quite', 'able', 'to', 'ov

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-1.74,#s the chÃ¢teau is never quite able to overcome the cultural moat surrounding its ludicrous and contrived plot . ' #/s
,,,,


<IPython.core.display.HTML object>
Raw review: principled 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.19it/s]


attr dtype torch.float32
word attr tensor([ 0.2513,  1.0000, -1.0000, -0.1010,  0.2007,  0.5303])
conti attr [tensor(0.2513), tensor(-0.1010), tensor(0.2007), tensor(0.5303)]
detokenized ['<s>', 'pr', 'in', 'cipled', '', '</s>']
len conti_raw 4
conti_raw ['<s>', 'principled', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Pos,0.88,#s principled #/s
,,,,




<IPython.core.display.HTML object>
Raw review: end it all by stuffing himself into an electric pencil sharpener 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:21<00:00,  2.16s/it]


attr dtype torch.float32
word attr tensor([ 1.0000, -0.5282, -0.7054, -0.7574, -0.6512,  0.1426, -0.6338, -0.5221,
        -0.6679, -0.8661, -0.9594, -0.3637, -1.0000, -0.0186,  0.4116])
conti attr [tensor(1.), tensor(-0.5282), tensor(-0.7054), tensor(-0.7574), tensor(-0.6512), tensor(0.1426), tensor(-0.6338), tensor(-0.5221), tensor(-0.6679), tensor(-0.8661), tensor(-0.9594), tensor(-1.3637), tensor(-0.0186), tensor(0.4116)]
detokenized ['<s>', 'end', 'it', 'all', 'by', 'stuffing', 'himself', 'into', 'an', 'electric', 'pencil', 'sharp', 'ener', '', '</s>']
len conti_raw 14
conti_raw ['<s>', 'end', 'it', 'all', 'by', 'stuffing', 'himself', 'into', 'an', 'electric', 'pencil', 'sharpener', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-6.12,#s end it all by stuffing himself into an electric pencil sharpener #/s
,,,,


<IPython.core.display.HTML object>
Raw review: funniest idea 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.02it/s]


attr dtype torch.float32
word attr tensor([ 0.2757,  1.0000, -0.4784,  0.0392, -1.0000, -0.8813, -0.4515])
conti attr [tensor(0.2757), tensor(0.5608), tensor(-1.), tensor(-0.8813), tensor(-0.4515)]
detokenized ['<s>', 'fun', 'n', 'iest', 'idea', '', '</s>']
len conti_raw 5
conti_raw ['<s>', 'funniest', 'idea', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.99),Neg,-1.5,#s funniest idea #/s
,,,,


<IPython.core.display.HTML object>
Raw review: silly and tedious 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.00it/s]


attr dtype torch.float32
word attr tensor([ 1.0000, -0.4747, -0.7560,  0.2575,  0.4180, -1.0000,  0.2627])
conti attr [tensor(1.), tensor(-1.2307), tensor(0.2575), tensor(0.4180), tensor(-1.), tensor(0.2627)]
detokenized ['<s>', 's', 'illy', 'and', 'tedious', '', '</s>']
len conti_raw 6
conti_raw ['<s>', 'silly', 'and', 'tedious', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-0.29,#s silly and tedious #/s
,,,,


<IPython.core.display.HTML object>
Raw review: two surefire , beloved genres 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:12<00:00,  1.24s/it]


attr dtype torch.float32
word attr tensor([ 0.2217,  0.5879,  1.0000, -0.3070, -0.6674, -1.0000,  0.7292, -0.5009,
         0.8706])
conti attr [tensor(0.2217), tensor(0.5879), tensor(0.6930), tensor(-0.6674), tensor(-1.), tensor(0.7292), tensor(-0.5009), tensor(0.8706)]
detokenized ['<s>', 'two', 'sure', 'fire', ',', 'beloved', 'genres', '', '</s>']
len conti_raw 8
conti_raw ['<s>', 'two', 'surefire', ',', 'beloved', 'genres', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Pos,0.93,"#s two surefire , beloved genres #/s"
,,,,


<IPython.core.display.HTML object>
Raw review: most charmless 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.18it/s]


attr dtype torch.float32
word attr tensor([ 0.7607, -0.9188, -0.1212, -1.0000,  1.0000,  0.5716])
conti attr [tensor(0.7607), tensor(-0.9188), tensor(-1.1212), tensor(1.), tensor(0.5716)]
detokenized ['<s>', 'most', 'charm', 'less', '', '</s>']
len conti_raw 5
conti_raw ['<s>', 'most', 'charmless', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,0.29,#s most charmless #/s
,,,,




<IPython.core.display.HTML object>
Raw review: offers us the sense that on some elemental level , lilia deeply wants to break free of her old life . 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:39<00:00,  3.92s/it]


attr dtype torch.float32
word attr tensor([ 0.0320,  0.2634, -1.0000,  0.3746, -0.2377,  0.2108, -0.4584, -0.5030,
        -0.3162, -0.0486, -0.1056, -0.6096, -0.7937,  0.0652,  0.2309,  0.6330,
         0.7885, -0.2392, -0.1708, -0.2703, -0.2782,  0.5043,  0.3712,  0.1337,
         0.8490,  1.0000])
conti attr [tensor(0.0320), tensor(-0.7366), tensor(0.3746), tensor(-0.2377), tensor(0.2108), tensor(-0.4584), tensor(-0.5030), tensor(-0.3162), tensor(-0.0486), tensor(-0.1056), tensor(-0.6096), tensor(-0.7285), tensor(0.2309), tensor(0.6330), tensor(0.7885), tensor(-0.2392), tensor(-0.1708), tensor(-0.2703), tensor(-0.2782), tensor(0.5043), tensor(0.3712), tensor(0.1337), tensor(0.8490), tensor(1.)]
detokenized ['<s>', 'off', 'ers', 'us', 'the', 'sense', 'that', 'on', 'some', 'elemental', 'level', ',', 'l', 'ilia', 'deeply', 'wants', 'to', 'break', 'free', 'of', 'her', 'old', 'life', '.', '', '</s>']
len conti_raw 24
conti_raw ['<s>', 'offers', 'us', 'the', 'sense', 'that', 'on', 'some',

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (1.00),Neg,0.43,"#s offers us the sense that on some elemental level , lilia deeply wants to break free of her old life . #/s"
,,,,


<IPython.core.display.HTML object>
Raw review: might be best forgotten 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.02it/s]


attr dtype torch.float32
word attr tensor([ 0.9474,  1.0000, -0.0539, -0.2257, -0.6070, -0.4454, -1.0000])
conti attr [tensor(0.9474), tensor(1.), tensor(-0.0539), tensor(-0.2257), tensor(-0.6070), tensor(-0.4454), tensor(-1.)]
detokenized ['<s>', 'might', 'be', 'best', 'forgotten', '', '</s>']
len conti_raw 7
conti_raw ['<s>', 'might', 'be', 'best', 'forgotten', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-0.38,#s might be best forgotten #/s
,,,,




<IPython.core.display.HTML object>
Raw review: a substantial arc of change that does n't produce any real transformation 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:23<00:00,  2.34s/it]


attr dtype torch.float32
word attr tensor([ 1.0000, -0.2251,  0.1946, -0.3686,  0.0204,  0.0534, -0.2313,  0.0664,
         0.0441, -1.0000, -0.3968,  0.0982, -0.0252, -0.2732, -0.3768, -0.2795])
conti attr [tensor(1.), tensor(-0.2251), tensor(0.1946), tensor(-0.3686), tensor(0.0204), tensor(0.0534), tensor(-0.2313), tensor(0.0664), tensor(-0.9559), tensor(-0.3968), tensor(0.0982), tensor(-0.0252), tensor(-0.2732), tensor(-0.3768), tensor(-0.2795)]
detokenized ['<s>', 'a', 'substantial', 'arc', 'of', 'change', 'that', 'does', 'n', "'t", 'produce', 'any', 'real', 'transformation', '', '</s>']
len conti_raw 15
conti_raw ['<s>', 'a', 'substantial', 'arc', 'of', 'change', 'that', 'does', "n't", 'produce', 'any', 'real', 'transformation', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-1.7,#s a substantial arc of change that does n't produce any real transformation #/s
,,,,


<IPython.core.display.HTML object>
Raw review: unadorned 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.15it/s]


attr dtype torch.float32
word attr tensor([ 1.0000, -1.0000, -0.4209, -0.8522,  0.3705, -0.3220])
conti attr [tensor(1.), tensor(-2.2730), tensor(0.3705), tensor(-0.3220)]
detokenized ['<s>', 'un', 'ad', 'orned', '', '</s>']
len conti_raw 4
conti_raw ['<s>', 'unadorned', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Pos (1.00),Neg,-1.22,#s unadorned #/s
,,,,




<IPython.core.display.HTML object>
Raw review: this is the kind of movie that you only need to watch for about thirty seconds before you say to yourself , ` ah , yes , here we have a bad , bad , bad movie . ' 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:09<00:00,  6.91s/it]


attr dtype torch.float32
word attr tensor([-0.6936, -0.0456, -0.2879, -0.3701, -0.5284, -0.0671, -0.1359,  1.0000,
        -0.2423, -0.2147, -0.9969,  0.1338,  0.3289, -0.0308, -0.5886, -0.5807,
        -0.2481, -0.3777, -0.1175, -0.2869, -0.2500, -0.3473, -0.5214, -0.2559,
        -0.8163, -0.3614,  0.7105, -0.0888, -1.0000, -0.3062, -0.5702, -0.8291,
        -0.6921, -0.5338, -0.4144, -0.4482, -0.7987, -0.4811, -0.7724, -0.1439,
        -0.0821, -0.5708])
conti attr [tensor(-0.6936), tensor(-0.0456), tensor(-0.2879), tensor(-0.3701), tensor(-0.5284), tensor(-0.0671), tensor(-0.1359), tensor(1.), tensor(-0.2423), tensor(-0.2147), tensor(-0.9969), tensor(0.1338), tensor(0.3289), tensor(-0.0308), tensor(-0.5886), tensor(-0.5807), tensor(-0.2481), tensor(-0.3777), tensor(-0.1175), tensor(-0.2869), tensor(-0.2500), tensor(-0.3473), tensor(-0.5214), tensor(-0.2559), tensor(-0.8163), tensor(-0.3614), tensor(0.7105), tensor(-0.0888), tensor(-1.), tensor(-0.3062), tensor(-0.5702), tensor(-0.8

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-13.92,"#s this is the kind of movie that you only need to watch for about thirty seconds before you say to yourself , ` ah , yes , here we have a bad , bad , bad movie . ' #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: feel sanitised and stagey 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:14<00:00,  1.42s/it]


attr dtype torch.float32
word attr tensor([-0.1598,  0.1127, -0.1428, -1.0000, -0.0030,  1.0000,  0.2781, -0.9990,
        -0.5987, -0.9706])
conti attr [tensor(-0.1598), tensor(0.1127), tensor(-1.1458), tensor(1.), tensor(-0.7209), tensor(-0.5987), tensor(-0.9706)]
detokenized ['<s>', 'feel', 'san', 'it', 'ised', 'and', 'stage', 'y', '', '</s>']
len conti_raw 7
conti_raw ['<s>', 'feel', 'sanitised', 'and', 'stagey', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-2.48,#s feel sanitised and stagey #/s
,,,,




<IPython.core.display.HTML object>
Raw review: replete with stereotypical familial quandaries 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:15<00:00,  1.56s/it]


attr dtype torch.float32
word attr tensor([-0.1086,  1.0000, -0.2696, -0.3729, -0.2094,  0.3955, -1.0000, -0.3691,
        -0.4089,  0.5821, -0.2535])
conti attr [tensor(-0.1086), tensor(0.7304), tensor(-0.3729), tensor(-0.2094), tensor(0.3955), tensor(-1.7780), tensor(0.5821), tensor(-0.2535)]
detokenized ['<s>', 're', 'plete', 'with', 'stereotypical', 'familial', 'qu', 'and', 'aries', '', '</s>']
len conti_raw 8
conti_raw ['<s>', 'replete', 'with', 'stereotypical', 'familial', 'quandaries', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-1.01,#s replete with stereotypical familial quandaries #/s
,,,,


<IPython.core.display.HTML object>
Raw review: quirky comedy 
GT target: 1


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:10<00:00,  1.01s/it]


attr dtype torch.float32
word attr tensor([ 1.0000, -0.2850, -0.8568,  0.2476, -0.3851,  0.2711, -1.0000])
conti attr [tensor(1.), tensor(-0.8941), tensor(-0.3851), tensor(0.2711), tensor(-1.)]
detokenized ['<s>', 'qu', 'ir', 'ky', 'comedy', '', '</s>']
len conti_raw 5
conti_raw ['<s>', 'quirky', 'comedy', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.98),Neg,-1.01,#s quirky comedy #/s
,,,,


<IPython.core.display.HTML object>
Raw review: unrewarding 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.03it/s]


attr dtype torch.float32
word attr tensor([-0.3638,  0.3450,  0.6276, -1.0000, -0.9054,  1.0000, -0.6905])
conti attr [tensor(-0.3638), tensor(-0.9327), tensor(1.), tensor(-0.6905)]
detokenized ['<s>', 'un', 're', 'ward', 'ing', '', '</s>']
len conti_raw 4
conti_raw ['<s>', 'unrewarding', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,-0.99,#s unrewarding #/s
,,,,


<IPython.core.display.HTML object>
Raw review: old-hat 
GT target: 0


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.18it/s]


attr dtype torch.float32
word attr tensor([ 0.9203, -0.1649, -1.0000, -0.3694,  1.0000, -0.3732])
conti attr [tensor(0.9203), tensor(-1.5344), tensor(1.), tensor(-0.3732)]
detokenized ['<s>', 'old', '-', 'hat', '', '</s>']
len conti_raw 4
conti_raw ['<s>', 'old-hat', '', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Neg,Neg (0.00),Neg,0.01,#s old-hat #/s
,,,,


<IPython.core.display.HTML object>


In [10]:
save_info(idxs, sst2_data_raw, targets, model_out_list, raw_attr_list, conti_attr_list, raw_input_list, fname='kernel_shap.pkl')

{'indices': [27798,
  28244,
  48083,
  8795,
  91,
  20682,
  59890,
  41763,
  19795,
  9957,
  28334,
  55664,
  16438,
  18031,
  9297,
  66913,
  9289,
  6789,
  25294,
  33294,
  29699,
  41960,
  6554,
  8772,
  17921,
  8027,
  49091,
  54186,
  15080,
  22382,
  23856,
  9464,
  23831,
  59434,
  17862,
  24584,
  26216,
  63094,
  55787,
  3993,
  25463,
  17540,
  51128,
  46224,
  32656,
  30105,
  28646,
  17011,
  7812,
  48236],
 'raw_data': ['its oscar nomination ',
  'shenanigans and slapstick ',
  'an unsettling sight , ',
  'the climactic hourlong cricket match ',
  'alternating between facetious comic parody and pulp melodrama , this smart-aleck movie ... tosses around some intriguing questions about the difference between human and android life ',
  'to be a part of that elusive adult world ',
  'emotional power ',
  'reminds you of why animation is such a perfect medium for children , because of the way it allows the mind to enter and accept another world ',
  'un