In [1]:
import sys
import torch
sys.path.insert(0, '../../Models')
sys.path.insert(0, '../../Utils')
from _utils import  sample_random_glue_stsb, collect_info_for_metric, save_info, \
                    get_continuation_mapping, get_continuous_attributions, get_continuous_raw_inputs,\
                    attr_normalizing_func
from preload_models import get_stsb_tokenizer_n_model

In [2]:
# Draw the evaluation sample via the project helper from _utils.
# stsb_data_raw and targets are iterated in lock-step further down (one sentence
# pair and one gold similarity score per step); idxs is presumably the array of
# sampled dataset indices echoed in the cell output — TODO confirm against _utils.
stsb_data_raw, targets, idxs = sample_random_glue_stsb()

Reusing dataset glue (/home/user/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

[ 436  138  931 1462 1491  577  873  899  268  603 1341  648  941  901
  335 1306  234  132 1257 1433 1351 1484  977  146 1275  225  396  757
  949 1151  605 1041  350  788  626   88  599 1080  882  927 1209  647
  486  214  317  895  897   50 1054 1454]


In [3]:
# Load the tokenizer/model pair via the project helper from preload_models.
# Both are used as module-level globals by generate_record below.
tokenizer, model = get_stsb_tokenizer_n_model()

In [4]:
# Module-level accumulators handed to collect_info_for_metric on every call of
# generate_record: model outputs, raw attributions, word-level (merged)
# attributions and word-level (merged) input tokens.
# NOTE: these are never reset, so re-running the attribution loop without
# re-running this cell keeps adding to the same lists.
model_out_list, raw_attr_list, conti_attr_list, raw_input_list = [], [], [], []

In [5]:
from captum.attr import KernelShap
from captum.attr import visualization 

In [6]:
# KernelSHAP explainer wrapped around the model; generate_record calls
# ks.attribute on the embedding tensor that it also feeds to the model.
ks = KernelShap(model)

In [7]:
def generate_record(raw_datum, target, n_samples=2000, perturbations_per_eval=200):
    """Attribute one sentence pair with KernelSHAP and build a Captum text record.

    Relies on the module-level ``tokenizer``, ``model`` and ``ks`` objects and on
    the module-level result lists passed to ``collect_info_for_metric``.

    Args:
        raw_datum: tuple/list of 2 sentences (only the first two entries are used).
        target: gold similarity score; values above 2.5 are labelled 'Similar'.
        n_samples: number of KernelSHAP samples forwarded to ``ks.attribute``.
        perturbations_per_eval: number of perturbed inputs evaluated per forward
            pass, forwarded to ``ks.attribute``.

    Returns:
        A ``visualization.VisualizationDataRecord`` holding the word-level
        attributions, the model output, and the predicted / true / attribution
        class labels.

    Side effects:
        Prints the model output multiplied by 5 and hands the model output, raw
        attribution, word-level attributions and word-level tokens to
        ``collect_info_for_metric`` together with the module-level lists.
    """
    # Tokenize both sentences as a batch of two; the offset mappings are what
    # get_continuation_mapping uses to work out which sub-word tokens belong together.
    tokenized = tokenizer(raw_datum, truncation=True, return_offsets_mapping=True)
    offset_mappings = tokenized['offset_mapping']
    # concatenate the two continuation maps because the sentences are fed in together
    conti_map = get_continuation_mapping(offset_mappings[0]) + get_continuation_mapping(offset_mappings[1])

    # Join the two id sequences, replacing the first id of the second sentence with
    # the last id of the first sentence (i.e. its end-of-sequence token).
    first_ids = tokenized['input_ids'][0]
    second_ids = tokenized['input_ids'][1]
    pair_ids = first_ids + [first_ids[-1] if pos == 0 else tok_id
                            for pos, tok_id in enumerate(second_ids)]
    input_ids = torch.tensor(pair_ids).unsqueeze(0)
    # Strip the word-boundary marker from the token strings.
    # NOTE(review): only 'Ġ' is removed, so non-ASCII characters stay in their
    # byte-level token form (e.g. curly quotes) — confirm whether that is acceptable.
    detokenized = [t.replace('Ġ', '') for t in tokenizer.convert_ids_to_tokens(input_ids[0])]

    # feeding input forward
    input_emb = model.get_embeddings(input_ids)
    pred_prob = model(input_emb).item()
    print(f'pred_prob {pred_prob*5}')

    # categorizing results
    pred_class = 'Similar' if pred_prob > 0.5 else 'Not Similar'
    true_class = 'Similar' if target > 2.5 else 'Not Similar'

    # attribution algorithm working
    # NOTE(review): no feature_mask is passed, so Captum treats every scalar of the
    # embedding tensor as its own feature — confirm this granularity is intended
    # for the chosen sample budget.
    attribution = ks.attribute(input_emb, n_samples=n_samples,
                               perturbations_per_eval=perturbations_per_eval,
                               show_progress=True)
    # Collapse the last dimension to obtain one score per token.
    word_attributions = attribution.squeeze(0).sum(dim=1)
    # L2-normalize the token scores; skip the division when every attribution is
    # zero so the scores stay at 0 instead of turning into NaN.
    attr_norm = torch.norm(word_attributions)
    if attr_norm > 0:
        word_attributions = word_attributions / attr_norm
    attr_score = torch.sum(word_attributions)
    attr_class = 'Similar' if attr_score > 0.5 else 'Not Similar'
    convergence_score = None

    # re-organizing tensors and arrays because words get split down into sub-word tokens
    conti_attr = get_continuous_attributions(conti_map, word_attributions)
    raw_input = get_continuous_raw_inputs(conti_map, detokenized)

    # collect info for metrics later
    collect_info_for_metric(model_out_list, pred_prob, raw_attr_list, attribution, conti_attr_list, conti_attr, raw_input_list, raw_input)

    visual_record = visualization.VisualizationDataRecord(word_attributions=conti_attr,
                                                         pred_prob=pred_prob,
                                                         pred_class=pred_class,
                                                         true_class=true_class,
                                                         attr_class=attr_class,
                                                         attr_score=attr_score,
                                                         raw_input=raw_input,
                                                         convergence_score=convergence_score)

    return visual_record
      
    

In [8]:
# Attribute every sampled sentence pair and render its Captum text visualization.
for datum_raw, target in zip(stsb_data_raw, targets):
    print(f'Raw review: {datum_raw}') #datum expected to be a list of 2 sentences
    print(f'GT target: {target}')
    visual_record = generate_record(datum_raw, target)
    # visualize_text displays the HTML table itself; printing its return value
    # only emits the object's repr, so the call is made for its display effect.
    visualization.visualize_text([visual_record])



Raw review: ['Two dogs are sniffing something in the rocks.', 'Two dogs running down a path in the woods.']
GT target: 1.600000023841858
pred_prob 2.0290477573871613


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:37<00:00,  3.72s/it]


word attr tensor([-0.1765,  0.0470, -0.2364, -0.4286, -0.0180,  0.1176, -0.1075, -0.1090,
         0.4784,  0.0747, -0.1948,  0.2106,  0.1298,  0.0059, -0.1594,  0.1279,
         0.4817, -0.1669,  0.0591, -0.0524,  0.1453,  0.0712,  0.1467,  0.0137])
conti attr [tensor(-0.1765), tensor(0.0470), tensor(-0.2364), tensor(-0.4286), tensor(0.0997), tensor(-0.1075), tensor(-0.1090), tensor(0.4784), tensor(-0.1201), tensor(0.2106), tensor(0.1298), tensor(0.0059), tensor(-0.1594), tensor(0.1279), tensor(0.4817), tensor(-0.1669), tensor(0.0591), tensor(-0.0524), tensor(0.1453), tensor(0.2178), tensor(0.0137)]
detokenized ['<s>', 'Two', 'dogs', 'are', 'sniff', 'ing', 'something', 'in', 'the', 'rocks', '.', '</s>', '</s>', 'Two', 'dogs', 'running', 'down', 'a', 'path', 'in', 'the', 'woods', '.', '</s>']
len conti_raw 21
conti_raw ['<s>', 'Two', 'dogs', 'are', 'sniffing', 'something', 'in', 'the', 'rocks.', '</s>', '</s>', 'Two', 'dogs', 'running', 'down', 'a', 'path', 'in', 'the', 'woods.', '</s>

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.41),Not Similar,0.46,#s Two dogs are sniffing something in the rocks. #/s #/s Two dogs running down a path in the woods. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['A woman is slicing a potato.', 'A woman is slicing carrot.']
GT target: 2.5
pred_prob 2.1180687844753265


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:25<00:00,  2.55s/it]


word attr tensor([ 0.1530,  0.1624, -0.0148, -0.3530,  0.1403,  0.0859,  0.1151, -0.0245,
        -0.0734, -0.1577,  0.6133,  0.0306,  0.1376,  0.1186, -0.1104,  0.1626,
        -0.5527])
conti attr [tensor(0.1530), tensor(0.1624), tensor(-0.0148), tensor(-0.3530), tensor(0.1403), tensor(0.0859), tensor(0.0906), tensor(-0.0734), tensor(-0.1577), tensor(0.6133), tensor(0.0306), tensor(0.1376), tensor(0.1186), tensor(0.0522), tensor(-0.5527)]
detokenized ['<s>', 'A', 'woman', 'is', 'slicing', 'a', 'potato', '.', '</s>', '</s>', 'A', 'woman', 'is', 'slicing', 'carrot', '.', '</s>']
len conti_raw 15
conti_raw ['<s>', 'A', 'woman', 'is', 'slicing', 'a', 'potato.', '</s>', '</s>', 'A', 'woman', 'is', 'slicing', 'carrot.', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.42),Not Similar,0.43,#s A woman is slicing a potato. #/s #/s A woman is slicing carrot. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['It was before the season began because Amy saw the woman before the Silence took her.', "The exact timeline hasn't been listed yet, but there are a few assumptions we can make."]
GT target: 0.20000000298023224
pred_prob 0.16538595780730247


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:00<00:00,  6.10s/it]


word attr tensor([ 0.2854,  0.0705, -0.0839,  0.0988, -0.1507, -0.0565,  0.1482, -0.0893,
        -0.1408,  0.1634,  0.1382,  0.0219,  0.0424, -0.0630,  0.1835,  0.1004,
        -0.3094, -0.1456,  0.2376,  0.0809, -0.0264,  0.0797,  0.2760,  0.0398,
        -0.1349,  0.1267, -0.0832, -0.0858, -0.0748, -0.3532, -0.0803, -0.1585,
        -0.1955, -0.2267, -0.1916,  0.2214, -0.0352,  0.1522, -0.0738, -0.2121])
conti attr [tensor(0.2854), tensor(0.0705), tensor(-0.0839), tensor(0.0988), tensor(-0.1507), tensor(-0.0565), tensor(0.1482), tensor(-0.0893), tensor(-0.1408), tensor(0.1634), tensor(0.1382), tensor(0.0219), tensor(0.0424), tensor(-0.0630), tensor(0.1835), tensor(0.1004), tensor(-0.4550), tensor(0.2376), tensor(0.0809), tensor(-0.0264), tensor(0.0797), tensor(0.2760), tensor(-0.0951), tensor(0.1267), tensor(-0.0832), tensor(-0.1606), tensor(-0.3532), tensor(-0.0803), tensor(-0.1585), tensor(-0.1955), tensor(-0.2267), tensor(-0.1916), tensor(0.2214), tensor(-0.0352), tensor(0.0785),

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.03),Not Similar,-0.5,"#s It was before the season began because Amy saw the woman before the Silence took her. #/s #/s The exact timeline hasn't been listed yet, but there are a few assumptions we can make. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['Protests for kidnapped girls banned in Nigerian capital', "Scores 'killed' in Boko Haram raid in Nigeria"]
GT target: 1.0
pred_prob 0.34498412162065506


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:35<00:00,  3.55s/it]


word attr tensor([-0.8657, -0.0510,  0.1201, -0.0091,  0.1886, -0.0282, -0.0080, -0.0224,
         0.1560,  0.1770,  0.2627,  0.0268, -0.0627,  0.0513, -0.1228,  0.0907,
        -0.0759, -0.0998,  0.1006,  0.0454,  0.0060,  0.0884, -0.0344,  0.0664])
conti attr [tensor(-0.8657), tensor(0.0690), tensor(-0.0091), tensor(0.1886), tensor(-0.0282), tensor(-0.0080), tensor(-0.0224), tensor(0.1560), tensor(0.1770), tensor(0.2627), tensor(0.0268), tensor(-0.0113), tensor(-0.1080), tensor(-0.0998), tensor(0.1006), tensor(0.0454), tensor(0.0060), tensor(0.0884), tensor(-0.0344), tensor(0.0664)]
detokenized ['<s>', 'Prot', 'ests', 'for', 'kidnapped', 'girls', 'banned', 'in', 'Nigerian', 'capital', '</s>', '</s>', 'Sc', 'ores', "'", 'killed', "'", 'in', 'Boko', 'Haram', 'raid', 'in', 'Nigeria', '</s>']
len conti_raw 20
conti_raw ['<s>', 'Protests', 'for', 'kidnapped', 'girls', 'banned', 'in', 'Nigerian', 'capital', '</s>', '</s>', 'Scores', "'killed'", 'in', 'Boko', 'Haram', 'raid', 'in', 'Nigeria

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.07),Not Similar,-0.0,#s Protests for kidnapped girls banned in Nigerian capital #/s #/s Scores 'killed' in Boko Haram raid in Nigeria #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['Oil falls in Asian trade', 'Oil prices down in Asian trade']
GT target: 5.0
pred_prob 4.716218113899231


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:21<00:00,  2.15s/it]


word attr tensor([ 0.3211,  0.1527,  0.0216,  0.1530,  0.4553, -0.0942, -0.0131, -0.3128,
        -0.4303,  0.0914,  0.2093,  0.3010,  0.2474,  0.3725, -0.0873])
conti attr [tensor(0.3211), tensor(0.1527), tensor(0.0216), tensor(0.1530), tensor(0.4553), tensor(-0.0942), tensor(-0.0131), tensor(-0.3128), tensor(-0.4303), tensor(0.0914), tensor(0.2093), tensor(0.3010), tensor(0.2474), tensor(0.3725), tensor(-0.0873)]
detokenized ['<s>', 'Oil', 'falls', 'in', 'Asian', 'trade', '</s>', '</s>', 'Oil', 'prices', 'down', 'in', 'Asian', 'trade', '</s>']
len conti_raw 15
conti_raw ['<s>', 'Oil', 'falls', 'in', 'Asian', 'trade', '</s>', '</s>', 'Oil', 'prices', 'down', 'in', 'Asian', 'trade', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.94),Similar,1.39,#s Oil falls in Asian trade #/s #/s Oil prices down in Asian trade #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['There are people out on the street.', 'People are out on the street.']
GT target: 5.0
pred_prob 4.79938268661499


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:27<00:00,  2.76s/it]


word attr tensor([ 0.6793, -0.0478,  0.0380,  0.1205,  0.3571,  0.0925,  0.1023, -0.1667,
         0.2909, -0.1985, -0.3636,  0.1671,  0.0147,  0.1154, -0.0573,  0.1638,
        -0.0055, -0.0879,  0.1023])
conti attr [tensor(0.6793), tensor(-0.0478), tensor(0.0380), tensor(0.1205), tensor(0.3571), tensor(0.0925), tensor(0.1023), tensor(0.1242), tensor(-0.1985), tensor(-0.3636), tensor(0.1671), tensor(0.0147), tensor(0.1154), tensor(-0.0573), tensor(0.1638), tensor(-0.0934), tensor(0.1023)]
detokenized ['<s>', 'There', 'are', 'people', 'out', 'on', 'the', 'street', '.', '</s>', '</s>', 'People', 'are', 'out', 'on', 'the', 'street', '.', '</s>']
len conti_raw 17
conti_raw ['<s>', 'There', 'are', 'people', 'out', 'on', 'the', 'street.', '</s>', '</s>', 'People', 'are', 'out', 'on', 'the', 'street.', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.96),Similar,1.32,#s There are people out on the street. #/s #/s People are out on the street. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ["Here are many samples: Pbase.com Out of the 16k photos there, I'm sure some are with a d700.", "It's a 1:1 lens, so that means that the size of the subject will be the same size on the sensor."]
GT target: 0.6000000238418579
pred_prob 0.4683755338191986


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:32<00:00,  9.28s/it]


word attr tensor([ 0.0786,  0.1311,  0.0827, -0.0718,  0.0253,  0.0708,  0.2468, -0.0052,
        -0.1093,  0.0102, -0.0487,  0.0633, -0.0987, -0.0939,  0.1715,  0.1970,
        -0.0889,  0.1437, -0.0100,  0.0139,  0.0324, -0.0608, -0.0540, -0.0388,
        -0.0669,  0.0727,  0.1672, -0.2813,  0.1583, -0.1352, -0.2864, -0.0993,
         0.0490, -0.0296, -0.0573,  0.2300,  0.0768, -0.1833,  0.0281, -0.0125,
         0.0612,  0.0430, -0.1611, -0.0713,  0.0287,  0.1553, -0.1298,  0.0894,
         0.2014, -0.3075,  0.0609,  0.1150,  0.0330, -0.2865, -0.2724, -0.0738,
        -0.0400])
conti attr [tensor(0.0786), tensor(0.1311), tensor(0.0827), tensor(-0.0718), tensor(0.0961), tensor(0.1425), tensor(-0.0487), tensor(0.0633), tensor(-0.0987), tensor(0.0776), tensor(0.1970), tensor(0.0548), tensor(0.0039), tensor(0.0324), tensor(-0.0608), tensor(-0.0540), tensor(-0.0388), tensor(-0.0669), tensor(-0.0414), tensor(0.1583), tensor(-0.1352), tensor(-0.3857), tensor(0.0490), tensor(0.1431), tensor

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.09),Not Similar,-0.34,"#s Here are many samples: Pbase.com Out of the 16k photos there, I'm sure some are with a d700. #/s #/s It's a 1:1 lens, so that means that the size of the subject will be the same size on the sensor. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['The University constituencies are modelled on the University constituencies in use in Britain in 1922 when Ireland became independent.', "This is still a representational form of gov't, but with a unique twist."]
GT target: 0.20000000298023224
pred_prob 1.4010797441005707


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:03<00:00,  6.35s/it]


word attr tensor([ 8.0031e-02, -2.4770e-01,  1.7825e-01, -1.1955e-01,  1.3476e-01,
        -1.2735e-01,  6.4827e-02,  4.7975e-02,  7.6113e-02,  5.9170e-02,
        -2.5663e-01,  8.7020e-02, -1.2219e-01,  3.8733e-01,  5.1308e-03,
        -2.9440e-04,  4.3871e-02, -2.2529e-01, -6.5410e-02,  4.3511e-02,
        -2.3316e-01, -9.0538e-02,  1.0195e-01,  3.2575e-01, -1.0753e-01,
        -8.9983e-02,  1.0474e-01, -7.9809e-02, -7.5770e-02,  1.3297e-01,
         1.6129e-01,  2.6322e-01, -1.4593e-01, -1.0296e-01,  2.1312e-01,
        -2.8033e-01,  9.9094e-03, -1.0088e-01, -2.3738e-02, -1.0972e-02,
         1.6780e-01,  4.4716e-02,  8.3242e-02])
conti attr [tensor(0.0800), tensor(-0.2477), tensor(0.1782), tensor(-0.1195), tensor(0.1348), tensor(-0.0625), tensor(0.0480), tensor(0.0761), tensor(0.0592), tensor(-0.2566), tensor(0.0870), tensor(-0.1222), tensor(0.3873), tensor(0.0051), tensor(-0.0003), tensor(0.0439), tensor(-0.2253), tensor(-0.0654), tensor(0.0435), tensor(-0.3237), tensor(0.1019), t

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.28),Not Similar,0.31,"#s The University constituencies are modelled on the University constituencies in use in Britain in 1922 when Ireland became independent. #/s #/s This is still a representational form of gov't, but with a unique twist. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['One Asian and one occasion woman in glasses smiling with their thumbs up.', 'Two women are posing for the camera and giving the thumbs up sign.']
GT target: 3.4000000953674316
pred_prob 3.2387176156044006


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:45<00:00,  4.55s/it]


word attr tensor([-0.1285,  0.5007,  0.1580,  0.0124,  0.1025,  0.1327,  0.1133, -0.2322,
         0.0200, -0.0666, -0.0027, -0.0697, -0.1130,  0.2041, -0.0014,  0.1046,
        -0.1814,  0.2678,  0.1672,  0.3391,  0.1153,  0.0628, -0.0886, -0.1019,
         0.0676,  0.0603, -0.3287, -0.2493,  0.0099, -0.0785,  0.2496, -0.0876])
conti attr [tensor(-0.1285), tensor(0.5007), tensor(0.1580), tensor(0.0124), tensor(0.1025), tensor(0.1327), tensor(0.1133), tensor(-0.2322), tensor(0.0200), tensor(-0.0666), tensor(-0.0027), tensor(-0.0697), tensor(-0.1130), tensor(0.2028), tensor(0.1046), tensor(-0.1814), tensor(0.2678), tensor(0.1672), tensor(0.3391), tensor(0.1153), tensor(0.0628), tensor(-0.0886), tensor(-0.1019), tensor(0.0676), tensor(0.0603), tensor(-0.3287), tensor(-0.2493), tensor(0.0099), tensor(0.1710), tensor(-0.0876)]
detokenized ['<s>', 'One', 'Asian', 'and', 'one', 'occasion', 'woman', 'in', 'glasses', 'smiling', 'with', 'their', 'thumbs', 'up', '.', '</s>', '</s>', 'Two', 'wome

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.65),Similar,0.96,#s One Asian and one occasion woman in glasses smiling with their thumbs up. #/s #/s Two women are posing for the camera and giving the thumbs up sign. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['Three guys in speedos have their feet up on the railing of a boat getting a tan.', 'A few men on the deck are dropping another man into the water']
GT target: 1.600000023841858
pred_prob 1.1263110488653183


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:51<00:00,  5.17s/it]


word attr tensor([ 0.2612, -0.1791, -0.0860,  0.0341,  0.0600,  0.0872,  0.1182,  0.1307,
        -0.0324, -0.0169, -0.0903, -0.1041,  0.0630, -0.2078, -0.2202, -0.3457,
        -0.2943, -0.2419, -0.1206,  0.1487,  0.0449,  0.2276,  0.1028,  0.2542,
         0.0066,  0.1711,  0.0134,  0.1242,  0.2171,  0.1633, -0.3203,  0.0461,
         0.1215, -0.0842, -0.1596,  0.1786])
conti attr [tensor(0.2612), tensor(-0.1791), tensor(-0.0860), tensor(0.0341), tensor(0.1472), tensor(0.1182), tensor(0.1307), tensor(-0.0324), tensor(-0.0169), tensor(-0.0903), tensor(-0.1041), tensor(0.0630), tensor(-0.2078), tensor(-0.2202), tensor(-0.3457), tensor(-0.2943), tensor(-0.2419), tensor(0.0281), tensor(0.0449), tensor(0.2276), tensor(0.1028), tensor(0.2542), tensor(0.0066), tensor(0.1711), tensor(0.0134), tensor(0.1242), tensor(0.2171), tensor(0.1633), tensor(-0.3203), tensor(0.0461), tensor(0.1215), tensor(-0.0842), tensor(-0.1596), tensor(0.1786)]
detokenized ['<s>', 'Three', 'guys', 'in', 'speed', 'os

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.23),Not Similar,0.07,#s Three guys in speedos have their feet up on the railing of a boat getting a tan. #/s #/s A few men on the deck are dropping another man into the water #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['Cardinals enter Vatican for historic vote', 'Cardinals enter Sistine Chapel to elect pope']
GT target: 3.4000000953674316
pred_prob 3.9953607320785522


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:28<00:00,  2.88s/it]


word attr tensor([ 0.0624,  0.3346,  0.1175, -0.3034,  0.2064,  0.2153,  0.2073, -0.0954,
        -0.2837,  0.2594,  0.0765,  0.3162,  0.3290, -0.1325, -0.0572,  0.1277,
        -0.1858, -0.0090,  0.4413, -0.0949])
conti attr [tensor(0.0624), tensor(0.4521), tensor(-0.3034), tensor(0.2064), tensor(0.2153), tensor(0.2073), tensor(-0.0954), tensor(-0.2837), tensor(0.2594), tensor(0.3927), tensor(0.3290), tensor(-0.1897), tensor(0.1277), tensor(-0.1858), tensor(-0.0090), tensor(0.4413), tensor(-0.0949)]
detokenized ['<s>', 'Card', 'inals', 'enter', 'Vatican', 'for', 'historic', 'vote', '</s>', '</s>', 'Card', 'inals', 'enter', 'S', 'istine', 'Chapel', 'to', 'elect', 'pope', '</s>']
len conti_raw 17
conti_raw ['<s>', 'Cardinals', 'enter', 'Vatican', 'for', 'historic', 'vote', '</s>', '</s>', 'Cardinals', 'enter', 'Sistine', 'Chapel', 'to', 'elect', 'pope', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.80),Similar,1.53,#s Cardinals enter Vatican for historic vote #/s #/s Cardinals enter Sistine Chapel to elect pope #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['According to this website the peak visible magnitude will be about 10.5 around February 2nd.', 'The AAVSO data seems to indicate that it might have already peaked, at around 10.5 (visual).']
GT target: 3.5999999046325684
pred_prob 2.8884661197662354


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:08<00:00,  6.89s/it]


word attr tensor([ 4.9733e-01, -1.8089e-02,  8.1747e-02, -9.7662e-02, -1.9826e-01,
        -5.7143e-02, -3.2689e-02, -7.5583e-02, -2.1513e-02, -7.8758e-02,
         1.3875e-01,  1.8250e-01, -1.0444e-01,  2.0803e-01,  9.9114e-02,
         1.8495e-01,  7.1210e-03, -1.8789e-01,  8.1400e-02,  3.6919e-02,
         1.0478e-01,  1.9958e-02,  1.8424e-01, -1.3206e-02,  5.8569e-03,
        -5.7164e-04, -2.5106e-01, -6.7339e-02, -1.6640e-01, -1.0456e-01,
         6.5000e-02,  6.2770e-02,  2.7920e-01, -3.6259e-02, -1.0173e-01,
         2.0811e-01,  3.7412e-04, -2.4438e-01, -1.9249e-02,  9.8622e-02,
        -2.7446e-02, -8.7680e-02, -4.9200e-02, -3.3767e-02, -2.3239e-01,
        -2.3947e-01])
conti attr [tensor(0.4973), tensor(-0.0181), tensor(0.0817), tensor(-0.0977), tensor(-0.1983), tensor(-0.0571), tensor(-0.0327), tensor(-0.0756), tensor(-0.0215), tensor(-0.0788), tensor(0.1387), tensor(0.1825), tensor(0.2027), tensor(0.1850), tensor(0.0071), tensor(-0.0696), tensor(0.1048), tensor(0.0200), te

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.58),Not Similar,0.0,"#s According to this website the peak visible magnitude will be about 10.5 around February 2nd. #/s #/s The AAVSO data seems to indicate that it might have already peaked, at around 10.5 (visual). #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['Is it possible for a match to get tie even after applying D/L method?', "Yes, it's possible for a match to get tie after applying the D/L method."]
GT target: 4.0
pred_prob 3.2219207286834717


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:59<00:00,  5.95s/it]


word attr tensor([-0.0444, -0.3656,  0.1857,  0.2693, -0.1225,  0.0766,  0.2433, -0.0401,
        -0.0564, -0.0247, -0.0618,  0.0227,  0.1624, -0.1622,  0.0842,  0.2587,
         0.0889,  0.1980,  0.2421, -0.0284, -0.2844, -0.0570,  0.1421, -0.1277,
         0.1220,  0.2564,  0.0905,  0.2995, -0.0379, -0.1398,  0.1191,  0.1079,
         0.1186,  0.1504, -0.0779, -0.1552, -0.1215,  0.0058, -0.0667,  0.0481])
conti attr [tensor(-0.0444), tensor(-0.3656), tensor(0.1857), tensor(0.2693), tensor(-0.1225), tensor(0.0766), tensor(0.2433), tensor(-0.0401), tensor(-0.0564), tensor(-0.0247), tensor(-0.0618), tensor(0.0227), tensor(0.1624), tensor(0.1808), tensor(0.2870), tensor(0.2421), tensor(-0.0284), tensor(-0.3414), tensor(0.0144), tensor(0.1220), tensor(0.2564), tensor(0.0905), tensor(0.2995), tensor(-0.0379), tensor(-0.1398), tensor(0.1191), tensor(0.1079), tensor(0.1186), tensor(0.1504), tensor(-0.3546), tensor(-0.0609), tensor(0.0481)]
detokenized ['<s>', 'Is', 'it', 'possible', 'for', '

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.64),Similar,1.32,"#s Is it possible for a match to get tie even after applying D/L method? #/s #/s Yes, it's possible for a match to get tie after applying the D/L method. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['Eclipse Below are list of eclipse shortcuts which I use almost everyday.', 'To train yourself to use keyboard shortcuts, unplug your mouse for a few days.']
GT target: 0.6000000238418579
pred_prob 0.666646882891655


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:50<00:00,  5.05s/it]


word attr tensor([ 0.1032,  0.0268,  0.1415, -0.2250, -0.0223, -0.0853, -0.0584,  0.3179,
         0.1751, -0.2849,  0.0930,  0.3907, -0.1921, -0.0058, -0.0106, -0.3065,
        -0.0312, -0.0078,  0.0601,  0.0725, -0.1728,  0.1571, -0.2118, -0.0597,
         0.0430,  0.0711,  0.1249,  0.1662,  0.0224, -0.2807,  0.1505, -0.1113,
         0.0916,  0.0371, -0.3383])
conti attr [tensor(0.1032), tensor(0.1683), tensor(-0.2250), tensor(-0.0223), tensor(-0.0853), tensor(-0.0584), tensor(0.3179), tensor(0.1751), tensor(-0.2849), tensor(0.0930), tensor(0.3907), tensor(-0.1921), tensor(-0.0164), tensor(-0.3065), tensor(-0.0312), tensor(-0.0078), tensor(0.0601), tensor(0.0725), tensor(-0.1728), tensor(0.1571), tensor(-0.2118), tensor(-0.0167), tensor(0.1960), tensor(0.1662), tensor(0.0224), tensor(-0.2807), tensor(0.1505), tensor(-0.1113), tensor(0.1287), tensor(-0.3383)]
detokenized ['<s>', 'E', 'clipse', 'Below', 'are', 'list', 'of', 'eclipse', 'shortcuts', 'which', 'I', 'use', 'almost', 'every

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.13),Not Similar,-0.16,"#s Eclipse Below are list of eclipse shortcuts which I use almost everyday. #/s #/s To train yourself to use keyboard shortcuts, unplug your mouse for a few days. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['A woman and child push strollers down the sidewalk.', 'A women and a young girl walking pushchairs along the sidewalk.']
GT target: 3.4000000953674316
pred_prob 4.378781616687775


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:40<00:00,  4.01s/it]


word attr tensor([ 0.2049,  0.1574,  0.2574, -0.0232, -0.0645,  0.0426,  0.1415,  0.3316,
        -0.0562,  0.2407,  0.4356,  0.0609,  0.2857,  0.0346, -0.0873,  0.1251,
         0.2385, -0.2142, -0.0043,  0.1470,  0.2523, -0.0758, -0.0378, -0.0429,
        -0.1185, -0.2361,  0.1935, -0.2561])
conti attr [tensor(0.2049), tensor(0.1574), tensor(0.2574), tensor(-0.0232), tensor(-0.0645), tensor(0.0426), tensor(0.4731), tensor(-0.0562), tensor(0.2407), tensor(0.4965), tensor(0.2857), tensor(0.0346), tensor(-0.0873), tensor(0.1251), tensor(0.2385), tensor(-0.2142), tensor(-0.0043), tensor(0.1470), tensor(0.2523), tensor(-0.1136), tensor(-0.0429), tensor(-0.1185), tensor(-0.0426), tensor(-0.2561)]
detokenized ['<s>', 'A', 'woman', 'and', 'child', 'push', 'st', 'rollers', 'down', 'the', 'sidewalk', '.', '</s>', '</s>', 'A', 'women', 'and', 'a', 'young', 'girl', 'walking', 'push', 'chairs', 'along', 'the', 'sidewalk', '.', '</s>']
len conti_raw 24
conti_raw ['<s>', 'A', 'woman', 'and', 'child

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.88),Similar,1.93,#s A woman and child push strollers down the sidewalk. #/s #/s A women and a young girl walking pushchairs along the sidewalk. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['ICC lawyer detained in Libya on security concerns', 'ICC lawyer held in Libya faces 45-day detention']
GT target: 3.4000000953674316
pred_prob 3.1552401185035706


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:34<00:00,  3.44s/it]


word attr tensor([-0.3740,  0.1594, -0.1030,  0.0920,  0.2770,  0.2223,  0.2251, -0.2592,
         0.0580, -0.2592, -0.3647, -0.1581,  0.1967, -0.0478, -0.2757,  0.0168,
         0.0028,  0.0294,  0.3922, -0.0669,  0.1407,  0.1767, -0.1191,  0.0385])
conti attr [tensor(-0.3740), tensor(0.0564), tensor(0.0920), tensor(0.2770), tensor(0.2223), tensor(0.2251), tensor(-0.2592), tensor(0.0580), tensor(-0.2592), tensor(-0.3647), tensor(-0.1581), tensor(0.1489), tensor(-0.2757), tensor(0.0168), tensor(0.0028), tensor(0.0294), tensor(0.3922), tensor(0.2504), tensor(-0.1191), tensor(0.0385)]
detokenized ['<s>', 'IC', 'C', 'lawyer', 'detained', 'in', 'Libya', 'on', 'security', 'concerns', '</s>', '</s>', 'IC', 'C', 'lawyer', 'held', 'in', 'Libya', 'faces', '45', '-', 'day', 'detention', '</s>']
len conti_raw 20
conti_raw ['<s>', 'ICC', 'lawyer', 'detained', 'in', 'Libya', 'on', 'security', 'concerns', '</s>', '</s>', 'ICC', 'lawyer', 'held', 'in', 'Libya', 'faces', '45-day', 'detention', '</s>']

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.63),Not Similar,-0.0,#s ICC lawyer detained in Libya on security concerns #/s #/s ICC lawyer held in Libya faces 45-day detention #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['The flag was moving in the air.', 'A flag is waving.']
GT target: 3.75
pred_prob 4.404574930667877


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:24<00:00,  2.49s/it]


word attr tensor([ 0.3261,  0.1642, -0.2638,  0.2255, -0.0437,  0.2459,  0.0413, -0.2763,
         0.3361,  0.0293,  0.2737, -0.0883,  0.0605,  0.4644,  0.4316,  0.0204,
        -0.0570])
conti attr [tensor(0.3261), tensor(0.1642), tensor(-0.2638), tensor(0.2255), tensor(-0.0437), tensor(0.2459), tensor(0.0413), tensor(0.0598), tensor(0.0293), tensor(0.2737), tensor(-0.0883), tensor(0.0605), tensor(0.4644), tensor(0.4520), tensor(-0.0570)]
detokenized ['<s>', 'The', 'flag', 'was', 'moving', 'in', 'the', 'air', '.', '</s>', '</s>', 'A', 'flag', 'is', 'waving', '.', '</s>']
len conti_raw 15
conti_raw ['<s>', 'The', 'flag', 'was', 'moving', 'in', 'the', 'air.', '</s>', '</s>', 'A', 'flag', 'is', 'waving.', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.88),Similar,1.89,#s The flag was moving in the air. #/s #/s A flag is waving. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['A woman is cutting broccoli.', 'A woman is slicing broccoli.']
GT target: 5.0
pred_prob 4.450764060020447


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:23<00:00,  2.35s/it]


word attr tensor([ 0.0561, -0.0219, -0.0338, -0.0395,  0.0216,  0.0032, -0.7735, -0.0932,
         0.1073, -0.2043, -0.1483,  0.1369,  0.4116,  0.2147,  0.1079,  0.2551])
conti attr [tensor(0.0561), tensor(-0.0219), tensor(-0.0338), tensor(-0.0395), tensor(0.0216), tensor(-0.7702), tensor(-0.0932), tensor(0.1073), tensor(-0.2043), tensor(-0.1483), tensor(0.1369), tensor(0.4116), tensor(0.3226), tensor(0.2551)]
detokenized ['<s>', 'A', 'woman', 'is', 'cutting', 'broccoli', '.', '</s>', '</s>', 'A', 'woman', 'is', 'slicing', 'broccoli', '.', '</s>']
len conti_raw 14
conti_raw ['<s>', 'A', 'woman', 'is', 'cutting', 'broccoli.', '</s>', '</s>', 'A', 'woman', 'is', 'slicing', 'broccoli.', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.89),Not Similar,-0.0,#s A woman is cutting broccoli. #/s #/s A woman is slicing broccoli. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['Iran warns Moscow nuclear talks could stall', 'Iranians “confess” to nuclear scientist murders']
GT target: 1.2000000476837158
pred_prob 0.6065619364380836


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:34<00:00,  3.49s/it]


word attr tensor([-0.3737,  0.2047,  0.0756, -0.0048,  0.4494, -0.2372,  0.0182,  0.0966,
         0.0679,  0.1604,  0.0189,  0.3379,  0.1534, -0.3102, -0.1166,  0.0089,
         0.2951, -0.1171, -0.2321, -0.2014, -0.1494,  0.0665, -0.2112])
conti attr [tensor(-0.3737), tensor(0.2047), tensor(0.0756), tensor(-0.0048), tensor(0.4494), tensor(-0.2372), tensor(0.0182), tensor(0.0966), tensor(0.0679), tensor(0.1604), tensor(0.3568), tensor(0.1534), tensor(-0.1228), tensor(-0.1171), tensor(-0.2321), tensor(-0.2014), tensor(-0.1494), tensor(0.0665), tensor(-0.2112)]
detokenized ['<s>', 'Iran', 'warns', 'Moscow', 'nuclear', 'talks', 'could', 'stall', '</s>', '</s>', 'Iran', 'ians', 'âĢ', 'ľ', 'conf', 'ess', 'âĢ', 'Ŀ', 'to', 'nuclear', 'scientist', 'murders', '</s>']
len conti_raw 19
conti_raw ['<s>', 'Iran', 'warns', 'Moscow', 'nuclear', 'talks', 'could', 'stall', '</s>', '</s>', 'Iranians', 'âĢ', 'ľconfessâĢ', 'Ŀ', 'to', 'nuclear', 'scientist', 'murders', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.12),Not Similar,0.0,#s Iran warns Moscow nuclear talks could stall #/s #/s Iranians âĢ ľconfessâĢ Ŀ to nuclear scientist murders #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['Syria agrees to surrender chemical weapons', 'Syria FM: We will stop producing chemical weapons']
GT target: 3.4000000953674316
pred_prob 4.205862879753113


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:29<00:00,  2.91s/it]


word attr tensor([-0.4366,  0.1089,  0.1181, -0.3244, -0.3508,  0.1291, -0.2421,  0.0555,
        -0.1105,  0.4577, -0.0515,  0.0127,  0.2317, -0.0144,  0.0746, -0.1854,
         0.2155, -0.0222,  0.3342])
conti attr [tensor(-0.4366), tensor(0.1089), tensor(0.1181), tensor(-0.3244), tensor(-0.3508), tensor(0.1291), tensor(-0.2421), tensor(0.0555), tensor(-0.1105), tensor(0.4577), tensor(-0.0388), tensor(0.2317), tensor(-0.0144), tensor(0.0746), tensor(-0.1854), tensor(0.2155), tensor(-0.0222), tensor(0.3342)]
detokenized ['<s>', 'Syria', 'agrees', 'to', 'surrender', 'chemical', 'weapons', '</s>', '</s>', 'Syria', 'FM', ':', 'We', 'will', 'stop', 'producing', 'chemical', 'weapons', '</s>']
len conti_raw 18
conti_raw ['<s>', 'Syria', 'agrees', 'to', 'surrender', 'chemical', 'weapons', '</s>', '</s>', 'Syria', 'FM:', 'We', 'will', 'stop', 'producing', 'chemical', 'weapons', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.84),Not Similar,0.0,#s Syria agrees to surrender chemical weapons #/s #/s Syria FM: We will stop producing chemical weapons #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['Chinese shares close lower on Wednesday', 'Chinese students attacked in France']
GT target: 0.0
pred_prob 0.4386577755212784


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:22<00:00,  2.23s/it]


word attr tensor([-0.1249,  0.4126,  0.3983, -0.3330, -0.0036,  0.3334,  0.1540, -0.2515,
        -0.0998, -0.0393, -0.2405,  0.2026,  0.0454,  0.0292, -0.4829])
conti attr [tensor(-0.1249), tensor(0.4126), tensor(0.3983), tensor(-0.3330), tensor(-0.0036), tensor(0.3334), tensor(0.1540), tensor(-0.2515), tensor(-0.0998), tensor(-0.0393), tensor(-0.2405), tensor(0.2026), tensor(0.0454), tensor(0.0292), tensor(-0.4829)]
detokenized ['<s>', 'Chinese', 'shares', 'close', 'lower', 'on', 'Wednesday', '</s>', '</s>', 'Chinese', 'students', 'attacked', 'in', 'France', '</s>']
len conti_raw 15
conti_raw ['<s>', 'Chinese', 'shares', 'close', 'lower', 'on', 'Wednesday', '</s>', '</s>', 'Chinese', 'students', 'attacked', 'in', 'France', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.09),Not Similar,0.0,#s Chinese shares close lower on Wednesday #/s #/s Chinese students attacked in France #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['Protests after George Zimmerman acquitted in Trayvon Martin case', 'Zimmerman acquitted in slaying']
GT target: 3.0
pred_prob 2.7657532691955566


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:32<00:00,  3.23s/it]


word attr tensor([ 0.0297,  0.0261, -0.1484,  0.4667,  0.0095,  0.0339,  0.0398,  0.0088,
         0.3388, -0.3658,  0.1142, -0.0863, -0.4286,  0.1165,  0.3633, -0.2676,
        -0.0700,  0.1494,  0.0939,  0.2228])
conti attr [tensor(0.0297), tensor(-0.1223), tensor(0.4667), tensor(0.0095), tensor(0.0339), tensor(0.0398), tensor(0.0088), tensor(0.3388), tensor(-0.3658), tensor(0.1142), tensor(-0.0863), tensor(-0.4286), tensor(0.2122), tensor(-0.0700), tensor(0.1494), tensor(0.0939), tensor(0.2228)]
detokenized ['<s>', 'Prot', 'ests', 'after', 'George', 'Zimmerman', 'acquitted', 'in', 'Trayvon', 'Martin', 'case', '</s>', '</s>', 'Z', 'immer', 'man', 'acquitted', 'in', 'slaying', '</s>']
len conti_raw 17
conti_raw ['<s>', 'Protests', 'after', 'George', 'Zimmerman', 'acquitted', 'in', 'Trayvon', 'Martin', 'case', '</s>', '</s>', 'Zimmerman', 'acquitted', 'in', 'slaying', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.55),Similar,0.65,#s Protests after George Zimmerman acquitted in Trayvon Martin case #/s #/s Zimmerman acquitted in slaying #/s
,,,,


<IPython.core.display.HTML object>
Raw review: ["The answer will depend significantly on what country you're traveling to.", 'As someone who has always travelled with a lot of prescription medication, I always declare it.']
GT target: 1.0




pred_prob 0.23258300498127937


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:29<00:00,  8.99s/it]


word attr tensor([ 0.4070,  0.1987,  0.0483,  0.0115, -0.0662,  0.0968, -0.1112,  0.0911,
         0.1486, -0.0832, -0.0606, -0.0321,  0.2785,  0.1800, -0.0273, -0.1190,
         0.1070, -0.0466, -0.1858, -0.0784, -0.4608, -0.0419, -0.0866, -0.0191,
        -0.0009,  0.0956, -0.0750, -0.2708, -0.2719,  0.0821, -0.3176, -0.0064,
        -0.0036,  0.2225, -0.0627])
conti attr [tensor(0.4070), tensor(0.1987), tensor(0.0483), tensor(0.0115), tensor(-0.0662), tensor(0.0968), tensor(-0.1112), tensor(0.0911), tensor(0.1486), tensor(-0.1438), tensor(-0.0321), tensor(0.4584), tensor(-0.0273), tensor(-0.1190), tensor(0.1070), tensor(-0.0466), tensor(-0.1858), tensor(-0.0784), tensor(-0.4608), tensor(-0.0419), tensor(-0.0866), tensor(-0.0191), tensor(-0.0009), tensor(0.0956), tensor(-0.0750), tensor(-0.5427), tensor(0.0821), tensor(-0.3176), tensor(-0.0064), tensor(0.2189), tensor(-0.0627)]
detokenized ['<s>', 'The', 'answer', 'will', 'depend', 'significantly', 'on', 'what', 'country', 'you', "'r

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.05),Not Similar,-0.46,"#s The answer will depend significantly on what country you're traveling to. #/s #/s As someone who has always travelled with a lot of prescription medication, I always declare it. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['A man is running.', 'A man is singing.']
GT target: 1.25
pred_prob 0.7396946847438812


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:37<00:00,  3.79s/it]


word attr tensor([-0.4819,  0.4779,  0.2590,  0.0049,  0.0816, -0.3803, -0.1551, -0.3031,
        -0.0669,  0.1152,  0.0513, -0.0452,  0.0155,  0.4271])
conti attr [tensor(-0.4819), tensor(0.4779), tensor(0.2590), tensor(0.0049), tensor(-0.2987), tensor(-0.1551), tensor(-0.3031), tensor(-0.0669), tensor(0.1152), tensor(0.0513), tensor(-0.0298), tensor(0.4271)]
detokenized ['<s>', 'A', 'man', 'is', 'running', '.', '</s>', '</s>', 'A', 'man', 'is', 'singing', '.', '</s>']
len conti_raw 12
conti_raw ['<s>', 'A', 'man', 'is', 'running.', '</s>', '</s>', 'A', 'man', 'is', 'singing.', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.15),Not Similar,-0.0,#s A man is running. #/s #/s A man is singing. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ["Egyptian president moves into Mubarak's old office", "Egypt's President-Elect Morsi Starts to Form Government"]
GT target: 2.3333332538604736
pred_prob 1.923585683107376


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:58<00:00,  5.87s/it]


word attr tensor([ 0.8662, -0.0246, -0.2102, -0.0753,  0.0530, -0.0319, -0.0349, -0.1701,
        -0.1701, -0.1976,  0.1290, -0.0437, -0.0352,  0.0498,  0.0482, -0.1372,
         0.0143,  0.0353, -0.0032,  0.0705, -0.1607,  0.1173, -0.0888])
conti attr [tensor(0.8662), tensor(-0.2348), tensor(-0.0753), tensor(0.0530), tensor(-0.0319), tensor(-0.2050), tensor(-0.1701), tensor(-0.1976), tensor(0.1290), tensor(-0.0437), tensor(0.0146), tensor(-0.0747), tensor(0.0353), tensor(-0.0032), tensor(0.0705), tensor(-0.1607), tensor(0.1173), tensor(-0.0888)]
detokenized ['<s>', 'Egypt', 'ian', 'president', 'moves', 'into', 'Mubarak', "'s", 'old', 'office', '</s>', '</s>', 'Egypt', "'s", 'President', '-', 'Elect', 'Morsi', 'Starts', 'to', 'Form', 'Government', '</s>']
len conti_raw 18
conti_raw ['<s>', 'Egyptian', 'president', 'moves', 'into', "Mubarak's", 'old', 'office', '</s>', '</s>', "Egypt's", 'President-Elect', 'Morsi', 'Starts', 'to', 'Form', 'Government', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.38),Not Similar,0.0,#s Egyptian president moves into Mubarak's old office #/s #/s Egypt's President-Elect Morsi Starts to Form Government #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['The dog could not figure out how to get a stick out the doggy door.', 'A dog is trying to get through his dog door.']
GT target: 2.75
pred_prob 2.8478720784187317


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:18<00:00,  7.88s/it]


word attr tensor([ 0.4081,  0.1163, -0.1309,  0.0324,  0.1560,  0.5250,  0.1579, -0.0513,
        -0.1363, -0.1050, -0.1845, -0.0264, -0.1493, -0.1864, -0.0741, -0.0312,
        -0.0115, -0.0339, -0.1363,  0.0182,  0.4961, -0.0514,  0.1343, -0.1532,
        -0.0411,  0.0818, -0.0765,  0.0226,  0.0209,  0.0819,  0.0881, -0.0793])
conti attr [tensor(0.4081), tensor(0.1163), tensor(-0.1309), tensor(0.0324), tensor(0.1560), tensor(0.5250), tensor(0.1579), tensor(-0.0513), tensor(-0.1363), tensor(-0.1050), tensor(-0.1845), tensor(-0.0264), tensor(-0.1493), tensor(-0.1864), tensor(-0.1053), tensor(-0.0454), tensor(-0.1363), tensor(0.0182), tensor(0.4961), tensor(-0.0514), tensor(0.1343), tensor(-0.1532), tensor(-0.0411), tensor(0.0818), tensor(-0.0765), tensor(0.0226), tensor(0.0209), tensor(0.1700), tensor(-0.0793)]
detokenized ['<s>', 'The', 'dog', 'could', 'not', 'figure', 'out', 'how', 'to', 'get', 'a', 'stick', 'out', 'the', 'do', 'ggy', 'door', '.', '</s>', '</s>', 'A', 'dog', 'is', 't

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.57),Similar,0.68,#s The dog could not figure out how to get a stick out the doggy door. #/s #/s A dog is trying to get through his dog door. #/s
,,,,


<IPython.core.display.HTML object>
Raw review: ['The white and brown dog is running quickly through the grass.', 'The big white dog is running in the grass.']
GT target: 3.4000000953674316




pred_prob 3.860727846622467


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:05<00:00,  6.57s/it]


word attr tensor([ 0.2034,  0.0505, -0.0246, -0.0110, -0.1146,  0.2509,  0.0412, -0.2983,
         0.1897,  0.2635, -0.2487,  0.3261,  0.0716,  0.2554, -0.0434,  0.2385,
        -0.1073, -0.1577,  0.2775, -0.0660,  0.0116, -0.1501,  0.0956,  0.2316,
         0.0592,  0.4234])
conti attr [tensor(0.2034), tensor(0.0505), tensor(-0.0246), tensor(-0.0110), tensor(-0.1146), tensor(0.2509), tensor(0.0412), tensor(-0.2983), tensor(0.1897), tensor(0.2635), tensor(-0.2487), tensor(0.3977), tensor(0.2554), tensor(-0.0434), tensor(0.2385), tensor(-0.1073), tensor(-0.1577), tensor(0.2775), tensor(-0.0660), tensor(0.0116), tensor(-0.1501), tensor(0.0956), tensor(0.2907), tensor(0.4234)]
detokenized ['<s>', 'The', 'white', 'and', 'brown', 'dog', 'is', 'running', 'quickly', 'through', 'the', 'grass', '.', '</s>', '</s>', 'The', 'big', 'white', 'dog', 'is', 'running', 'in', 'the', 'grass', '.', '</s>']
len conti_raw 24
conti_raw ['<s>', 'The', 'white', 'and', 'brown', 'dog', 'is', 'running', 'quickly'

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.77),Similar,1.77,#s The white and brown dog is running quickly through the grass. #/s #/s The big white dog is running in the grass. #/s
,,,,


<IPython.core.display.HTML object>
Raw review: ['In Hinduism and Buddhism there is message of peace and an encouragement to not be materialistic.', 'This relates to the "discovery" of Asian, and other non-European cultures by Americans.']
GT target: 0.0




pred_prob 0.2106860652565956


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:54<00:00, 11.49s/it]


word attr tensor([ 0.0347, -0.0285, -0.0465, -0.5566,  0.0721, -0.0732, -0.0471, -0.0915,
        -0.1226,  0.3525, -0.1161,  0.0495, -0.3201, -0.0460, -0.1850,  0.0954,
         0.1381,  0.1299,  0.1379,  0.2745, -0.0643, -0.0884, -0.1842, -0.0118,
         0.1296,  0.1324, -0.0847, -0.0025,  0.0512, -0.0045, -0.0439, -0.0731,
        -0.0334,  0.0567,  0.1768,  0.0332, -0.0164,  0.0939,  0.0900, -0.1506,
         0.0804,  0.0726, -0.2139])
conti attr [tensor(0.0347), tensor(-0.0285), tensor(-0.6031), tensor(0.0721), tensor(-0.0732), tensor(-0.0471), tensor(-0.0915), tensor(-0.1226), tensor(0.3525), tensor(-0.1161), tensor(0.0495), tensor(-0.3201), tensor(-0.0460), tensor(-0.1850), tensor(0.0954), tensor(0.1381), tensor(0.5422), tensor(-0.0643), tensor(-0.0884), tensor(-0.1842), tensor(-0.0118), tensor(0.1296), tensor(0.1324), tensor(-0.0404), tensor(-0.0439), tensor(-0.1065), tensor(0.0567), tensor(0.1768), tensor(0.1107), tensor(0.0900), tensor(-0.1506), tensor(0.1530), tensor(-0.21

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.04),Not Similar,-0.4,"#s In Hinduism and Buddhism there is message of peace and an encouragement to not be materialistic. #/s #/s This relates to the ""discovery"" of Asian, and other non-European cultures by Americans. #/s"
,,,,


<IPython.core.display.HTML object>
Raw review: ['To play golf, the first thing you need is a set of clubs.', 'Having fairly recently taken up the sport myself, I feel your discomfort.']
GT target: 1.2000000476837158




pred_prob 0.7371227443218231


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:27<00:00,  8.73s/it]


word attr tensor([ 0.0139,  0.1228, -0.0072, -0.0527,  0.1570, -0.2330, -0.0899,  0.1164,
        -0.2223,  0.1930, -0.0498,  0.2236, -0.0493, -0.0542, -0.3665,  0.0277,
         0.0984,  0.1082, -0.4142, -0.1183, -0.1230, -0.0296, -0.0403, -0.1545,
        -0.0895,  0.0623,  0.1826,  0.0556,  0.3743, -0.0112, -0.2146,  0.1822,
         0.2922])
conti attr [tensor(0.0139), tensor(0.1228), tensor(-0.0072), tensor(0.1043), tensor(-0.2330), tensor(-0.0899), tensor(0.1164), tensor(-0.2223), tensor(0.1930), tensor(-0.0498), tensor(0.2236), tensor(-0.0493), tensor(-0.0542), tensor(-0.3389), tensor(0.0984), tensor(0.1082), tensor(-0.4142), tensor(-0.1183), tensor(-0.1230), tensor(-0.0296), tensor(-0.0403), tensor(-0.1545), tensor(-0.0895), tensor(0.2449), tensor(0.0556), tensor(0.3743), tensor(-0.0112), tensor(-0.0325), tensor(0.2922)]
detokenized ['<s>', 'To', 'play', 'golf', ',', 'the', 'first', 'thing', 'you', 'need', 'is', 'a', 'set', 'of', 'clubs', '.', '</s>', '</s>', 'Having', 'fairly'

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.15),Not Similar,-0.11,"#s To play golf, the first thing you need is a set of clubs. #/s #/s Having fairly recently taken up the sport myself, I feel your discomfort. #/s"
,,,,


<IPython.core.display.HTML object>
Raw review: ['The companies, Chiron and Aventis Pasteur, together made about 80 million doses of the injected vaccine, which ordinarily would have been enough to meet U.S. demand.', 'Chiron and Aventis Pasteur together made about 80 million doses, ordinarily enough for U.S. demand, The Associated Press reported.']
GT target: 4.400000095367432




pred_prob 3.635355532169342


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [02:29<00:00, 14.94s/it]


word attr tensor([-0.0258,  0.0029, -0.0684, -0.0326,  0.0664,  0.0654,  0.2975,  0.0603,
         0.0315, -0.2472, -0.0566, -0.0975, -0.1773, -0.0163, -0.1074, -0.0456,
        -0.0992,  0.0858, -0.0514,  0.0011,  0.1738,  0.1205,  0.0269, -0.0717,
         0.0496, -0.0468,  0.1148, -0.0932, -0.0969,  0.1376,  0.0782, -0.0166,
         0.0576,  0.1701,  0.0800,  0.0364, -0.0348,  0.2998,  0.1106, -0.4101,
         0.0583,  0.1553,  0.1442, -0.1356,  0.0857,  0.1637,  0.0323,  0.1007,
         0.1228, -0.0289, -0.0748, -0.0382, -0.1398,  0.1211,  0.0761,  0.0785,
         0.0317, -0.0231, -0.0080, -0.0233,  0.0382,  0.0941,  0.0798,  0.0006,
         0.0636,  0.1252,  0.2838,  0.0196,  0.1097,  0.1296])
conti attr [tensor(-0.0258), tensor(0.0029), tensor(-0.1009), tensor(0.1318), tensor(0.2975), tensor(-0.1553), tensor(-0.3314), tensor(-0.0163), tensor(-0.1074), tensor(-0.0456), tensor(-0.0992), tensor(0.0858), tensor(-0.0514), tensor(0.0011), tensor(0.1738), tensor(0.1205), tensor(-0.

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.73),Similar,1.91,"#s The companies, Chiron and Aventis Pasteur, together made about 80 million doses of the injected vaccine, which ordinarily would have been enough to meet U.S. demand. #/s #/s Chiron and Aventis Pasteur together made about 80 million doses, ordinarily enough for U.S. demand, The Associated Press reported. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['A woman jumps and poses for the camera.', 'A woman poses for the camera.']
GT target: 4.0
pred_prob 3.580184280872345


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:29<00:00,  2.93s/it]


word attr tensor([ 0.5399, -0.1183,  0.0794, -0.1183,  0.0101, -0.3704,  0.1914, -0.0865,
        -0.0073, -0.0806,  0.0561, -0.0024,  0.2530,  0.1819, -0.0048, -0.0610,
         0.2346, -0.0283, -0.1989,  0.5356])
conti attr [tensor(0.5399), tensor(-0.1183), tensor(0.0794), tensor(-0.1183), tensor(0.0101), tensor(-0.3704), tensor(0.1914), tensor(-0.0865), tensor(-0.0880), tensor(0.0561), tensor(-0.0024), tensor(0.2530), tensor(0.1819), tensor(-0.0048), tensor(-0.0610), tensor(0.2346), tensor(-0.2272), tensor(0.5356)]
detokenized ['<s>', 'A', 'woman', 'jumps', 'and', 'poses', 'for', 'the', 'camera', '.', '</s>', '</s>', 'A', 'woman', 'poses', 'for', 'the', 'camera', '.', '</s>']
len conti_raw 18
conti_raw ['<s>', 'A', 'woman', 'jumps', 'and', 'poses', 'for', 'the', 'camera.', '</s>', '</s>', 'A', 'woman', 'poses', 'for', 'the', 'camera.', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.72),Similar,1.01,#s A woman jumps and poses for the camera. #/s #/s A woman poses for the camera. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['He claims it may seem unrealistic only because little effort has been devoted to the concept.', '"This proposal is modest compared with the space programme, and may seem unrealistic only because little effort has been devoted to it.']
GT target: 3.5
pred_prob 3.237951099872589


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:10<00:00,  7.09s/it]


word attr tensor([-0.1169,  0.1008,  0.2599,  0.0459,  0.0917, -0.0768, -0.1592, -0.2008,
         0.1358, -0.1591, -0.1475,  0.1745,  0.1235, -0.2146,  0.0713, -0.2368,
         0.0560, -0.0233,  0.1414, -0.0837, -0.0625,  0.1572,  0.1259, -0.0678,
         0.0721,  0.1175,  0.0436,  0.1728,  0.0537, -0.1723,  0.0909, -0.2998,
         0.0062, -0.1084,  0.2924,  0.0032,  0.2116, -0.0267,  0.2632,  0.1091,
         0.1816, -0.0542, -0.0501,  0.1365,  0.2000,  0.1612])
conti attr [tensor(-0.1169), tensor(0.1008), tensor(0.2599), tensor(0.0459), tensor(0.0917), tensor(-0.0768), tensor(-0.1592), tensor(-0.2008), tensor(0.1358), tensor(-0.1591), tensor(-0.1475), tensor(0.1745), tensor(0.1235), tensor(-0.2146), tensor(0.0713), tensor(-0.2368), tensor(0.0328), tensor(0.1414), tensor(-0.0837), tensor(0.0947), tensor(0.1259), tensor(-0.0678), tensor(0.0721), tensor(0.1175), tensor(0.0436), tensor(0.1728), tensor(0.0537), tensor(-0.0814), tensor(-0.2998), tensor(0.0062), tensor(-0.1084), tensor

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.65),Similar,1.34,"#s He claims it may seem unrealistic only because little effort has been devoted to the concept. #/s #/s ""This proposal is modest compared with the space programme, and may seem unrealistic only because little effort has been devoted to it. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['A bird eating from a bird feeder.', 'A yellow bird eating fruit on a bird feeder.']
GT target: 3.200000047683716
pred_prob 3.4366700053215027


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:34<00:00,  3.44s/it]


word attr tensor([ 0.1685, -0.2112, -0.1242,  0.2202, -0.2185,  0.2379,  0.2221,  0.1197,
         0.0609,  0.1000, -0.0691, -0.2817, -0.0099,  0.0905,  0.1935,  0.1457,
         0.0530, -0.0342, -0.0348, -0.2136, -0.0956,  0.0711,  0.3640,  0.5769])
conti attr [tensor(0.1685), tensor(-0.2112), tensor(-0.1242), tensor(0.2202), tensor(-0.2185), tensor(0.2379), tensor(0.2221), tensor(0.2806), tensor(-0.0691), tensor(-0.2817), tensor(-0.0099), tensor(0.0905), tensor(0.1935), tensor(0.1457), tensor(0.0530), tensor(-0.0342), tensor(-0.0348), tensor(-0.2136), tensor(0.3395), tensor(0.5769)]
detokenized ['<s>', 'A', 'bird', 'eating', 'from', 'a', 'bird', 'feed', 'er', '.', '</s>', '</s>', 'A', 'yellow', 'bird', 'eating', 'fruit', 'on', 'a', 'bird', 'feed', 'er', '.', '</s>']
len conti_raw 20
conti_raw ['<s>', 'A', 'bird', 'eating', 'from', 'a', 'bird', 'feeder.', '</s>', '</s>', 'A', 'yellow', 'bird', 'eating', 'fruit', 'on', 'a', 'bird', 'feeder.', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.69),Similar,1.33,#s A bird eating from a bird feeder. #/s #/s A yellow bird eating fruit on a bird feeder. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['Different parts of California have different wildlife, so you should probably narrow the region.', 'American black bears They are somewhat common in some wilderness areas of California, mostly in the mountains.']
GT target: 2.0
pred_prob 1.586385816335678


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:00<00:00,  6.02s/it]


word attr tensor([ 0.0588, -0.1009, -0.2091, -0.1528,  0.0886,  0.0567,  0.0204,  0.1578,
         0.1013,  0.0787, -0.0669,  0.0415,  0.0807,  0.0799,  0.0256,  0.0413,
         0.0091,  0.3422, -0.0238, -0.1436, -0.3429, -0.0036,  0.2562, -0.0314,
        -0.3193, -0.2811, -0.0278,  0.0023, -0.0009, -0.1137, -0.1571,  0.0600,
         0.1602, -0.0115, -0.1511,  0.0271,  0.1201,  0.4578,  0.1617])
conti attr [tensor(0.0588), tensor(-0.1009), tensor(-0.2091), tensor(-0.1528), tensor(0.0886), tensor(0.0567), tensor(0.0204), tensor(0.2591), tensor(0.0787), tensor(-0.0669), tensor(0.0415), tensor(0.0807), tensor(0.0799), tensor(0.0256), tensor(0.0504), tensor(0.3422), tensor(-0.0238), tensor(-0.1436), tensor(-0.3429), tensor(-0.0036), tensor(0.2562), tensor(-0.0314), tensor(-0.3193), tensor(-0.2811), tensor(-0.0278), tensor(0.0023), tensor(-0.0009), tensor(-0.1137), tensor(-0.1571), tensor(0.2201), tensor(-0.0115), tensor(-0.1511), tensor(0.0271), tensor(0.5779), tensor(0.1617)]
detokeniz

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.32),Not Similar,0.29,"#s Different parts of California have different wildlife, so you should probably narrow the region. #/s #/s American black bears They are somewhat common in some wilderness areas of California, mostly in the mountains. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['I am not aware of any university run participant pools.', "At the universities I've worked in North America, human-subject studies (esp."]
GT target: 0.6000000238418579
pred_prob 0.6573943793773651


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:46<00:00,  4.60s/it]


word attr tensor([ 0.0967, -0.0489, -0.5077,  0.4013, -0.0178,  0.3824,  0.1472,  0.0593,
         0.0356, -0.0254,  0.0586, -0.0374,  0.0082, -0.0225,  0.0279, -0.1621,
        -0.1618,  0.0603,  0.2336, -0.0167,  0.1627,  0.0139, -0.1264,  0.0343,
        -0.1739, -0.1853,  0.0142,  0.0603,  0.1146, -0.3208, -0.0475, -0.2192])
conti attr [tensor(0.0967), tensor(-0.0489), tensor(-0.5077), tensor(0.4013), tensor(-0.0178), tensor(0.3824), tensor(0.1472), tensor(0.0593), tensor(0.0356), tensor(-0.0254), tensor(0.0211), tensor(0.0082), tensor(-0.0225), tensor(0.0279), tensor(-0.1621), tensor(-0.1618), tensor(0.2939), tensor(-0.0167), tensor(0.1627), tensor(0.0139), tensor(-0.0921), tensor(-0.3450), tensor(0.0603), tensor(-0.2538), tensor(-0.2192)]
detokenized ['<s>', 'I', 'am', 'not', 'aware', 'of', 'any', 'university', 'run', 'participant', 'pools', '.', '</s>', '</s>', 'At', 'the', 'universities', 'I', "'ve", 'worked', 'in', 'North', 'America', ',', 'human', '-', 'subject', 'studies', '

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.13),Not Similar,-0.16,"#s I am not aware of any university run participant pools. #/s #/s At the universities I've worked in North America, human-subject studies (esp. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['A girl is riding a horse.', 'The girl trotted the horse.']
GT target: 4.5
pred_prob 3.1594371795654297


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:26<00:00,  2.62s/it]


word attr tensor([ 0.3892, -0.0253, -0.0811,  0.0611,  0.0025, -0.0622,  0.6075, -0.3637,
        -0.2106, -0.1941, -0.0645,  0.0928,  0.1390,  0.2018,  0.2895,  0.2590,
        -0.1612, -0.0237])
conti attr [tensor(0.3892), tensor(-0.0253), tensor(-0.0811), tensor(0.0611), tensor(0.0025), tensor(-0.0622), tensor(0.2438), tensor(-0.2106), tensor(-0.1941), tensor(-0.0645), tensor(0.0928), tensor(0.3408), tensor(0.2895), tensor(0.0979), tensor(-0.0237)]
detokenized ['<s>', 'A', 'girl', 'is', 'riding', 'a', 'horse', '.', '</s>', '</s>', 'The', 'girl', 'tro', 'tted', 'the', 'horse', '.', '</s>']
len conti_raw 15
conti_raw ['<s>', 'A', 'girl', 'is', 'riding', 'a', 'horse.', '</s>', '</s>', 'The', 'girl', 'trotted', 'the', 'horse.', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.63),Similar,0.86,#s A girl is riding a horse. #/s #/s The girl trotted the horse. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['There are four snowboarders going off a jump and that are in the air.', 'Two racers are coming to a turn.']
GT target: 1.0
pred_prob 0.10505742393434048


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:43<00:00,  4.33s/it]


word attr tensor([ 2.4862e-01, -1.3446e-01,  1.4406e-01, -6.7210e-06, -7.8034e-02,
        -3.9974e-02, -8.7999e-02, -2.4122e-02,  1.2776e-01,  9.4501e-03,
        -1.2893e-01, -1.3463e-01,  2.6677e-02, -7.2249e-02,  2.7302e-01,
         1.6624e-01,  1.9164e-01, -4.7740e-02,  1.5106e-01,  7.9935e-02,
         2.3706e-01,  2.3722e-01, -3.5184e-01, -4.5015e-01, -3.6093e-01,
        -1.1577e-02, -7.0783e-02, -2.1270e-01, -1.4722e-01, -1.2101e-01])
conti attr [tensor(0.2486), tensor(-0.1345), tensor(0.1441), tensor(-6.7210e-06), tensor(-0.2060), tensor(-0.0241), tensor(0.1278), tensor(0.0095), tensor(-0.1289), tensor(-0.1346), tensor(0.0267), tensor(-0.0722), tensor(0.2730), tensor(0.1662), tensor(0.1439), tensor(0.1511), tensor(0.0799), tensor(0.2371), tensor(-0.1146), tensor(-0.4501), tensor(-0.3609), tensor(-0.0116), tensor(-0.0708), tensor(-0.3599), tensor(-0.1210)]
detokenized ['<s>', 'There', 'are', 'four', 'snow', 'board', 'ers', 'going', 'off', 'a', 'jump', 'and', 'that', 'are', 'i

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.02),Not Similar,-0.58,#s There are four snowboarders going off a jump and that are in the air. #/s #/s Two racers are coming to a turn. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['The 30-year bond US30YT=RR dipped 14/32 for a yield of 4.26 percent from 4.23 percent.', 'The 30-year bond US30YT=RR lost 16/32, taking its yield to 4.20 percent from 4.18 percent.']
GT target: 3.3329999446868896
pred_prob 2.7606171369552612


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:44<00:00, 10.42s/it]


word attr tensor([ 0.0569, -0.0021,  0.0887, -0.0018,  0.0140,  0.0126,  0.0592,  0.0646,
        -0.0441,  0.0381,  0.1223, -0.0120,  0.0053, -0.2260,  0.1169,  0.0687,
         0.2207, -0.0641,  0.1601,  0.0191,  0.2524, -0.1214,  0.1051,  0.1584,
         0.0403,  0.0072,  0.0636, -0.0024,  0.1073, -0.2001,  0.2316, -0.1589,
        -0.2069,  0.1097, -0.1595, -0.0428, -0.0836, -0.0201,  0.0267, -0.1141,
        -0.1810,  0.1020,  0.0244,  0.0124,  0.2154,  0.0640, -0.2783,  0.1564,
        -0.1630,  0.0461, -0.1474,  0.0160,  0.1048,  0.0894,  0.0419, -0.0908,
         0.0464, -0.2542,  0.1887,  0.1548,  0.0401,  0.2356, -0.0255])
conti attr [tensor(0.0569), tensor(-0.0021), tensor(0.1010), tensor(0.0126), tensor(0.2282), tensor(0.0053), tensor(-0.0403), tensor(0.2207), tensor(-0.0641), tensor(0.1601), tensor(0.0191), tensor(0.2361), tensor(0.1584), tensor(0.0403), tensor(0.0684), tensor(-0.0928), tensor(0.2316), tensor(-0.1589), tensor(-0.2069), tensor(-0.0926), tensor(-0.0836), te

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.55),Similar,1.09,"#s The 30-year bond US30YT=RR dipped 14/32 for a yield of 4.26 percent from 4.23 percent. #/s #/s The 30-year bond US30YT=RR lost 16/32, taking its yield to 4.20 percent from 4.18 percent. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['Federal DOMA does not test the full faith and credit clause so much as the 9th and 10th Amendments to the Constitution.', 'DOMA as a congressional statute could not violate the Full Faith and Credit Clause of the Constitution.']
GT target: 4.0
pred_prob 3.4599828720092773


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:15<00:00,  7.59s/it]


word attr tensor([-0.0632,  0.2512,  0.0300, -0.1477, -0.0361,  0.0959,  0.1206, -0.0289,
         0.0145,  0.0195,  0.3253,  0.1504,  0.0397, -0.0532,  0.2386, -0.0006,
         0.0435, -0.0532, -0.0576, -0.0597, -0.1621, -0.3222,  0.1676,  0.0423,
        -0.2247,  0.2495,  0.2066, -0.0590, -0.2095, -0.0159, -0.0346,  0.2316,
         0.0626, -0.0314, -0.2038,  0.1606, -0.0311,  0.0537,  0.0673, -0.0298,
         0.0090,  0.2700,  0.0402, -0.0814,  0.1018, -0.1415,  0.2482, -0.0631,
         0.0758])
conti attr [tensor(-0.0632), tensor(0.2512), tensor(-0.1177), tensor(-0.0361), tensor(0.0959), tensor(0.1206), tensor(-0.0289), tensor(0.0145), tensor(0.0195), tensor(0.3253), tensor(0.1504), tensor(0.0397), tensor(-0.0532), tensor(0.2386), tensor(-0.0006), tensor(0.0435), tensor(-0.1107), tensor(-0.0597), tensor(-0.4843), tensor(0.1676), tensor(0.0423), tensor(-0.2247), tensor(0.4561), tensor(-0.0590), tensor(-0.2095), tensor(-0.0505), tensor(0.2316), tensor(0.0626), tensor(-0.0314), te

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.69),Similar,1.21,#s Federal DOMA does not test the full faith and credit clause so much as the 9th and 10th Amendments to the Constitution. #/s #/s DOMA as a congressional statute could not violate the Full Faith and Credit Clause of the Constitution. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['I was invited in to pitch story ideas to Ron Moore at ST:TNG.', "It's obviously possible for it to leave the field without harm - it occurred."]
GT target: 0.0
pred_prob 0.0472458079457283


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:53<00:00,  5.31s/it]


word attr tensor([ 0.2749,  0.0749,  0.0277, -0.0510, -0.0074,  0.0220,  0.0710,  0.0712,
        -0.2365, -0.0519, -0.1951,  0.2374, -0.1265,  0.1180, -0.0891,  0.0251,
        -0.4945,  0.1194, -0.1474, -0.1560, -0.1886, -0.0207,  0.1890,  0.0100,
        -0.3513,  0.0046, -0.1304,  0.2047,  0.0507,  0.1628,  0.0265,  0.1154,
         0.0208, -0.1531,  0.0674, -0.1722,  0.2304])
conti attr [tensor(0.2749), tensor(0.0749), tensor(0.0277), tensor(-0.0510), tensor(-0.0074), tensor(0.0220), tensor(0.0710), tensor(0.0712), tensor(-0.2365), tensor(-0.0519), tensor(-0.1951), tensor(0.2374), tensor(-0.1265), tensor(-0.3211), tensor(-0.1474), tensor(-0.1560), tensor(-0.2093), tensor(0.1890), tensor(0.0100), tensor(-0.3513), tensor(0.0046), tensor(-0.1304), tensor(0.2047), tensor(0.0507), tensor(0.1628), tensor(0.0265), tensor(0.1154), tensor(0.0208), tensor(-0.1531), tensor(-0.1049), tensor(0.2304)]
detokenized ['<s>', 'I', 'was', 'invited', 'in', 'to', 'pitch', 'story', 'ideas', 'to', 'Ron',

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.01),Not Similar,-0.45,#s I was invited in to pitch story ideas to Ron Moore at ST:TNG. #/s #/s It's obviously possible for it to leave the field without harm - it occurred. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['INTEL TODAY disclosed details of its next-generation XScale processor for mobile phones and handheld devices here in San Jose.', 'Intel on Wednesday unveiled its next-generation processor for cell phones, PDAs, and other wireless devices.']
GT target: 3.5999999046325684
pred_prob 3.724987804889679


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:17<00:00,  7.72s/it]


word attr tensor([ 0.4603, -0.0232,  0.2396,  0.1508, -0.0486, -0.1036, -0.1507, -0.1062,
         0.1921, -0.2518,  0.0444,  0.2671,  0.0648,  0.1807,  0.1042, -0.1372,
        -0.0613,  0.1665,  0.0005,  0.0747, -0.0195,  0.0288, -0.0470, -0.0979,
        -0.1146, -0.0990, -0.1759,  0.0461,  0.2143, -0.1000,  0.2016,  0.1078,
         0.1614,  0.0048, -0.1361,  0.1028,  0.0891, -0.1627,  0.1741,  0.1287,
        -0.0169,  0.0275,  0.0451,  0.2088,  0.0186,  0.0167, -0.1468,  0.0257,
         0.0077])
conti attr [tensor(0.4603), tensor(0.2163), tensor(0.1508), tensor(-0.0486), tensor(-0.1036), tensor(-0.1507), tensor(-0.1062), tensor(-0.0153), tensor(0.3319), tensor(0.1807), tensor(0.1042), tensor(-0.1372), tensor(-0.0613), tensor(0.1665), tensor(0.0005), tensor(0.0747), tensor(-0.0195), tensor(0.0288), tensor(-0.0470), tensor(-0.2125), tensor(-0.0990), tensor(-0.1759), tensor(0.0461), tensor(0.2143), tensor(-0.1000), tensor(0.2016), tensor(0.1078), tensor(0.0301), tensor(0.1028), ten

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.74),Similar,1.56,"#s INTEL TODAY disclosed details of its next-generation XScale processor for mobile phones and handheld devices here in San Jose. #/s #/s Intel on Wednesday unveiled its next-generation processor for cell phones, PDAs, and other wireless devices. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['Like Chris said above, stars rotate to conserve their angular momentum.', "Stars are rotating, but that's not the cause for their stability."]
GT target: 3.4000000953674316
pred_prob 2.7909621596336365


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:46<00:00,  4.62s/it]


word attr tensor([-0.0033, -0.3297,  0.2984, -0.0259,  0.0076,  0.0050, -0.2183,  0.0264,
         0.1070, -0.0907,  0.1393, -0.0740, -0.1316,  0.0008,  0.0124,  0.1623,
        -0.1947,  0.1027,  0.1393, -0.1577,  0.0776,  0.2034, -0.1841, -0.0809,
         0.1864, -0.1486,  0.1452,  0.0008,  0.0245,  0.0514,  0.6349])
conti attr [tensor(-0.0033), tensor(-0.3297), tensor(0.2984), tensor(-0.0259), tensor(0.0127), tensor(-0.2183), tensor(0.0264), tensor(0.1070), tensor(-0.0907), tensor(0.1393), tensor(-0.0740), tensor(-0.1308), tensor(0.0124), tensor(0.1623), tensor(-0.1947), tensor(0.1027), tensor(-0.0184), tensor(0.0776), tensor(0.0192), tensor(-0.0809), tensor(0.1864), tensor(-0.1486), tensor(0.1452), tensor(0.0008), tensor(0.0759), tensor(0.6349)]
detokenized ['<s>', 'Like', 'Chris', 'said', 'above', ',', 'stars', 'rotate', 'to', 'conserve', 'their', 'angular', 'momentum', '.', '</s>', '</s>', 'Stars', 'are', 'rotating', ',', 'but', 'that', "'s", 'not', 'the', 'cause', 'for', 'their

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.56),Similar,0.69,"#s Like Chris said above, stars rotate to conserve their angular momentum. #/s #/s Stars are rotating, but that's not the cause for their stability. #/s"
,,,,




<IPython.core.display.HTML object>
Raw review: ['A lady with a yellow outfit is in the air above the bed in a yellow room.', 'A dog with a red leash still attached chases over the grass toward a tennis ball.']
GT target: 0.0
pred_prob 0.04631649237126112


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:59<00:00,  5.91s/it]


word attr tensor([ 0.2524,  0.0017,  0.1350,  0.0368, -0.0008,  0.1146, -0.2886,  0.0824,
        -0.1959, -0.0140,  0.0875, -0.0320,  0.1304, -0.0413, -0.1999,  0.1658,
         0.1386,  0.1603,  0.2306, -0.1980,  0.0632, -0.0712, -0.2296, -0.1432,
        -0.3071,  0.0841,  0.0701,  0.1512, -0.0374, -0.0130, -0.2546, -0.0245,
        -0.2260,  0.0222, -0.4019,  0.0032, -0.0588,  0.1151,  0.0365,  0.1820])
conti attr [tensor(0.2524), tensor(0.0017), tensor(0.1350), tensor(0.0368), tensor(-0.0008), tensor(0.1146), tensor(-0.2886), tensor(0.0824), tensor(-0.1959), tensor(-0.0140), tensor(0.0875), tensor(-0.0320), tensor(0.1304), tensor(-0.0413), tensor(-0.1999), tensor(0.1658), tensor(0.1386), tensor(0.3909), tensor(-0.1980), tensor(0.0632), tensor(-0.0712), tensor(-0.2296), tensor(-0.1432), tensor(-0.3071), tensor(0.0841), tensor(0.0701), tensor(0.1512), tensor(-0.0374), tensor(-0.2677), tensor(-0.0245), tensor(-0.2260), tensor(0.0222), tensor(-0.4019), tensor(0.0032), tensor(-0.0588),

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.01),Not Similar,-0.47,#s A lady with a yellow outfit is in the air above the bed in a yellow room. #/s #/s A dog with a red leash still attached chases over the grass toward a tennis ball. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['A woman is adding oil on fishes.', 'The woman is cutting potatoes.']
GT target: 0.6000000238418579
pred_prob 1.113269180059433


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:26<00:00,  2.63s/it]


word attr tensor([ 0.4357, -0.1303,  0.3107,  0.1404,  0.1495, -0.2240,  0.2232, -0.0639,
        -0.2667,  0.3124, -0.5023, -0.0923, -0.2978,  0.0944, -0.0368,  0.1032,
        -0.1063, -0.0019])
conti attr [tensor(0.4357), tensor(-0.1303), tensor(0.3107), tensor(0.1404), tensor(0.1495), tensor(-0.2240), tensor(0.2232), tensor(-0.3307), tensor(0.3124), tensor(-0.5023), tensor(-0.0923), tensor(-0.2978), tensor(0.0944), tensor(-0.0368), tensor(-0.0031), tensor(-0.0019)]
detokenized ['<s>', 'A', 'woman', 'is', 'adding', 'oil', 'on', 'fishes', '.', '</s>', '</s>', 'The', 'woman', 'is', 'cutting', 'potatoes', '.', '</s>']
len conti_raw 16
conti_raw ['<s>', 'A', 'woman', 'is', 'adding', 'oil', 'on', 'fishes.', '</s>', '</s>', 'The', 'woman', 'is', 'cutting', 'potatoes.', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.22),Not Similar,0.05,#s A woman is adding oil on fishes. #/s #/s The woman is cutting potatoes. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['Train going down the tracks.', 'A black steam train traveling on the railroad tracks.']
GT target: 3.799999952316284
pred_prob 3.3534544706344604


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:29<00:00,  2.95s/it]


word attr tensor([ 0.7691,  0.0073,  0.0680, -0.2687, -0.0086, -0.0202,  0.0778, -0.0885,
        -0.0622, -0.1017,  0.0939, -0.1908,  0.2475, -0.1316, -0.0331, -0.2056,
        -0.1400,  0.1710,  0.0955, -0.2791])
conti attr [tensor(0.7691), tensor(0.0073), tensor(0.0680), tensor(-0.2687), tensor(-0.0086), tensor(0.0576), tensor(-0.0885), tensor(-0.0622), tensor(-0.1017), tensor(0.0939), tensor(-0.1908), tensor(0.2475), tensor(-0.1316), tensor(-0.0331), tensor(-0.2056), tensor(-0.1400), tensor(0.2665), tensor(-0.2791)]
detokenized ['<s>', 'Train', 'going', 'down', 'the', 'tracks', '.', '</s>', '</s>', 'A', 'black', 'steam', 'train', 'traveling', 'on', 'the', 'railroad', 'tracks', '.', '</s>']
len conti_raw 18
conti_raw ['<s>', 'Train', 'going', 'down', 'the', 'tracks.', '</s>', '</s>', 'A', 'black', 'steam', 'train', 'traveling', 'on', 'the', 'railroad', 'tracks.', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.67),Not Similar,-0.0,#s Train going down the tracks. #/s #/s A black steam train traveling on the railroad tracks. #/s
,,,,




<IPython.core.display.HTML object>
Raw review: ['According to Shia Muslims specially Iran state and Hezbollah a country called Israel does not exist.', 'As with the definition of Palestine, the definition of Israel varies depending on whom you ask.']
GT target: 3.200000047683716
pred_prob 1.4565475285053253


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:58<00:00,  5.87s/it]


word attr tensor([ 0.6371, -0.0653,  0.0977, -0.0179,  0.0661, -0.0636,  0.1520, -0.3154,
         0.1848, -0.0298,  0.1408, -0.0772,  0.1383,  0.0044,  0.1264, -0.0054,
        -0.0448, -0.0467, -0.0711, -0.1607, -0.1963, -0.1385,  0.0864, -0.0067,
        -0.0166, -0.1833, -0.0600,  0.0222,  0.0901, -0.1306, -0.0808,  0.0034,
        -0.3213,  0.1408, -0.0396, -0.0198,  0.0519,  0.2213,  0.0801])
conti attr [tensor(0.6371), tensor(-0.0653), tensor(0.0977), tensor(-0.0179), tensor(0.0661), tensor(-0.0636), tensor(0.1520), tensor(-0.3154), tensor(0.1848), tensor(-0.0298), tensor(0.1408), tensor(-0.0772), tensor(0.1383), tensor(0.0044), tensor(0.1264), tensor(-0.0054), tensor(-0.0915), tensor(-0.0711), tensor(-0.1607), tensor(-0.1963), tensor(-0.1385), tensor(0.0864), tensor(-0.0067), tensor(-0.0166), tensor(-0.2433), tensor(0.0222), tensor(0.0901), tensor(-0.1306), tensor(-0.0808), tensor(0.0034), tensor(-0.3213), tensor(0.1408), tensor(-0.0396), tensor(-0.0198), tensor(0.2732), tensor

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Not Similar (0.29),Not Similar,0.15,"#s According to Shia Muslims specially Iran state and Hezbollah a country called Israel does not exist. #/s #/s As with the definition of Palestine, the definition of Israel varies depending on whom you ask. #/s"
,,,,


<IPython.core.display.HTML object>
Raw review: ['Absolute majority always defined by the institution, or the context.', 'In countries like India, when you have 2/3 of the seats of all parliament, you have absolute majority.']
GT target: 2.5999999046325684




pred_prob 1.6035784780979156


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [01:43<00:00, 10.35s/it]


word attr tensor([ 0.0716, -0.1194,  0.0327, -0.2198,  0.0566, -0.1447,  0.0173,  0.0812,
        -0.1568,  0.0160,  0.0903,  0.0764,  0.0104,  0.0444, -0.0184,  0.1528,
         0.0228,  0.0551, -0.3689, -0.0783,  0.2056,  0.1416, -0.2101,  0.0125,
         0.0105, -0.1144, -0.0515, -0.0862, -0.2186,  0.0320, -0.0027, -0.0241,
        -0.0883,  0.0561,  0.2137, -0.1088, -0.0267,  0.2400,  0.6132,  0.0887])
conti attr [tensor(0.0716), tensor(-0.0867), tensor(-0.2198), tensor(0.0566), tensor(-0.1447), tensor(0.0173), tensor(0.0812), tensor(-0.1409), tensor(0.0903), tensor(0.0764), tensor(0.0547), tensor(-0.0184), tensor(0.1528), tensor(0.0228), tensor(0.0551), tensor(-0.3689), tensor(0.1273), tensor(0.1416), tensor(-0.2101), tensor(0.0125), tensor(-0.1554), tensor(-0.0862), tensor(-0.2186), tensor(0.0320), tensor(-0.0027), tensor(-0.0241), tensor(-0.0323), tensor(0.2137), tensor(-0.1088), tensor(-0.0267), tensor(0.8532), tensor(0.0887)]
detokenized ['<s>', 'Abs', 'olute', 'majority', 'a

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Not Similar (0.32),Not Similar,0.3,"#s Absolute majority always defined by the institution, or the context. #/s #/s In countries like India, when you have 2/3 of the seats of all parliament, you have absolute majority. #/s"
,,,,


<IPython.core.display.HTML object>
Raw review: ['Two little girls are talking on the phone.', 'A little girl is walking down the street.']
GT target: 0.5




pred_prob 0.6913229078054428


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:58<00:00,  5.89s/it]


word attr tensor([ 0.1439, -0.1391, -0.0235, -0.2067, -0.3409, -0.0982,  0.2655,  0.1318,
         0.0599, -0.0820,  0.0593,  0.1548,  0.0969,  0.3083, -0.0490, -0.2738,
         0.4381, -0.1529, -0.0288, -0.5075,  0.0477,  0.0860])
conti attr [tensor(0.1439), tensor(-0.1391), tensor(-0.0235), tensor(-0.2067), tensor(-0.3409), tensor(-0.0982), tensor(0.2655), tensor(0.1318), tensor(-0.0221), tensor(0.0593), tensor(0.1548), tensor(0.0969), tensor(0.3083), tensor(-0.0490), tensor(-0.2738), tensor(0.4381), tensor(-0.1529), tensor(-0.0288), tensor(-0.4598), tensor(0.0860)]
detokenized ['<s>', 'Two', 'little', 'girls', 'are', 'talking', 'on', 'the', 'phone', '.', '</s>', '</s>', 'A', 'little', 'girl', 'is', 'walking', 'down', 'the', 'street', '.', '</s>']
len conti_raw 20
conti_raw ['<s>', 'Two', 'little', 'girls', 'are', 'talking', 'on', 'the', 'phone.', '</s>', '</s>', 'A', 'little', 'girl', 'is', 'walking', 'down', 'the', 'street.', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.14),Not Similar,-0.11,#s Two little girls are talking on the phone. #/s #/s A little girl is walking down the street. #/s
,,,,


<IPython.core.display.HTML object>
Raw review: ["The report shows that drugs sold in Canadian pharmacies are manufactured in facilities approved by Health Canada - the FDA's counterpart in Canada.", 'The report shows that drugs sold in Canadian pharmacies are manufactured in facilities approved by Health Canada, which serves a similar role as the FDA for the Canadian government.']
GT target: 5.0




pred_prob 4.7438859939575195


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [02:48<00:00, 16.82s/it]


word attr tensor([ 0.1478,  0.1159, -0.0194,  0.0994, -0.0875, -0.1664,  0.1090,  0.2981,
         0.2706,  0.2562, -0.1326, -0.1714,  0.1751,  0.1897,  0.1941,  0.0196,
         0.0176,  0.0377, -0.0956,  0.1573,  0.1947,  0.0128,  0.1369,  0.0159,
        -0.0101,  0.0717,  0.0101,  0.1401,  0.0864,  0.0763,  0.1084,  0.1738,
        -0.0109, -0.0311,  0.0676,  0.1092,  0.3786,  0.0672, -0.1606,  0.0899,
         0.0277, -0.0225, -0.1004, -0.1593,  0.1047,  0.0241, -0.0256, -0.2157,
        -0.0033, -0.0552, -0.0033, -0.0039,  0.0645,  0.0081, -0.0181, -0.1640,
         0.0770,  0.0753,  0.0982, -0.0664])
conti attr [tensor(0.1478), tensor(0.1159), tensor(-0.0194), tensor(0.0994), tensor(-0.0875), tensor(-0.1664), tensor(0.1090), tensor(0.2981), tensor(0.2706), tensor(0.2562), tensor(-0.1326), tensor(-0.1714), tensor(0.1751), tensor(0.1897), tensor(0.1941), tensor(0.0196), tensor(0.0176), tensor(0.0377), tensor(-0.0956), tensor(0.1573), tensor(0.2075), tensor(0.1369), tensor(0.0159),

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Similar,Similar (0.95),Similar,2.58,"#s The report shows that drugs sold in Canadian pharmacies are manufactured in facilities approved by Health Canada - the FDA's counterpart in Canada. #/s #/s The report shows that drugs sold in Canadian pharmacies are manufactured in facilities approved by Health Canada, which serves a similar role as the FDA for the Canadian government. #/s"
,,,,


<IPython.core.display.HTML object>
Raw review: ["Foreign companies eye new 'opening-up'", 'More carriers likely in China']
GT target: 0.6000000238418579
pred_prob 0.07349737454205751


Kernel Shap attribution: 100%|██████████████████████████████████████████████████████████| 10/10 [00:49<00:00,  4.93s/it]


word attr tensor([ 0.7036,  0.0131, -0.0998,  0.2596, -0.2374,  0.1340, -0.0583, -0.0817,
        -0.1383, -0.0955,  0.2730, -0.1780, -0.3772, -0.1320, -0.1565,  0.1554,
         0.0135,  0.0025])
conti attr [tensor(0.7036), tensor(0.0131), tensor(-0.0998), tensor(0.2596), tensor(-0.2374), tensor(-0.2398), tensor(0.2730), tensor(-0.1780), tensor(-0.3772), tensor(-0.1320), tensor(-0.1565), tensor(0.1554), tensor(0.0135), tensor(0.0025)]
detokenized ['<s>', 'Foreign', 'companies', 'eye', 'new', "'", 'opening', '-', 'up', "'", '</s>', '</s>', 'More', 'carriers', 'likely', 'in', 'China', '</s>']
len conti_raw 14
conti_raw ['<s>', 'Foreign', 'companies', 'eye', 'new', "'opening-up'", '</s>', '</s>', 'More', 'carriers', 'likely', 'in', 'China', '</s>']


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Not Similar,Not Similar (0.01),Not Similar,-0.0,#s Foreign companies eye new 'opening-up' #/s #/s More carriers likely in China #/s
,,,,


<IPython.core.display.HTML object>


In [9]:
# Hand everything gathered in this run to save_info: the sampled dataset
# indices, the raw sentence pairs and their targets, plus the four
# per-example containers initialised earlier in the notebook.
# NOTE(review): the on-disk format is decided inside _utils.save_info;
# the '.pkl' suffix suggests a pickle -- confirm there.
save_info(
    idxs,
    stsb_data_raw,
    targets,
    model_out_list,
    raw_attr_list,
    conti_attr_list,
    raw_input_list,
    fname='kernel_shap_out.pkl',
)

{'indices': [436,
  138,
  931,
  1462,
  1491,
  577,
  873,
  899,
  268,
  603,
  1341,
  648,
  941,
  901,
  335,
  1306,
  234,
  132,
  1257,
  1433,
  1351,
  1484,
  977,
  146,
  1275,
  225,
  396,
  757,
  949,
  1151,
  605,
  1041,
  350,
  788,
  626,
  88,
  599,
  1080,
  882,
  927,
  1209,
  647,
  486,
  214,
  317,
  895,
  897,
  50,
  1054,
  1454],
 'raw_data': [['Two dogs are sniffing something in the rocks.',
   'Two dogs running down a path in the woods.'],
  ['A woman is slicing a potato.', 'A woman is slicing carrot.'],
  ['It was before the season began because Amy saw the woman before the Silence took her.',
   "The exact timeline hasn't been listed yet, but there are a few assumptions we can make."],
  ['Protests for kidnapped girls banned in Nigerian capital',
   "Scores 'killed' in Boko Haram raid in Nigeria"],
  ['Oil falls in Asian trade', 'Oil prices down in Asian trade'],
  ['There are people out on the street.', 'People are out on the street.'],
 