In [1]:
import sys, time, pickle, torch
sys.path.insert(0, '../../Models')
sys.path.insert(0, '../../Utils')
sys.path.insert(0, '../../Preprocess')
import torch
import numpy as np
import pandas as pd
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification
from _utils import sample_random_glue_sst2, get_continuation_mapping, get_continuous_attributions, get_continuous_raw_inputs

In [2]:
# Draw the evaluation sample through the project helper from `_utils`.
# It yields three values: the raw review texts, their ground-truth labels, and
# a third value bound to `idxs` (presumably the dataset indices of the sampled
# rows -- TODO confirm against `_utils.sample_random_glue_sst2`).
reviews_raw, targets, idxs = sample_random_glue_sst2()

Reusing dataset glue (/home/user/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /home/user/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-3b24abff24d1d8c0.arrow
Loading cached processed dataset at /home/user/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-5960909ab3834668.arrow


In [3]:
# Tokenizer for the same checkpoint name that Model_Wrapper loads its weights from.
# generate_record calls it with `return_offsets_mapping=True`, so the "fast"
# tokenizer class is used here.
tokenizer = RobertaTokenizerFast.from_pretrained('siebert/sentiment-roberta-large-english')

In [4]:
class Model_Wrapper(torch.nn.Module):
    """Wrap the pretrained sentiment classifier so it maps embeddings to P(positive).

    The embedding lookup is exposed separately through ``get_embeddings`` so
    that ``forward`` can start from the embedding output instead of token ids.
    """

    def __init__(self):
        super().__init__()
        checkpoint = 'siebert/sentiment-roberta-large-english'
        self.model = RobertaForSequenceClassification.from_pretrained(checkpoint)

    def get_embeddings(self, input_ids):
        """Return the output of the RoBERTa embedding layer for ``input_ids``."""
        embedding_layer = self.model.roberta.embeddings
        return embedding_layer(input_ids)

    def forward(self, embeddings):
        """Run encoder and classification head on ``embeddings``.

        Returns:
            The softmax probability stored at class index 1 for every row of
            the logits; index 1 is the "positive" class.
        """
        # The encoder returns a tuple-like object; element 0 is the sequence output.
        hidden_states = self.model.roberta.encoder(embeddings)[0]
        class_logits = self.model.classifier(hidden_states)
        class_probs = torch.softmax(class_logits, dim=1)
        # Keep only the positive-class column.
        return class_probs[:, 1]

In [5]:
# Build the wrapper once for the whole notebook; its constructor loads the
# pretrained 'siebert/sentiment-roberta-large-english' classifier.
model = Model_Wrapper()

In [6]:
# Accumulators for per-sample information (model outputs, raw attributions and
# word-level attributions); each starts out as its own empty list.
model_out_list = []
attr_list = []
wrd_attr_list = []

In [7]:
from captum.attr import DeepLift
from captum.attr import visualization 

In [8]:
# Attribution method used in this notebook: Captum's DeepLift, constructed on the
# wrapper module itself (so attributions are taken w.r.t. the embeddings that
# Model_Wrapper.forward receives).
deep_lift = DeepLift(model)

In [10]:
def generate_record(raw_review, target):
    """Classify one review, attribute the prediction with DeepLift and build a Captum record.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``deep_lift`` objects.

    Args:
        raw_review: Review text handed to the tokenizer.
        target: Ground-truth label; values above 0.5 are reported as 'Pos',
            everything else as 'Neg'.

    Returns:
        A ``visualization.VisualizationDataRecord`` whose word importances are
        the word-level (merged) attributions, aligned with the merged
        ``raw_input`` words.
    """
    # --- tokenization ---------------------------------------------------
    tokenized = tokenizer(raw_review, truncation=True, return_offsets_mapping=True)
    offset_mapping = tokenized['offset_mapping']
    conti_map = get_continuation_mapping(offset_mapping)
    input_ids = torch.tensor(tokenized['input_ids']).unsqueeze(0)  # add batch dim
    # Strip the 'Ġ' marker that the tokenizer puts on tokens so only the text remains.
    detokenized = [t.replace('Ġ', '') for t in tokenizer.convert_ids_to_tokens(input_ids[0])]

    # --- forward pass ---------------------------------------------------
    input_emb = model.get_embeddings(input_ids)
    pred_prob = model(input_emb).item()

    # --- labels ---------------------------------------------------------
    pred_class = 'Pos' if pred_prob > 0.5 else 'Neg'
    true_class = 'Pos' if target > 0.5 else 'Neg'

    # --- attribution ----------------------------------------------------
    attribution, delta = deep_lift.attribute(input_emb, return_convergence_delta=True)
    # Collapse the embedding dimension to get one score per token.
    word_attributions = attribution.squeeze(0).sum(dim=1)
    # L2-normalise, but leave an all-zero attribution vector untouched instead
    # of dividing by zero and filling the record with NaNs.
    attr_norm = torch.norm(word_attributions)
    if attr_norm > 0:
        word_attributions = word_attributions / attr_norm
    attr_score = torch.sum(word_attributions)
    # NOTE(review): 0.5 mirrors the probability threshold above, but attr_score
    # is a sum of signed, normalised attributions -- confirm this cut-off is intended.
    attr_class = 'Pos' if attr_score > 0.5 else 'Neg'
    convergence_score = delta

    # --- merge sub-word tokens back into words -------------------------
    # Both helpers are driven by the same continuation map, so the merged
    # attributions are expected to line up one-to-one with the merged words
    # (assumes get_continuous_attributions returns one score per entry of
    # raw_input -- TODO confirm in _utils).
    conti_attr = get_continuous_attributions(conti_map, word_attributions)
    raw_input = get_continuous_raw_inputs(conti_map, detokenized)

    # The record must pair each displayed word with its own score, so the
    # merged attributions are passed here rather than the per-token ones;
    # otherwise every word after a split token would show a shifted score.
    visual_record = visualization.VisualizationDataRecord(
        word_attributions=conti_attr,
        pred_prob=pred_prob,
        pred_class=pred_class,
        true_class=true_class,
        attr_class=attr_class,
        attr_score=attr_score,
        raw_input=raw_input,
        convergence_score=convergence_score,
    )

    return visual_record
      
    

In [11]:
# Walk the sampled reviews and render the attribution table for each one.
# The unconditional `break` stops after the first review.
for rev_raw, target in zip(reviews_raw, targets):
    print(f'Raw review: {rev_raw}')
    print(f'GT target: {target}')
    visual_record = generate_record(rev_raw, target)
    # visualize_text displays the table itself; wrapping it in print() only
    # added the repr of its return value to the cell output.
    visualization.visualize_text([visual_record])
    break

Raw review: against all odds 
GT target: 1


               activations. The hooks and attributes will be removed
            after the attribution is finished


asd 6


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
Pos,Pos (0.99),Neg,-0.45,#s against all odds #/s
,,,,


<IPython.core.display.HTML object>
