In [32]:
from captum.attr import IntegratedGradients,LayerIntegratedGradients,TokenReferenceBase, visualization
import torch
import torch.nn as nn
from label_dict import label_emo_map
from transformers import BertTokenizer,AutoTokenizer

class model_wrapper(nn.Module):

    def __init__(self,model):
        super(model_wrapper, self).__init__()

        self.softmax = nn.Softmax(dim=1)
        self.model = model
    def forward(self, text): #here text is utterance based on the input type specified

        output = self.model(text)
        return self.softmax(output)

def add_attributions_to_visualizer(attributions, text, pred, pred_ind, label, delta):
    attributions = attributions.sum(dim=2).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    attributions = attributions.cpu().detach().numpy()
    # storing couple samples in an array for visualization purposes
    return visualization.VisualizationDataRecord(
                            attributions,
                            pred,
                            label_emo_map[pred_ind],
                            label_emo_map[label],
                            label_emo_map[pred_ind],
                            attributions.sum(),
                            text,
                            delta)

def explain_model(model,binput_ids,btarget,binput_str,bpred_ind,bpred_softmax):
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    vis_data_records_ig = []
    token_reference = TokenReferenceBase(reference_token_idx=0)
    model = model.cpu()
    model_explain = model_wrapper(model)
    
    # print(binput_str)
    print(binput_ids[0])
#     input_id = binput_ids[i,:].unsqueeze(0).cpu()
    seq_len = len(input_id.squeeze(0).tolist())
    target = btarget[i].item()
    pred_ind  = bpred_ind[i].item()
    pred_softmax = bpred_softmax[i,:][pred_ind].item()
    input_str = tokenizer.tokenize("".join(binput_str[i]))

    if label_emo_map[target] == "sentimental" or label_emo_map[target] == "nostalgic":
        if label_emo_map[pred_ind] == "nostalgic" or label_emo_map[pred_ind] == "sentimental":
            device = torch.device("cpu")
            reference_ids = token_reference.generate_reference(seq_len,device=device).unsqueeze(0)
            ig =LayerIntegratedGradients(model_explain,model_explain.model.encoder.bert.embeddings)
            attributions, delta = ig.attribute(input_id,reference_ids,target=target,n_steps=10,return_convergence_delta=True)

            # print('pred: ', label_emo_map[pred_ind], '(', '%.2f'%pred_softmax, ')', ', delta: ', abs(delta))
            vis_data_records_ig.append(add_attributions_to_visualizer(attributions,input_str,pred_softmax, pred_ind,target, delta))

    if len(vis_data_records_ig) != 0:
        visualization.visualize_text(vis_data_records_ig)

In [33]:
import os
import time
import shutil
import time
import json
import random
import numpy as np
from easydict import EasyDict as edict
import argparse
from sklearn.metrics import classification_report,f1_score
import pickle
## torch packages
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import torch.nn as nn

## for visulisation
import matplotlib.pyplot as plt
from IPython.display import clear_output

## custom
from select_model_input import select_model,select_input
import dataset
from label_dict import emo_label_map,label_emo_map,class_names,class_indices
# from xai_emo_rec import explain_model

torch.manual_seed(0)
np.random.seed(0)
random.seed(0)
torch.backends.cudnn.deterministic = False

def get_pred_softmax(logits):
    softmax_layer = nn.Softmax(dim=1)
    return softmax_layer(logits)

def eval_model(model, val_iter, loss_fn,config,arch_name,mode="train",explain=False):

    confusion = config.confusion
    per_class = config.per_class
    y_true = []
    y_pred = []
    total_epoch_loss = 0
    total_epoch_acc = 0
    total_epoch_acc3 = 0

    eval_batch_size = 1

    if confusion:
        conf_matrix = torch.zeros(config.output_size, config.output_size)
    if per_class:
           class_correct = list(0. for i in range(config.output_size))
           class_total = list(0. for i in range(config.output_size))

    model.eval()
    with torch.no_grad():
        for idx, batch in enumerate(val_iter):
            model = model.cuda()
            text, attn,target = select_input(batch,config,arch_name)
            target = torch.autograd.Variable(target).long()

            if torch.cuda.is_available():
                if arch_name=="a_bert":
                    text = [text[0].cuda(),text[1].cuda()]
                    attn = attn.cuda()
                elif arch_name == "va_bert":
                    text = [text[0].cuda(),text[1].cuda(),text[2].cuda()]
                    attn = attn.cuda()
                elif arch_name == "vad_bert" or arch_name=="kea_bert" or arch_name == "self_attn_bert":
                    text = [text[0].cuda(),text[1].cuda(),text[2].cuda(),text[3].cuda()]
                    attn = attn.cuda()
                else:
                    text = text.cuda()
                    attn = attn.cuda()
                target = target.cuda()

            prediction = model(text,attn)

            correct = np.squeeze(torch.max(prediction, 1)[1].eq(target.view_as(torch.max(prediction, 1)[1])))
            pred_ind = torch.max(prediction, 1)[1].view(target.size()).data

            if mode == "explain":
                pred_softmax = get_pred_softmax(prediction)
                explain_model(model,text,target.data,batch["utterance_data_str"],pred_ind,pred_softmax) ## use jupyter-notebook while doing explainations
            else:
                if confusion:
                    for t, p in zip(target.data, pred_ind):
                            conf_matrix[t.long(), p.long()] += 1
                if per_class:
                    label = target[0]
                    class_correct[label] += correct.item()
                    class_total[label] += 1

                loss = loss_fn(prediction, target)

                num_corrects = (pred_ind == target.data).sum()
                y_true.extend(target.data.cpu().tolist())
                y_pred.extend(pred_ind.cpu().tolist())

                acc = 100.0 * num_corrects/eval_batch_size
                acc3 = accuracy_topk(prediction, target, topk=(3,))
                total_epoch_loss += loss.item()
                total_epoch_acc += acc.item()
                total_epoch_acc3 += acc3

        if confusion:
            import seaborn as sns
            sns.heatmap(conf_matrix, annot=True,xticklabels=list(emo_label_map.keys()),yticklabels=list(emo_label_map.keys()),cmap='Blues')

            plt.show()
        if per_class:
            for i in range(config.output_size):
                print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                label_emo_map[i], 100 * class_correct[i] / class_total[i],
                np.sum(class_correct[i]), np.sum(class_total[i])))

    if mode != "explain":

        f1_score_e = f1_score(y_true, y_pred, labels=class_indices,average='macro')
        f1_score_w = f1_score(y_true, y_pred, labels=class_indices,average='weighted')
        return total_epoch_loss/len(val_iter), total_epoch_acc/len(val_iter),f1_score_e,f1_score_w,total_epoch_acc3/len(val_iter)



def load_model(resume,model,optimizer):
    
    
    checkpoint = torch.load(resume)
    start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    model = model.cuda()
    model.eval()
    # optimizer.load_state_dict(checkpoint['optimizer']) ## during retrain TODO

    return model,optimizer,start_epoch
    

def call_eval(resume_path,mode,rem_epoch=10,patience=10):

    ## Load the resume model parameters  
    log_path = resume_path.replace("model_best.pth.tar","log.json")
    with open(log_path,'r') as f:
        log = json.load(f)
    f.close()
    
    ## Initialising parameters
    learning_rate = log["param"]["learning_rate"]
    batch_size = log["param"]["batch_size"]
    input_type = log["param"]["input_type"]
    arch_name = log["param"]["arch_name"]
    hidden_size = log["param"]["hidden_size"]
    embedding_length = log["param"]["embedding_length"]
    output_size = log["param"]["output_size"]
    tokenizer = log["param"]["tokenizer"]
    embedding_type = log["param"]["embedding_type"]

    ## Loading data
    print('Loading dataset')
    start_time = time.time()
    vocab_size, word_embeddings,train_iter, valid_iter ,test_iter= dataset.get_dataloader(batch_size,tokenizer,embedding_type,arch_name)
    finish_time = time.time()
    print('Finished loading. Time taken:{:06.3f} sec'.format(finish_time-start_time))

    eval_config = edict(log["param"])
    eval_config.resume_path = resume_path

    if mode == "explain":
        model = select_model(eval_config,arch_name,vocab_size,word_embeddings,grad_check=False)
    else:
        model = select_model(eval_config,arch_name,vocab_size,word_embeddings)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),lr=learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,eval_config.step_size, gamma=0.5)

    model,optimizer,start_epoch = load_model(resume_path,model,optimizer)

        
    print(f'Train Acc: {log["train_acc"]:.3f}%, Valid Acc: {log["valid_acc"]:.3f}%, Test Acc: {log["test_acc"]:.3f}%')

    eval_config.confusion = False
    eval_config.per_class = False

    ## explaining
    eval_model(model, test_iter,loss_fn,eval_config,arch_name,mode,explain=True)


The below section shows the word importances of sentimental and nostalgic!

In [34]:

call_eval("/home/ashvar/varsha/Emotion-Recognition/save/speaker+listener/kea_bert/2020_11_02_15_47_09/model_best.pth.tar"
,"explain")


Loading dataset
Finished loading. Time taken:01.817 sec


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Train Acc: 61.888%, Valid Acc: 55.848%, Test Acc: 54.024%
[tensor([[  101,  3398,  2055,  2184,  2086,  3283,  1045,  2018,  1037,  7570,
         18752, 14116,  3325,  1012,  2009,  2001,  2531,  1003,  2037,  6346,
          2021,  2027,  2718,  1996,  2300, 13826,  1998,  5175,  1012,  2027,
          2018,  2053,  6441,  2021,  2027,  2471,  2743,  2033,  2125,  1996,
          2346,  1012,   102,  2106,  2017,  9015,  2151,  6441,  1029,   102,
          2053,  1045,  2347,  1005,  1056,  2718,  1012,  2009,  2357,  2041,
          2027,  2020,  7144,  1012,  1045,  2371,  5905,  2021,  3651,  2009,
          2001,  2010,  6346,  1012,   102,  2339,  2106,  2017,  2514,  5905,
          1029,  2111,  2428,  5807,  1005,  1056,  3298,  7144,  1012,   102,
          1045,  2123,  1005,  1056,  2113,  1045,  2001,  2047,  2000,  4439,
          1998,  2910,  1005,  1056,  5281,  2505,  2066,  2008,  1012,  1045,
          2371,  2066,  2026,  7109,  2081,  2032, 25430,  2121,  3726, 

NameError: name 'input_id' is not defined