In [1]:
import pickle
import json
import numpy as np
import pandas as pd
import torch
from nltk.metrics.agreement import AnnotationTask
import analysis_constants as ac

In [2]:
def load_out_and_results_files(frame_name):
    """Load the pickled output and the CSV results that share a path prefix.

    Args:
        frame_name: path prefix; ``_out.pkl`` and ``_results.csv`` are
            appended to it to form the two file names.

    Returns:
        Tuple ``(frame_out, frame_res)``: the unpickled object and the
        DataFrame produced by ``pd.read_csv``.
    """
    pickle_path = f'{frame_name}_out.pkl'
    csv_path = f'{frame_name}_results.csv'

    # NOTE: unpickling executes code embedded in the file -- only point this
    # at pickles from a trusted source.
    with open(pickle_path, 'rb') as handle:
        unpickled = pickle.load(handle)

    results_table = pd.read_csv(csv_path)
    return unpickled, results_table

In [3]:
# process frame_out's information to be used by calc_score
def get_organized_frame_out(frame_out, attr_appearance_cutoff=5e-2):
    """Add the positively / negatively attributed tokens of each example to ``frame_out``.

    Each entry of ``frame_out['raw_input_list']`` is paired with the entry at
    the same position in ``frame_out['conti_attr_list']``. Attributions whose
    absolute value is below ``attr_appearance_cutoff`` are ignored; the
    remaining tokens are split by the sign of their attribution.

    Args:
        frame_out: dict holding 'raw_input_list' (token sequences) and
            'conti_attr_list' (one torch tensor per sequence; assumed to carry
            one score per token -- TODO confirm against the producer).
        attr_appearance_cutoff: minimum absolute attribution for a token to
            count as a feature. Defaults to 5e-2.

    Returns:
        The same ``frame_out`` object, updated in place with three lists that
        have one entry per example: 'feats_pos', 'feats_neg' and 'N_Chunks'
        (the number of positive plus negative features).
    """
    organized = {'feats_pos': [], 'feats_neg': [], 'N_Chunks': []}
    for tokens, attributions in zip(frame_out['raw_input_list'], frame_out['conti_attr_list']):
        # Swap the <s> / </s> sentence markers for [BEGIN] / [END].
        shown_tokens = ['[BEGIN]' if tok == '<s>' else '[END]' if tok == '</s>' else tok
                        for tok in tokens]

        scores = attributions.to(torch.float32).reshape(-1)
        strong_enough = ~(torch.abs(scores) < attr_appearance_cutoff)

        # reshape(-1) rather than squeeze(): squeeze() turns a single hit into
        # a 0-d result that cannot be iterated, which would discard every
        # feature of an example that has exactly one positive or one negative
        # token.
        pos_idx = torch.nonzero(strong_enough & (scores > 0)).reshape(-1).tolist()
        neg_idx = torch.nonzero(strong_enough & (scores < 0)).reshape(-1).tolist()

        # Positions whose raw token is the empty string are not features.
        features_pos = [shown_tokens[idx] for idx in pos_idx if tokens[idx] != '']
        features_neg = [shown_tokens[idx] for idx in neg_idx if tokens[idx] != '']

        organized['feats_pos'].append(features_pos)
        organized['feats_neg'].append(features_neg)
        organized['N_Chunks'].append(len(features_pos) + len(features_neg))

    frame_out.update(organized)
    return frame_out
        

In [4]:
# process frame_res to be used by calc_score
def _split_annotated_words(json_obj, key):
    """Return the comma-separated, whitespace-stripped words under ``key`` ([] if the key is absent)."""
    if key not in json_obj:
        return []
    return [word.strip() for word in json_obj[key].split(',')]


def get_organized_frame_res(frame_res, yhs_func, task_name, n_annotators=3):
    """Group annotator answers per task item and measure annotator agreement.

    Rows of ``frame_res`` are grouped by the ``Input.{task_name}_number``
    column. Every ``Answer.taskAnswers`` cell is parsed as JSON and its first
    element is read for the label, the trust number and the four word lists.

    Args:
        frame_res: DataFrame with the columns ``Input.{task_name}_number``
            and ``Answer.taskAnswers``.
        yhs_func: callable that maps one parsed answer object to its label.
        task_name: task identifier used to build the group-by column name.
        n_annotators: number of answers an item must have to be included in
            the agreement computation. Defaults to 3.

    Returns:
        Tuple ``(organized, average_ao)``. ``organized`` maps 'yh', 'non_neg',
        'non_pos', 'should_neg', 'should_pos' and 'trust_numbers' to lists
        with one entry per item, each holding one value per answer.
        ``average_ao`` is the value of ``AnnotationTask.avg_Ao()`` computed
        over the items that have exactly ``n_annotators`` answers.
    """
    # Output field -> key of the answer JSON that feeds it.
    word_fields = (
        ('non_neg', 'non_red_in'),
        ('non_pos', 'non_green_in'),
        ('should_neg', 'fiat_red_in'),
        ('should_pos', 'fiat_green_in'),
    )
    organized = {'yh': [], 'non_neg': [], 'non_pos': [], 'should_neg': [], 'should_pos': [], 'trust_numbers': []}

    for _, item_rows in frame_res.groupby(f'Input.{task_name}_number'):  # edit this groupby thing for different tasks
        yhs, trusts = [], []
        words_per_field = {field: [] for field, _ in word_fields}
        for answer_string in item_rows['Answer.taskAnswers']:  # edit this portion below to adapt
            json_obj = json.loads(answer_string)[0]
            yhs.append(yhs_func(json_obj))
            # A missing trust number is recorded as 0.
            trusts.append(json_obj.get('trust_number', 0))
            for field, json_key in word_fields:
                words_per_field[field].append(_split_annotated_words(json_obj, json_key))

        organized['yh'].append(yhs)
        organized['trust_numbers'].append(trusts)
        for field, _ in word_fields:
            organized[field].append(words_per_field[field])

    # Agreement rate of the annotators: items are numbered from 1 and the
    # annotators are labelled a1, a2, ... by the position of their answer.
    annotation_triples = []
    for i, y_res in enumerate(organized['yh'], start=1):
        if len(y_res) != n_annotators:
            print(f'i:{i}, don\'t have {n_annotators} answers')
            continue
        for position, label in enumerate(y_res, start=1):
            annotation_triples.append((f'a{position}', str(i), label))

    annotation_task = AnnotationTask(annotation_triples)
    average_ao = annotation_task.avg_Ao()
    return organized, average_ao

In [5]:
#functions to parse out the annotator's result from the json object for each task
def sst2_sentiment_function(json_obj):
    """Return 1 when the '1' entry of ``json_obj['sentiment_radio']`` is truthy, else 0."""
    option_one_selected = json_obj['sentiment_radio']['1']
    if option_one_selected:
        return 1
    return 0

def stsb_similarity_function(json_obj):
    """Binarise the 'similarity' answer: 1 if it is strictly above 2.5, else 0.

    A missing 'similarity' key is treated as 2.5 and therefore yields 0.
    """
    threshold = 2.5
    raw_score = json_obj.get('similarity', threshold)
    if float(raw_score) > threshold:
        return 1
    return 0

def qnli_entailment_function(json_obj):
    """Return 1 when the '1' entry of ``json_obj['entailment_radio']`` is truthy, else 0."""
    option_one_selected = json_obj['entailment_radio']['1']
    if option_one_selected:
        return 1
    return 0

# One label parser per task. task_frame_process_wrapper pairs these with the
# task names by position, so the order here has to match that list.
yhs_function_list = [
    sst2_sentiment_function,
    stsb_similarity_function,
    qnli_entailment_function,
]

In [6]:
def proc_frame_out_and_res(task_name,
                           yhs_func,
                           frame_names=None):
    """Process and re-pickle the output and results files of every framework of a task.

    For each framework the ``{frame}_out.pkl`` / ``{frame}_results.csv`` pair
    under ``ac.path_to_task_files(task_name)`` is loaded, organised, and
    written back as ``{frame}_out{ac.process_suffix}.pkl`` and
    ``{frame}_results{ac.process_suffix}.pkl`` in the same directory. The mean
    of the per-framework agreement values is printed at the end.

    Args:
        task_name: task identifier, passed to ``ac.path_to_task_files`` and to
            ``get_organized_frame_res``.
        yhs_func: callable extracting the label from one parsed answer.
        frame_names: framework names to process. ``None`` (the default)
            selects deeplift, guided_backprop, input_x_gradients,
            integrated_gradients, kernel_shap and lime.

    Returns:
        The framework names that were processed.
    """
    if frame_names is None:
        # Built on every call: a list literal in the signature is a single
        # shared object, so a caller mutating the returned list would change
        # the default for all later calls.
        frame_names = ['deeplift', 'guided_backprop', 'input_x_gradients',
                       'integrated_gradients', 'kernel_shap', 'lime']

    path_to_task_files = ac.path_to_task_files(task_name)
    avg_aos = []
    for frame_name in frame_names:
        print(f'current task: {task_name}, current framework: {frame_name}')
        path_to_frame = f'{path_to_task_files}/{frame_name}'
        frame_out, frame_results = load_out_and_results_files(path_to_frame)
        org_frame_results, avg_ao = get_organized_frame_res(frame_results, yhs_func, task_name)
        avg_aos.append(avg_ao)
        org_frame_out = get_organized_frame_out(frame_out)

        with open(f'{path_to_task_files}/{frame_name}_out{ac.process_suffix}.pkl', 'wb') as f:
            pickle.dump(org_frame_out, f)
        with open(f'{path_to_task_files}/{frame_name}_results{ac.process_suffix}.pkl', 'wb') as f:
            pickle.dump(org_frame_results, f)

    print(f'average average agreement {np.mean(avg_aos):.2f}')
    return frame_names

In [7]:
def task_frame_process_wrapper(tasks_names, yhs_function_list):
    """Call ``proc_frame_out_and_res`` once for every (task name, label parser) pair.

    The two sequences are paired by position with ``zip``, so entries of the
    longer one that have no counterpart are not processed.
    """
    task_parser_pairs = zip(tasks_names, yhs_function_list)
    for current_task, label_parser in task_parser_pairs:
        proc_frame_out_and_res(current_task, label_parser)


In [8]:
# Process every task listed in analysis_constants with its matching label parser.
task_frame_process_wrapper(
    tasks_names=ac.task_names,
    yhs_function_list=yhs_function_list,
)

current task: sst2, current framework: deeplift
current task: sst2, current framework: guided_backprop
current task: sst2, current framework: input_x_gradients
current task: sst2, current framework: integrated_gradients
current task: sst2, current framework: kernel_shap
current task: sst2, current framework: lime
average average agreement 0.65
current task: stsb, current framework: deeplift
current task: stsb, current framework: guided_backprop
current task: stsb, current framework: input_x_gradients
current task: stsb, current framework: integrated_gradients
current task: stsb, current framework: kernel_shap
current task: stsb, current framework: lime
average average agreement 0.83
current task: qnli, current framework: deeplift
current task: qnli, current framework: guided_backprop
current task: qnli, current framework: input_x_gradients
current task: qnli, current framework: integrated_gradients
current task: qnli, current framework: kernel_shap
current task: qnli, current framework