In [None]:
import os
import pandas as pd
from src.create_datasets_manual_evaluation import create_verbs_dataset, create_nouns_dataset, create_roles_dataset

# 1. Create datasets

In [None]:
# Base directory (relative to the working directory) for manual-evaluation data files.
data_dir = 'workdir/manual_evaluation/data'


### Verbs

In [None]:
# Build the verbs dataset pair; `new_df` is displayed below for inspection.
# NOTE(review): `results_path` is an absolute, machine-specific path while the
# other inputs are relative to the working directory — confirm before re-running.
gold_df, new_df = create_verbs_dataset(
    results_path='/raid/anwar/generative-ie/workdir/results/test-paper_verbs_st_pattern_vocabfilter',
    predictor=("xlnet_embs", "gie_swv_test_semiPURExlnet_embs_swvhypers"),
    gold_dataset_path='workdir/data/swv_gold_dataset.pkl',
    test_indexes_path='workdir/data/swv_gold_dataset_test_split.json',
    frame_description_file='workdir/framenet_data/frame_info.json',
)

new_df

In [None]:
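# Uncomment to export the verbs sheet and its gold data (overwrites existing files).
# The gold pickle goes to `data_dir`, which is where the results section reads it from.
# new_df.to_csv(f'{data_dir}/verbs-xlnet_embs_k10.csv', index=False)
# gold_df.to_pickle(f'{data_dir}/gold_dataset_verbs.pkl')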


### Nouns

In [None]:
# Build the nouns dataset pair; `new_df` is displayed below for inspection.
# NOTE(review): `results_path` is an absolute, machine-specific path while the
# other inputs are relative to the working directory — confirm before re-running.
gold_df, new_df = create_nouns_dataset(
    results_path='/raid/anwar/generative-ie/workdir/results/test-paper_nouns_st_pattern_stopwords_nounfilter',
    predictor=("xlnet_embs", "gie_swn_test_semiPURExlnet_embs_swnhypers"),
    gold_dataset_path='workdir/data/swn_gold_dataset.pkl',
    test_indexes_path='workdir/data/swn_gold_dataset_test_split.json',
    frame_description_file='workdir/framenet_data/frame_info.json',
)

new_df


In [None]:
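# Uncomment to export the nouns sheet and its gold data (overwrites existing files).
# The gold pickle goes to `data_dir`, which is where the results section reads it from.
# new_df.to_csv(f'{data_dir}/nouns-xlnet_embs_k10.csv', index=False)
# gold_df.to_pickle(f'{data_dir}/gold_dataset_nouns.pkl')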


### Roles

In [None]:
# Build the roles dataset pair; `new_df` is displayed below for inspection.
# NOTE(review): `results_path` is an absolute, machine-specific path while the
# other inputs are relative to the working directory — confirm before re-running.
gold_df, new_df = create_roles_dataset(
    results_path='/raid/anwar/generative-ie/workdir/results/test-paper_roles_st_pattern',
    predictor=("xlnet_embs", "gie_swr_test_semiPURExlnet_embs_swrhypers"),
    gold_dataset_path='workdir/data/swr_gold_dataset.pkl',
    test_indexes_path='workdir/data/swr_gold_dataset_test_split.json',
    frame_description_file='workdir/framenet_data/frame_info.json',
)

new_df


In [None]:
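# Uncomment to export the roles sheet and its gold data (overwrites existing files).
# The gold pickle goes to `data_dir`, which is where the results section reads it from.
# new_df.to_csv(f'{data_dir}/roles-xlnet_embs_k10.csv', index=False)
# gold_df.to_pickle(f'{data_dir}/gold_dataset_roles.pkl')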


# 2. Evaluate manually
Manual evaluation can be found here:
https://docs.google.com/spreadsheets/d/1me9YNaQpXJZ0p6pupd-IdmXTJ8AxbeavTIndfeROMpA/edit?usp=sharing

# 3. Results

## 3.1 automatic evaluation with gold

In [None]:
import os
import csv
import pickle
import pandas as pd


# prepare data for final precision calculation
# Maps each annotation column of the manual-results sheet to the sub-directory
# in which the pickles derived from that column are stored.
column_map = {
    'does not fit context': 'NC',
    'fit context NOT frame': 'C',
    'fit context AND frame': 'CF',
    'match gold': 'G',
}


def dump_tps_gps_aps(results_path,
                     gold_path,
                     exp_name,
                     save_results_path=None,
                     k=10):
    """Convert manual annotations into per-item tps/aps/gps pickles.

    Reads ``{results_path}/{exp_name}/results.tsv``, treats its rows as
    consecutive groups of ``k`` rows (one group per gold item) and, for every
    annotation column in ``column_map``, writes ``tps.pkl``, ``aps.pkl`` and
    ``gps.pkl`` into ``{save_results_path}/{subdir}``.

    Args:
        results_path: Directory that contains the experiment directory.
        gold_path: Path of the pickled gold DataFrame. It is loaded with
            ``pd.read_pickle``, so it must come from a trusted source.
        exp_name: Name of the experiment directory holding ``results.tsv``.
        save_results_path: Output root; falls back to
            ``{results_path}/{exp_name}`` when empty or ``None``.
        k: Number of annotated rows per gold item (default 10).

    Raises:
        ValueError: If ``k`` is smaller than 1.

    Each pickle holds a tuple with one entry per evaluated item:
        tps: list of booleans, one per row of the group (truthy annotation).
        aps: list of ``k`` ranks ``1..k``, capped at the group length.
        gps: number of rows in the group.
    """
    if k < 1:
        raise ValueError(f'k must be a positive integer, got {k!r}')

    exp_path = f'{results_path}/{exp_name}'
    manual_results_path = f'{exp_path}/results.tsv'
    if not save_results_path:
        save_results_path = exp_path

    gold_df = pd.read_pickle(gold_path)

    # Empty cells count as "not annotated" (0).
    annotations_df = pd.read_csv(manual_results_path, sep='\t', quoting=csv.QUOTE_NONE)
    annotations_df = annotations_df.fillna(0)
    # A value of -1 in 'match gold' is treated the same as 0 (no match).
    annotations_df['match gold'] = annotations_df['match gold'].apply(
        lambda x: 0 if x == -1 else x)

    # Only complete groups of k rows are evaluated, so the gold frame is
    # truncated to the number of complete groups.
    n_groups = len(annotations_df) // k
    gold_df = gold_df[:n_groups]

    print(len(gold_df), len(annotations_df))

    for column, subdir in column_map.items():

        print(column, subdir)
        save_dir_path = f'{save_results_path}/{subdir}'
        # exist_ok avoids the exists()/mkdir() race and also creates missing
        # parent directories of a custom save_results_path.
        os.makedirs(save_dir_path, exist_ok=True)

        tps, aps, gps = [], [], []
        for i in range(len(gold_df)):
            # Explicit positional slice: rows i*k .. i*k + k - 1.
            group = annotations_df[column].iloc[i * k:(i + 1) * k]
            group_size = len(group)
            tps.append([bool(value) for value in group])
            gps.append(group_size)
            aps.append([min(rank, group_size) for rank in range(1, k + 1)])

        tps, gps, aps = tuple(tps), tuple(gps), tuple(aps)

        print('Saving resutls...')
        with open(os.path.join(save_dir_path, 'tps.pkl'), 'wb') as f:
            pickle.dump(tps, f)

        with open(os.path.join(save_dir_path, 'aps.pkl'), 'wb') as f:
            pickle.dump(aps, f)

        with open(os.path.join(save_dir_path, 'gps.pkl'), 'wb') as f:
            pickle.dump(gps, f)
            
   


In [None]:
# !python -m src.run_evaluate --results_path=$save_results_path

In [None]:
from src.run_evaluate import load_tps_aps_gps, precision_at_level_hard

# Annotation column of the manual-results sheet -> sub-directory holding the
# tps/aps/gps pickles for that column.
column_map = {
    'does not fit context': 'NC',
    'fit context NOT frame': 'C',
    'fit context AND frame': 'CF',
    'match gold': 'G',
}

def evaluate_manual(results_path,
                    exp_name,
                    save_results_path=None,
                    levels=(1, 3, 5, 10)):
    """Compute precision at several levels for every annotation category.

    For each ``column -> subdir`` entry of ``column_map`` the annotations are
    loaded with ``load_tps_aps_gps`` from ``{save_results_path}/{subdir}``,
    items whose ``gps`` value is 0 are dropped, and
    ``precision_at_level_hard`` is evaluated on the remaining ``tps``.

    Args:
        results_path: Directory that contains the experiment directory.
        exp_name: Name of the experiment directory.
        save_results_path: Root holding the per-category sub-directories;
            falls back to ``{results_path}/{exp_name}`` when empty or ``None``.
        levels: Cut-offs at which precision is reported (default 1, 3, 5, 10).

    Returns:
        DataFrame indexed by annotation column with one ``p@<level>`` column
        per requested level.
    """
    exp_path = f'{results_path}/{exp_name}'
    if not save_results_path:
        save_results_path = exp_path

    # Passed on as a list, matching what the helper received before.
    levels = list(levels)
    res_df = pd.DataFrame(columns=[f'p@{lev}' for lev in levels],
                          index=column_map.keys())

    for column, subdir in column_map.items():
        print(column, subdir)
        save_dir_path = f'{save_results_path}/{subdir}'

        annots = load_tps_aps_gps(save_dir_path)
        annots_df = pd.DataFrame({'tps': annots[0], 'aps': annots[1], 'gps': annots[2]})
        # Items with gps == 0 are excluded from the evaluation.
        annots_df = annots_df[annots_df.gps != 0]

        precs = precision_at_level_hard(annots_df.tps.tolist(), levels=levels)

        # One precision per level, in the order of `levels`.
        res = [prec for _, prec in zip(levels, precs)]
        print(res)
        res_df.loc[column] = res

    return res_df
                    

In [None]:
# Input (data) and output (results) roots of the manual evaluation.
data_dir = 'workdir/manual_evaluation/data'
res_dir = 'workdir/manual_evaluation/results'

# Active dataset / gold-file pair; swap in one of the commented pairs to switch.
dataset = 'paper_verbs_st'
gold_ds = 'gold_dataset_verbs.pkl'
# dataset, gold_ds  = 'paper_nouns_st', 'gold_dataset_nouns.pkl'
# dataset, gold_ds  = 'paper_roles_st', 'gold_dataset_roles.pkl'

exp_name = 'xlnet_embs'
gold_path = f'{data_dir}/{gold_ds}'
# Results are read from and written to the same dataset directory.
results_path = f'{res_dir}/{dataset}'
save_results_path = f'{res_dir}/{dataset}'

dump_tps_gps_aps(
    results_path=results_path,
    gold_path=gold_path,
    exp_name=exp_name,
    save_results_path=save_results_path,
)

In [None]:
# Precision table for the dataset selected above (shown as the cell output).
evaluate_manual(
    results_path=results_path,
    exp_name=exp_name,
    save_results_path=save_results_path,
)