In [None]:
import pandas as pd
import json
import textstat as txt
from itertools import groupby

import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker

from itertools import permutations

import pickle
# from sklearn.metrics import jaccard_score

## Loading TACRED dataset

In [None]:
# Load the three TACRED splits. Each file is opened in a `with` block so the
# handle is closed as soon as parsing finishes instead of being leaked.
with open('./../dataset/tacred/json/train.json') as fp:
    train_data = json.load(fp)
print("Number of Training instances :: {}".format(len(train_data)))


with open('./../dataset/tacred/json/dev.json') as fp:
    dev_data = json.load(fp)
print("Number of Dev instances :: {}".format(len(dev_data)))

with open('./../dataset/tacred/json/test.json') as fp:
    test_data = json.load(fp)
print("Number of Test instances :: {}".format(len(test_data)))

In [None]:
# One-column frame holding the id of every TACRED test example, in file order.
tacred_test = pd.DataFrame(
    [example['id'] for example in test_data],
    columns=['sentence_id'],
)
tacred_test

In [None]:
# One-column frame holding the id of every TACRED dev example, in file order.
tacred_dev = pd.DataFrame(
    [example['id'] for example in dev_data],
    columns=['sentence_id'],
)
tacred_dev

## Loading Re-TACRED

In [None]:
# Re-TACRED labels for the test split, looked up by sentence id. The file is
# opened in a `with` block so the handle is closed right after parsing.
with open('./../dataset/tacred/json/Re-TACRED/test_id2label.json') as fp:
    re_test = json.load(fp)
print("Total Number of instances in ReTACRED-Reduced test set  :: {}".format(len(re_test)))

# Collect TACRED test examples whose relation differs from the Re-TACRED label.
# Examples whose id is absent from the Re-TACRED file are ignored.
count = 0
retacred_test = dict()
for example in test_data:
    sid = example['id']
    rel = example['relation']
    if sid in re_test and rel != re_test[sid]:
        count += 1
        retacred_test[sid] = rel

sentence_id = list(retacred_test.keys())

# From here on `retacred_test` is a one-column frame of the mismatching ids.
retacred_test = pd.DataFrame({'sentence_id':sentence_id})
print("Number of incorrectly labeled test instances in TACRED test  :: {}".format(len(retacred_test)))

In [None]:
# Re-TACRED labels for the dev split, looked up by sentence id. The file is
# opened in a `with` block so the handle is closed right after parsing.
with open('./../dataset/tacred/json/Re-TACRED/dev_id2label.json') as fp:
    re_dev = json.load(fp)
print("Total Number of instances in ReTACRED-Reduced dev set  :: {}".format(len(re_dev)))

# Collect TACRED dev examples whose relation differs from the Re-TACRED label.
# Examples whose id is absent from the Re-TACRED file are ignored.
count = 0
retacred_dev = dict()
for example in dev_data:
    sid = example['id']
    rel = example['relation']
    if sid in re_dev and rel != re_dev[sid]:
        count += 1
        retacred_dev[sid] = rel

sentence_id = list(retacred_dev.keys())

# From here on `retacred_dev` is a one-column frame of the mismatching ids.
retacred_dev = pd.DataFrame({'sentence_id':sentence_id})
# The message names the dev split (it previously said "test instances").
print("Number of incorrectly labeled dev instances in TACRED dev  :: {}".format(len(retacred_dev)))

In [None]:
# Re-TACRED labels for the train split, looked up by sentence id. The file is
# opened in a `with` block so the handle is closed right after parsing.
with open('./../dataset/tacred/json/Re-TACRED/train_id2label.json') as fp:
    re_train = json.load(fp)
print("Total Number of instances in ReTACRED-Reduced train set  :: {}".format(len(re_train)))

# Collect TACRED train examples whose relation differs from the Re-TACRED label.
# Examples whose id is absent from the Re-TACRED file are ignored.
count = 0
retacred_train = dict()
for example in train_data:
    sid = example['id']
    rel = example['relation']
    if sid in re_train and rel != re_train[sid]:
        count += 1
        retacred_train[sid] = rel

sentence_id = list(retacred_train.keys())

# From here on `retacred_train` is a one-column frame of the mismatching ids.
retacred_train = pd.DataFrame({'sentence_id':sentence_id})
# The message names the train split (it previously said "test instances").
print("Number of incorrectly labeled train instances in TACRED train  :: {}".format(len(retacred_train)))

## ReTACRED Baseline: Random Picking

In [None]:
# Baseline: number of `retacred_test` ids found among the first i rows of
# `tacred_test` (file order), for i = 0 .. n-1.
# One membership test plus a running sum gives the same counts as an inner
# merge on `sentence_id` per budget value, without n merges and n printed lines.
is_noisy = tacred_test['sentence_id'].isin(retacred_test['sentence_id']).to_numpy()
re_freq_re = np.concatenate(([0], np.cumsum(is_noisy)))[:len(is_noisy)].tolist()

print(len(re_freq_re))

In [None]:
# Baseline: number of `retacred_dev` ids found among the first i rows of
# `tacred_dev` (file order), for i = 0 .. n-1.
# One membership test plus a running sum gives the same counts as an inner
# merge on `sentence_id` per budget value, without n merges and n printed lines.
is_noisy = tacred_dev['sentence_id'].isin(retacred_dev['sentence_id']).to_numpy()
re_dev_freq_re = np.concatenate(([0], np.cumsum(is_noisy)))[:len(is_noisy)].tolist()

print(len(re_dev_freq_re))

## Jaccard Similarity Score

In [None]:
def jaccard_similarity(list1, list2):
    """Return the Jaccard similarity |A ∩ B| / |A ∪ B| of two collections.

    Both arguments are converted to sets first, so duplicates and ordering
    are ignored. Elements must be hashable.

    Args:
        list1: First iterable of hashable items.
        list2: Second iterable of hashable items.

    Returns:
        A float in [0.0, 1.0]. Two empty inputs are treated as identical and
        yield 1.0 rather than raising ZeroDivisionError.
    """
    s1 = set(list1)
    s2 = set(list2)
    union = s1 | s2
    if not union:
        # Both sides are empty: the ratio is 0/0, so define it as "identical".
        return 1.0
    return len(s1 & s2) / len(union)

## Loading Dataframes

### Test Results

In [None]:
# Each model's test predictions live in two TSV logs (`inc_*` and `c_*`).
# Both are read, then stacked (`c_` rows first) under a fresh 0..n-1 index.
parnn_test_inc, parnn_test_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./tacred/logs/inc_parnn_test.tsv', './tacred/logs/c_parnn_test.tsv')
)
parnn_test = pd.concat([parnn_test_c, parnn_test_inc], ignore_index=True)
print(len(parnn_test))

lstm_test_inc, lstm_test_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./tacred/logs/inc_lstm_test.tsv', './tacred/logs/c_lstm_test.tsv')
)
lstm_test = pd.concat([lstm_test_c, lstm_test_inc], ignore_index=True)
print(len(lstm_test))

bilstm_test_inc, bilstm_test_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./bilstm/logs/inc_bilstm_test.tsv', './bilstm/logs/c_bilstm_test.tsv')
)
bilstm_test = pd.concat([bilstm_test_c, bilstm_test_inc], ignore_index=True)
print(len(bilstm_test))

cgcn_test_inc, cgcn_test_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./cgcn/logs/inc_cgcn_test.tsv', './cgcn/logs/c_cgcn_test.tsv')
)
cgcn_test = pd.concat([cgcn_test_c, cgcn_test_inc], ignore_index=True)
print(len(cgcn_test))

gcn_test_inc, gcn_test_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./cgcn/logs/inc_gcn_test.tsv', './cgcn/logs/c_gcn_test.tsv')
)
gcn_test = pd.concat([gcn_test_c, gcn_test_inc], ignore_index=True)
print(len(gcn_test))

cnn_test_inc, cnn_test_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./cnn/logs/inc_cnn_test.tsv', './cnn/logs/c_cnn_test.tsv')
)
cnn_test = pd.concat([cnn_test_c, cnn_test_inc], ignore_index=True)
print(len(cnn_test))

sattn_test_inc, sattn_test_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./self-attention/logs/inc_self-attn_test.tsv', './self-attention/logs/c_self-attn_test.tsv')
)
sattn_test = pd.concat([sattn_test_c, sattn_test_inc], ignore_index=True)
print(len(sattn_test))

### Dev Results

In [None]:
# Each model's dev predictions live in two TSV logs (`inc_*` and `c_*`).
# Both are read, then stacked (`c_` rows first) under a fresh 0..n-1 index.
parnn_dev_inc, parnn_dev_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./tacred/logs/inc_parnn_dev.tsv', './tacred/logs/c_parnn_dev.tsv')
)
parnn_dev = pd.concat([parnn_dev_c, parnn_dev_inc], ignore_index=True)
print(len(parnn_dev))

lstm_dev_inc, lstm_dev_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./tacred/logs/inc_lstm_dev.tsv', './tacred/logs/c_lstm_dev.tsv')
)
lstm_dev = pd.concat([lstm_dev_c, lstm_dev_inc], ignore_index=True)
print(len(lstm_dev))

bilstm_dev_inc, bilstm_dev_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./bilstm/logs/inc_bilstm_dev.tsv', './bilstm/logs/c_bilstm_dev.tsv')
)
bilstm_dev = pd.concat([bilstm_dev_c, bilstm_dev_inc], ignore_index=True)
print(len(bilstm_dev))

cgcn_dev_inc, cgcn_dev_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./cgcn/logs/inc_cgcn_dev.tsv', './cgcn/logs/c_cgcn_dev.tsv')
)
cgcn_dev = pd.concat([cgcn_dev_c, cgcn_dev_inc], ignore_index=True)
print(len(cgcn_dev))

gcn_dev_inc, gcn_dev_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./cgcn/logs/inc_gcn_dev.tsv', './cgcn/logs/c_gcn_dev.tsv')
)
gcn_dev = pd.concat([gcn_dev_c, gcn_dev_inc], ignore_index=True)
print(len(gcn_dev))

cnn_dev_inc, cnn_dev_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./cnn/logs/inc_cnn_dev.tsv', './cnn/logs/c_cnn_dev.tsv')
)
cnn_dev = pd.concat([cnn_dev_c, cnn_dev_inc], ignore_index=True)
print(len(cnn_dev))

sattn_dev_inc, sattn_dev_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./self-attention/logs/inc_self-attn_dev.tsv', './self-attention/logs/c_self-attn_dev.tsv')
)
sattn_dev = pd.concat([sattn_dev_c, sattn_dev_inc], ignore_index=True)
print(len(sattn_dev))

### Train Results

In [None]:
# Each model's train predictions live in two TSV logs (`inc_*` and `c_*`)
# under ./retacred_results. Both are read, then stacked (`c_` rows first)
# under a fresh 0..n-1 index.
parnn_train_inc, parnn_train_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./retacred_results/inc_re-parnn_train.tsv', './retacred_results/c_re-parnn_train.tsv')
)
parnn_train = pd.concat([parnn_train_c, parnn_train_inc], ignore_index=True)
print(len(parnn_train))

lstm_train_inc, lstm_train_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./retacred_results/inc_re-lstm_train.tsv', './retacred_results/c_re-lstm_train.tsv')
)
lstm_train = pd.concat([lstm_train_c, lstm_train_inc], ignore_index=True)
print(len(lstm_train))

bilstm_train_inc, bilstm_train_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./retacred_results/inc_re-bilstm_train.tsv', './retacred_results/c_re-bilstm_train.tsv')
)
bilstm_train = pd.concat([bilstm_train_c, bilstm_train_inc], ignore_index=True)
print(len(bilstm_train))

cgcn_train_inc, cgcn_train_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./retacred_results/inc_re-cgcn_train.tsv', './retacred_results/c_re-cgcn_train.tsv')
)
cgcn_train = pd.concat([cgcn_train_c, cgcn_train_inc], ignore_index=True)
print(len(cgcn_train))

gcn_train_inc, gcn_train_c = (
    pd.read_csv(path, sep='\t')
    for path in ('./retacred_results/inc_re-gcn_train.tsv', './retacred_results/c_re-gcn_train.tsv')
)
gcn_train = pd.concat([gcn_train_c, gcn_train_inc], ignore_index=True)
print(len(gcn_train))

## Analysis on Test Data

### Confidence Score

In [None]:
def change_score(row):
    """Return a copy of `row` whose confidence is signed by correctness.

    When `ground_truth` equals `prediction` the confidence is negated;
    otherwise it is left as is. The input row is never modified: pandas does
    not support functions that mutate the object `DataFrame.apply` passes in.

    Args:
        row: A mapping-like record (pandas Series or dict) with the keys
            'ground_truth', 'prediction' and 'confidence', and a `.copy()`
            method.

    Returns:
        A shallow copy of `row` with the signed 'confidence' value.
    """
    signed = row.copy()
    if signed['ground_truth'] == signed['prediction']:
        signed['confidence'] = -signed['confidence']
    return signed

In [None]:
# Per-model signed confidence on the test split (sign set by `change_score`),
# keyed by sentence_id and renamed so the columns stay distinct after merging.
parnn_test_df = (
    parnn_test.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_parnn'})
)
print(len(parnn_test_df))

lstm_test_df = (
    lstm_test.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_lstm'})
)
print(len(lstm_test_df))

bilstm_test_df = (
    bilstm_test.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_bilstm'})
)
print(len(bilstm_test_df))

cgcn_test_df = (
    cgcn_test.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_cgcn'})
)
print(len(cgcn_test_df))

gcn_test_df = (
    gcn_test.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_gcn'})
)
print(len(gcn_test_df))

cnn_test_df = (
    cnn_test.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_cnn'})
)
print(len(cnn_test_df))

sattn_test_df = (
    sattn_test.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_sattn'})
)
print(len(sattn_test_df))

# Five-model ensemble: join on sentence_id, then average the signed scores.
# The cnn and sattn frames are built above but are not part of this average.
dfconf = (
    parnn_test_df
    .merge(lstm_test_df)
    .merge(bilstm_test_df)
    .merge(cgcn_test_df)
    .merge(gcn_test_df)
)
dfconf['confidence'] = (
    dfconf['confidence_parnn'] + dfconf['confidence_lstm'] + dfconf['confidence_bilstm']
    + dfconf['confidence_cgcn'] + dfconf['confidence_gcn']
) / 5

dfconf

In [None]:
# Rank test instances by ensemble confidence, highest first.
sorted_dfconf = dfconf.sort_values('confidence', ascending=False)
sorted_dfconf

In [None]:
# Number of `retacred_test` ids among the first i rows of the confidence
# ranking, for i = 0 .. n-1. A membership test plus a running sum gives the
# same counts as one inner merge on `sentence_id` per budget value.
is_noisy = sorted_dfconf['sentence_id'].isin(retacred_test['sentence_id']).to_numpy()
re_freq_c = np.concatenate(([0], np.cumsum(is_noisy)))[:len(is_noisy)].tolist()

print(len(re_freq_c))

In [None]:
# Share of the first i confidence-ranked rows that are in `retacred_test`,
# for i = 1 .. n-1, computed in one pass instead of one merge per budget.
is_noisy = sorted_dfconf['sentence_id'].isin(retacred_test['sentence_id']).to_numpy()
n_rows = len(is_noisy)
re_per_c = (np.cumsum(is_noisy)[:n_rows - 1] / np.arange(1, n_rows)).tolist()

print(len(re_per_c))

In [None]:
# Jaccard similarity (in %) of the confidence ranking's hits with itself, for
# budgets i = 5 .. n-1. Both sides are the same set, so this is the reference
# line of the comparison plot. The hit set is grown row by row rather than
# rebuilt with two merges per budget.
noisy_ids = set(retacred_test['sentence_id'])
conf_hits = set()
re_intersection_c = []
for i, sid in enumerate(sorted_dfconf['sentence_id'].tolist()):
    # Here `conf_hits` covers exactly the first i ranked rows.
    if i >= 5:
        re_intersection_c.append(jaccard_similarity(conf_hits, conf_hits) * 100)
    if sid in noisy_ids:
        conf_hits.add(sid)

print(len(re_intersection_c))

In [None]:
# Rows of the confidence ranking whose sentence_id appears in `retacred_test`.
pd.merge(retacred_test, sorted_dfconf)

### Distance between ground truth and prediction

In [None]:
# Per-model `dp` (ground truth to prediction distance) on the test split,
# keyed by sentence_id and renamed so the columns stay distinct after merging.
parnn_test_df = parnn_test[['sentence_id', 'dp']].rename(columns={'dp': 'dp_parnn'})
print(len(parnn_test_df))

lstm_test_df = lstm_test[['sentence_id', 'dp']].rename(columns={'dp': 'dp_lstm'})
print(len(lstm_test_df))

bilstm_test_df = bilstm_test[['sentence_id', 'dp']].rename(columns={'dp': 'dp_bilstm'})
print(len(bilstm_test_df))

cgcn_test_df = cgcn_test[['sentence_id', 'dp']].rename(columns={'dp': 'dp_cgcn'})
print(len(cgcn_test_df))

gcn_test_df = gcn_test[['sentence_id', 'dp']].rename(columns={'dp': 'dp_gcn'})
print(len(gcn_test_df))

cnn_test_df = cnn_test[['sentence_id', 'dp']].rename(columns={'dp': 'dp_cnn'})
print(len(cnn_test_df))

sattn_test_df = sattn_test[['sentence_id', 'dp']].rename(columns={'dp': 'dp_sattn'})
print(len(sattn_test_df))

# Five-model ensemble: join on sentence_id, then average the distances.
# The cnn and sattn frames are built above but are not part of this average.
dfpd = (
    parnn_test_df
    .merge(lstm_test_df)
    .merge(bilstm_test_df)
    .merge(cgcn_test_df)
    .merge(gcn_test_df)
)
dfpd['dp'] = (
    dfpd['dp_parnn'] + dfpd['dp_lstm'] + dfpd['dp_bilstm']
    + dfpd['dp_cgcn'] + dfpd['dp_gcn']
) / 5

dfpd

In [None]:
# Rank test instances by mean `dp`, largest first.
sorted_dfpd = dfpd.sort_values('dp', ascending=False)
sorted_dfpd

In [None]:
# Number of `retacred_test` ids among the first i rows of the `dp` ranking,
# for i = 0 .. n-1. A membership test plus a running sum gives the same
# counts as one inner merge on `sentence_id` per budget value.
is_noisy = sorted_dfpd['sentence_id'].isin(retacred_test['sentence_id']).to_numpy()
re_freq_p = np.concatenate(([0], np.cumsum(is_noisy)))[:len(is_noisy)].tolist()

print(len(re_freq_p))

In [None]:
# Share of the first i `dp`-ranked rows that are in `retacred_test`,
# for i = 1 .. n-1, computed in one pass instead of one merge per budget.
is_noisy = sorted_dfpd['sentence_id'].isin(retacred_test['sentence_id']).to_numpy()
n_rows = len(is_noisy)
re_per_p = (np.cumsum(is_noisy)[:n_rows - 1] / np.arange(1, n_rows)).tolist()

print(len(re_per_p))

In [None]:
# Jaccard similarity (in %) between the `retacred_test` ids found in the top-i
# rows of the `dp` ranking and in the top-i rows of the confidence ranking,
# for budgets i = 5 .. n-1. Both hit sets are grown row by row rather than
# rebuilt with two merges per budget.
noisy_ids = set(retacred_test['sentence_id'])
conf_ids = sorted_dfconf['sentence_id'].tolist()
ranked_hits, conf_hits = set(), set()
re_intersection_p = []
for i, sid in enumerate(sorted_dfpd['sentence_id'].tolist()):
    # Here both hit sets cover exactly the first i rows of their ranking.
    if i >= 5:
        re_intersection_p.append(jaccard_similarity(ranked_hits, conf_hits) * 100)
    if sid in noisy_ids:
        ranked_hits.add(sid)
    # Guard mirrors slicing: a prefix never extends past the frame's end.
    if i < len(conf_ids) and conf_ids[i] in noisy_ids:
        conf_hits.add(conf_ids[i])

print(len(re_intersection_p))

In [None]:
# Rows of the `dp` ranking whose sentence_id appears in `retacred_test`.
pd.merge(retacred_test, sorted_dfpd)

### Distance between ground truth and LCA

In [None]:
# Per-model `dl` (ground truth to LCA distance) on the test split,
# keyed by sentence_id and renamed so the columns stay distinct after merging.
parnn_test_df = parnn_test[['sentence_id', 'dl']].rename(columns={'dl': 'dl_parnn'})
print(len(parnn_test_df))

lstm_test_df = lstm_test[['sentence_id', 'dl']].rename(columns={'dl': 'dl_lstm'})
print(len(lstm_test_df))

bilstm_test_df = bilstm_test[['sentence_id', 'dl']].rename(columns={'dl': 'dl_bilstm'})
print(len(bilstm_test_df))

cgcn_test_df = cgcn_test[['sentence_id', 'dl']].rename(columns={'dl': 'dl_cgcn'})
print(len(cgcn_test_df))

gcn_test_df = gcn_test[['sentence_id', 'dl']].rename(columns={'dl': 'dl_gcn'})
print(len(gcn_test_df))

cnn_test_df = cnn_test[['sentence_id', 'dl']].rename(columns={'dl': 'dl_cnn'})
print(len(cnn_test_df))

sattn_test_df = sattn_test[['sentence_id', 'dl']].rename(columns={'dl': 'dl_sattn'})
print(len(sattn_test_df))

# Five-model ensemble: join on sentence_id, then average the distances.
# The cnn and sattn frames are built above but are not part of this average.
dflca = (
    parnn_test_df
    .merge(lstm_test_df)
    .merge(bilstm_test_df)
    .merge(cgcn_test_df)
    .merge(gcn_test_df)
)
dflca['dl'] = (
    dflca['dl_parnn'] + dflca['dl_lstm'] + dflca['dl_bilstm']
    + dflca['dl_cgcn'] + dflca['dl_gcn']
) / 5

dflca

In [None]:
# Rank test instances by mean `dl`, largest first.
sorted_dflca = dflca.sort_values('dl', ascending=False)
sorted_dflca

In [None]:
# Number of `retacred_test` ids among the first i rows of the `dl` ranking,
# for i = 0 .. n-1. A membership test plus a running sum gives the same
# counts as one inner merge on `sentence_id` per budget value.
is_noisy = sorted_dflca['sentence_id'].isin(retacred_test['sentence_id']).to_numpy()
re_freq_l = np.concatenate(([0], np.cumsum(is_noisy)))[:len(is_noisy)].tolist()

print(len(re_freq_l))

In [None]:
# Share of the first i `dl`-ranked rows that are in `retacred_test`,
# for i = 1 .. n-1, computed in one pass instead of one merge per budget.
is_noisy = sorted_dflca['sentence_id'].isin(retacred_test['sentence_id']).to_numpy()
n_rows = len(is_noisy)
re_per_l = (np.cumsum(is_noisy)[:n_rows - 1] / np.arange(1, n_rows)).tolist()

print(len(re_per_l))

In [None]:
# Jaccard similarity (in %) between the `retacred_test` ids found in the top-i
# rows of the `dl` ranking and in the top-i rows of the confidence ranking,
# for budgets i = 5 .. n-1. Both hit sets are grown row by row rather than
# rebuilt with two merges per budget.
noisy_ids = set(retacred_test['sentence_id'])
conf_ids = sorted_dfconf['sentence_id'].tolist()
ranked_hits, conf_hits = set(), set()
re_intersection_l = []
for i, sid in enumerate(sorted_dflca['sentence_id'].tolist()):
    # Here both hit sets cover exactly the first i rows of their ranking.
    if i >= 5:
        re_intersection_l.append(jaccard_similarity(ranked_hits, conf_hits) * 100)
    if sid in noisy_ids:
        ranked_hits.add(sid)
    # Guard mirrors slicing: a prefix never extends past the frame's end.
    if i < len(conf_ids) and conf_ids[i] in noisy_ids:
        conf_hits.add(conf_ids[i])

print(len(re_intersection_l))

In [None]:
# Rows of the `dl` ranking whose sentence_id appears in `retacred_test`.
pd.merge(retacred_test, sorted_dflca)

## Visualization on Test Data

In [None]:
# Two-panel summary for the test split:
#   left  - cumulative hits per ranking, as a percentage
#   right - Jaccard similarity of each ranking's hits with the confidence ranking
plt.figure(figsize=(10,5))

freq = pd.DataFrame({'TACRev':re_freq_c, 'GD':re_freq_p, 'LD':re_freq_l})
# NOTE(review): 1795 is hard-coded; presumably len(retacred_test) - confirm.
freq = freq / 1795 * 100
ax2 = plt.subplot(1,2,1)
ax2 = sns.lineplot(data=freq, dashes=False, ax=ax2)
ax2.set_ylabel('Percentage of sentences common with LC', fontsize='large', fontweight='bold')
ax2.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax2.set_title('Test Dataset', fontsize='large', fontweight='bold')
ax2.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

intersection = pd.DataFrame({
    'TACRev vs TACRev':re_intersection_c,
    'GD vs TACRev':re_intersection_p,
    'LD vs TACRev':re_intersection_l,
})
ax4 = plt.subplot(1,2,2)
ax4 = sns.lineplot(data=intersection, dashes=False, ax=ax4)
ax4.set_ylabel('Jaccard Similarity', fontsize='large', fontweight='bold')
ax4.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax4.set_title('Test Dataset', fontsize='large', fontweight='bold')
ax4.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

# plt.savefig('test-analysis')

## Analysis on Dev data

### Confidence Score

In [None]:
# Per-model signed confidence on the dev split (sign set by `change_score`),
# keyed by sentence_id and renamed so the columns stay distinct after merging.
parnn_dev_df = (
    parnn_dev.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_parnn'})
)
print(len(parnn_dev_df))

lstm_dev_df = (
    lstm_dev.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_lstm'})
)
print(len(lstm_dev_df))

bilstm_dev_df = (
    bilstm_dev.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_bilstm'})
)
print(len(bilstm_dev_df))

cgcn_dev_df = (
    cgcn_dev.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_cgcn'})
)
print(len(cgcn_dev_df))

gcn_dev_df = (
    gcn_dev.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_gcn'})
)
print(len(gcn_dev_df))

# Five-model ensemble: join on sentence_id, then average the signed scores.
dfconf_dev = (
    parnn_dev_df
    .merge(lstm_dev_df)
    .merge(bilstm_dev_df)
    .merge(cgcn_dev_df)
    .merge(gcn_dev_df)
)
dfconf_dev['confidence'] = (
    dfconf_dev['confidence_parnn'] + dfconf_dev['confidence_lstm'] + dfconf_dev['confidence_bilstm']
    + dfconf_dev['confidence_cgcn'] + dfconf_dev['confidence_gcn']
) / 5

dfconf_dev

In [None]:
# Rank dev instances by ensemble confidence, highest first.
sorted_dfconf_dev = dfconf_dev.sort_values('confidence', ascending=False)
sorted_dfconf_dev

In [None]:
# Number of `retacred_dev` ids among the first i rows of the confidence
# ranking, for i = 0 .. n-1. A membership test plus a running sum gives the
# same counts as one inner merge on `sentence_id` per budget value.
is_noisy = sorted_dfconf_dev['sentence_id'].isin(retacred_dev['sentence_id']).to_numpy()
re_dev_freq_c = np.concatenate(([0], np.cumsum(is_noisy)))[:len(is_noisy)].tolist()

print(len(re_dev_freq_c))

In [None]:
# Share of the first i confidence-ranked rows that are in `retacred_dev`,
# for i = 1 .. n-1, computed in one pass instead of one merge per budget.
is_noisy = sorted_dfconf_dev['sentence_id'].isin(retacred_dev['sentence_id']).to_numpy()
n_rows = len(is_noisy)
re_dev_per_c = (np.cumsum(is_noisy)[:n_rows - 1] / np.arange(1, n_rows)).tolist()

print(len(re_dev_per_c))

In [None]:
# Jaccard similarity (in %) of the confidence ranking's hits with itself, for
# budgets i = 5 .. n-1. Both sides are the same set, so this is the reference
# line of the comparison plot. The hit set is grown row by row rather than
# rebuilt with two merges per budget.
noisy_ids = set(retacred_dev['sentence_id'])
conf_hits = set()
re_dev_intersection_c = []
for i, sid in enumerate(sorted_dfconf_dev['sentence_id'].tolist()):
    # Here `conf_hits` covers exactly the first i ranked rows.
    if i >= 5:
        re_dev_intersection_c.append(jaccard_similarity(conf_hits, conf_hits) * 100)
    if sid in noisy_ids:
        conf_hits.add(sid)

print(len(re_dev_intersection_c))

In [None]:
# Rows of the confidence ranking whose sentence_id appears in `retacred_dev`.
pd.merge(retacred_dev, sorted_dfconf_dev)

### Distance between ground truth and prediction

In [None]:
# Per-model `dp` (ground truth to prediction distance) on the dev split,
# keyed by sentence_id and renamed so the columns stay distinct after merging.
parnn_dev_df = parnn_dev[['sentence_id', 'dp']].rename(columns={'dp': 'dp_parnn'})
print(len(parnn_dev_df))

lstm_dev_df = lstm_dev[['sentence_id', 'dp']].rename(columns={'dp': 'dp_lstm'})
print(len(lstm_dev_df))

bilstm_dev_df = bilstm_dev[['sentence_id', 'dp']].rename(columns={'dp': 'dp_bilstm'})
print(len(bilstm_dev_df))

cgcn_dev_df = cgcn_dev[['sentence_id', 'dp']].rename(columns={'dp': 'dp_cgcn'})
print(len(cgcn_dev_df))

gcn_dev_df = gcn_dev[['sentence_id', 'dp']].rename(columns={'dp': 'dp_gcn'})
print(len(gcn_dev_df))

# Five-model ensemble: join on sentence_id, then average the distances.
dfpd_dev = (
    parnn_dev_df
    .merge(lstm_dev_df)
    .merge(bilstm_dev_df)
    .merge(cgcn_dev_df)
    .merge(gcn_dev_df)
)
dfpd_dev['dp'] = (
    dfpd_dev['dp_parnn'] + dfpd_dev['dp_lstm'] + dfpd_dev['dp_bilstm']
    + dfpd_dev['dp_cgcn'] + dfpd_dev['dp_gcn']
) / 5

dfpd_dev

In [None]:
# Rank dev instances by mean `dp`, largest first.
sorted_dfpd_dev = dfpd_dev.sort_values('dp', ascending=False)
sorted_dfpd_dev

In [None]:
# Number of `retacred_dev` ids among the first i rows of the `dp` ranking,
# for i = 0 .. n-1. A membership test plus a running sum gives the same
# counts as one inner merge on `sentence_id` per budget value.
is_noisy = sorted_dfpd_dev['sentence_id'].isin(retacred_dev['sentence_id']).to_numpy()
re_dev_freq_p = np.concatenate(([0], np.cumsum(is_noisy)))[:len(is_noisy)].tolist()

print(len(re_dev_freq_p))

In [None]:
# Share of the first i `dp`-ranked rows that are in `retacred_dev`,
# for i = 1 .. n-1, computed in one pass instead of one merge per budget.
is_noisy = sorted_dfpd_dev['sentence_id'].isin(retacred_dev['sentence_id']).to_numpy()
n_rows = len(is_noisy)
re_dev_per_p = (np.cumsum(is_noisy)[:n_rows - 1] / np.arange(1, n_rows)).tolist()

print(len(re_dev_per_p))

In [None]:
# Jaccard similarity (in %) between the `retacred_dev` ids found in the top-i
# rows of the `dp` ranking and in the top-i rows of the confidence ranking,
# for budgets i = 5 .. n-1. Both hit sets are grown row by row rather than
# rebuilt with two merges per budget.
noisy_ids = set(retacred_dev['sentence_id'])
conf_ids = sorted_dfconf_dev['sentence_id'].tolist()
ranked_hits, conf_hits = set(), set()
re_dev_intersection_p = []
for i, sid in enumerate(sorted_dfpd_dev['sentence_id'].tolist()):
    # Here both hit sets cover exactly the first i rows of their ranking.
    if i >= 5:
        re_dev_intersection_p.append(jaccard_similarity(ranked_hits, conf_hits) * 100)
    if sid in noisy_ids:
        ranked_hits.add(sid)
    # Guard mirrors slicing: a prefix never extends past the frame's end.
    if i < len(conf_ids) and conf_ids[i] in noisy_ids:
        conf_hits.add(conf_ids[i])

print(len(re_dev_intersection_p))

In [None]:
# Rows of the `dp` ranking whose sentence_id appears in `retacred_dev`.
pd.merge(retacred_dev, sorted_dfpd_dev)

### Distance between ground truth and lca

In [None]:
# Per-model `dl` (ground truth to LCA distance) on the dev split,
# keyed by sentence_id and renamed so the columns stay distinct after merging.
parnn_dev_df = parnn_dev[['sentence_id', 'dl']].rename(columns={'dl': 'dl_parnn'})
print(len(parnn_dev_df))

lstm_dev_df = lstm_dev[['sentence_id', 'dl']].rename(columns={'dl': 'dl_lstm'})
print(len(lstm_dev_df))

bilstm_dev_df = bilstm_dev[['sentence_id', 'dl']].rename(columns={'dl': 'dl_bilstm'})
print(len(bilstm_dev_df))

cgcn_dev_df = cgcn_dev[['sentence_id', 'dl']].rename(columns={'dl': 'dl_cgcn'})
print(len(cgcn_dev_df))

gcn_dev_df = gcn_dev[['sentence_id', 'dl']].rename(columns={'dl': 'dl_gcn'})
print(len(gcn_dev_df))

# Five-model ensemble: join on sentence_id, then average the distances.
dflca_dev = (
    parnn_dev_df
    .merge(lstm_dev_df)
    .merge(bilstm_dev_df)
    .merge(cgcn_dev_df)
    .merge(gcn_dev_df)
)
dflca_dev['dl'] = (
    dflca_dev['dl_parnn'] + dflca_dev['dl_lstm'] + dflca_dev['dl_bilstm']
    + dflca_dev['dl_cgcn'] + dflca_dev['dl_gcn']
) / 5

dflca_dev

In [None]:
# Rank dev instances by mean `dl`, largest first.
sorted_dflca_dev = dflca_dev.sort_values('dl', ascending=False)
sorted_dflca_dev

In [None]:
# Number of `retacred_dev` ids among the first i rows of the `dl` ranking,
# for i = 0 .. n-1. A membership test plus a running sum gives the same
# counts as one inner merge on `sentence_id` per budget value.
is_noisy = sorted_dflca_dev['sentence_id'].isin(retacred_dev['sentence_id']).to_numpy()
re_dev_freq_l = np.concatenate(([0], np.cumsum(is_noisy)))[:len(is_noisy)].tolist()

print(len(re_dev_freq_l))

In [None]:
# Share of the first i `dl`-ranked rows that are in `retacred_dev`,
# for i = 1 .. n-1, computed in one pass instead of one merge per budget.
is_noisy = sorted_dflca_dev['sentence_id'].isin(retacred_dev['sentence_id']).to_numpy()
n_rows = len(is_noisy)
re_dev_per_l = (np.cumsum(is_noisy)[:n_rows - 1] / np.arange(1, n_rows)).tolist()

print(len(re_dev_per_l))

In [None]:
# Jaccard similarity (in %) between the `retacred_dev` ids found in the top-i
# rows of the `dl` ranking and in the top-i rows of the confidence ranking,
# for budgets i = 5 .. n-1. Both hit sets are grown row by row rather than
# rebuilt with two merges per budget.
noisy_ids = set(retacred_dev['sentence_id'])
conf_ids = sorted_dfconf_dev['sentence_id'].tolist()
ranked_hits, conf_hits = set(), set()
re_dev_intersection_l = []
for i, sid in enumerate(sorted_dflca_dev['sentence_id'].tolist()):
    # Here both hit sets cover exactly the first i rows of their ranking.
    if i >= 5:
        re_dev_intersection_l.append(jaccard_similarity(ranked_hits, conf_hits) * 100)
    if sid in noisy_ids:
        ranked_hits.add(sid)
    # Guard mirrors slicing: a prefix never extends past the frame's end.
    if i < len(conf_ids) and conf_ids[i] in noisy_ids:
        conf_hits.add(conf_ids[i])

print(len(re_dev_intersection_l))

In [None]:
# Rows of the `dl` ranking whose sentence_id appears in `retacred_dev`.
pd.merge(retacred_dev, sorted_dflca_dev)

## Ratio of distance between ground-truth & LCA and ground-truth and root

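What percentage of the correct path from the root is correctly predicted when starting from the ground truth, measured per instance as `dl / dr` (distance from the ground truth to the LCA divided by the distance from the ground truth to the root).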

In [None]:
# Per-model `lratio` = dl / dr on the dev split.
# NOTE(review): the assignments below add an `lratio` column to the shared
# per-model dev frames themselves (no copy is taken). That side effect is kept
# as is; confirm whether any later cell relies on it.
# Rows where `dr` is 0 produce inf or NaN ratios.
parnn_dev['lratio'] = parnn_dev['dl'] / parnn_dev['dr']
parnn_dev_df = parnn_dev[['sentence_id', 'lratio']].rename(columns={'lratio': 'lratio_parnn'})
print(len(parnn_dev_df))

lstm_dev['lratio'] = lstm_dev['dl'] / lstm_dev['dr']
lstm_dev_df = lstm_dev[['sentence_id', 'lratio']].rename(columns={'lratio': 'lratio_lstm'})
print(len(lstm_dev_df))

bilstm_dev['lratio'] = bilstm_dev['dl'] / bilstm_dev['dr']
bilstm_dev_df = bilstm_dev[['sentence_id', 'lratio']].rename(columns={'lratio': 'lratio_bilstm'})
print(len(bilstm_dev_df))

cgcn_dev['lratio'] = cgcn_dev['dl'] / cgcn_dev['dr']
cgcn_dev_df = cgcn_dev[['sentence_id', 'lratio']].rename(columns={'lratio': 'lratio_cgcn'})
print(len(cgcn_dev_df))

gcn_dev['lratio'] = gcn_dev['dl'] / gcn_dev['dr']
gcn_dev_df = gcn_dev[['sentence_id', 'lratio']].rename(columns={'lratio': 'lratio_gcn'})
print(len(gcn_dev_df))

# Five-model ensemble: join on sentence_id, then average the ratios.
dfrl_dev = (
    parnn_dev_df
    .merge(lstm_dev_df)
    .merge(bilstm_dev_df)
    .merge(cgcn_dev_df)
    .merge(gcn_dev_df)
)
dfrl_dev['lratio'] = (
    dfrl_dev['lratio_parnn'] + dfrl_dev['lratio_lstm'] + dfrl_dev['lratio_bilstm']
    + dfrl_dev['lratio_cgcn'] + dfrl_dev['lratio_gcn']
) / 5

dfrl_dev

In [None]:
# Rank dev instances by mean `lratio`, largest first.
sorted_dfrl_dev = dfrl_dev.sort_values('lratio', ascending=False)
sorted_dfrl_dev

In [None]:
# Number of `retacred_dev` ids among the first i rows of the `lratio` ranking,
# for i = 0 .. n-1. A membership test plus a running sum gives the same
# counts as one inner merge on `sentence_id` per budget value.
is_noisy = sorted_dfrl_dev['sentence_id'].isin(retacred_dev['sentence_id']).to_numpy()
re_dev_freq_rl = np.concatenate(([0], np.cumsum(is_noisy)))[:len(is_noisy)].tolist()

print(len(re_dev_freq_rl))

In [None]:
# Share of the first i `lratio`-ranked rows that are in `retacred_dev`,
# for i = 1 .. n-1, computed in one pass instead of one merge per budget.
is_noisy = sorted_dfrl_dev['sentence_id'].isin(retacred_dev['sentence_id']).to_numpy()
n_rows = len(is_noisy)
re_dev_per_rl = (np.cumsum(is_noisy)[:n_rows - 1] / np.arange(1, n_rows)).tolist()

print(len(re_dev_per_rl))

In [None]:
# Jaccard similarity (in %) between the `retacred_dev` ids found in the top-i
# rows of the `lratio` ranking and in the top-i rows of the confidence ranking,
# for budgets i = 5 .. n-1. Both hit sets are grown row by row rather than
# rebuilt with two merges per budget.
noisy_ids = set(retacred_dev['sentence_id'])
conf_ids = sorted_dfconf_dev['sentence_id'].tolist()
ranked_hits, conf_hits = set(), set()
re_dev_intersection_rl = []
for i, sid in enumerate(sorted_dfrl_dev['sentence_id'].tolist()):
    # Here both hit sets cover exactly the first i rows of their ranking.
    if i >= 5:
        re_dev_intersection_rl.append(jaccard_similarity(ranked_hits, conf_hits) * 100)
    if sid in noisy_ids:
        ranked_hits.add(sid)
    # Guard mirrors slicing: a prefix never extends past the frame's end.
    if i < len(conf_ids) and conf_ids[i] in noisy_ids:
        conf_hits.add(conf_ids[i])

print(len(re_dev_intersection_rl))

In [None]:
# Rows of the `lratio` ranking whose sentence_id appears in `retacred_dev`.
pd.merge(retacred_dev, sorted_dfrl_dev)

## Visualizations on Dev data

In [None]:
# Two-panel summary for the dev split:
#   left  - cumulative hits per ranking (plus the file-order baseline), as a percentage
#   right - Jaccard similarity of each ranking's hits with the confidence ranking
plt.figure(figsize=(10,6))

# All four lists must have the same length for this frame to be built.
freq = pd.DataFrame({
    'TACRev':re_dev_freq_c,
    'GD':re_dev_freq_p,
    'LD':re_dev_freq_l,
    'ReTACRED':re_dev_freq_re,
})
# NOTE(review): 5326 is hard-coded; presumably len(retacred_dev) - confirm.
freq = freq / 5326 * 100
ax2 = plt.subplot(1,2,1)
ax2 = sns.lineplot(data=freq, dashes=False, ax=ax2)
ax2.set_ylabel('Percentage of sentences common with LC', fontsize='large', fontweight='bold')
ax2.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax2.set_title('Dev Dataset', fontsize='large', fontweight='bold')
ax2.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

intersection = pd.DataFrame({
    'TACRev vs TACRev':re_dev_intersection_c,
    'GD vs TACRev':re_dev_intersection_p,
    'LD vs TACRev':re_dev_intersection_l,
})
ax4 = plt.subplot(1,2,2)
ax4 = sns.lineplot(data=intersection, dashes=False, ax=ax4)
ax4.set_ylabel('Jaccard Similarity', fontsize='large', fontweight='bold')
ax4.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax4.set_title('Dev Dataset', fontsize='large', fontweight='bold')
ax4.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

plt.savefig('dev-analysis')

## Analysis on Train data

### Confidence Score

In [None]:
# Per-model signed confidence on the train split (sign set by `change_score`),
# keyed by sentence_id and renamed so the columns stay distinct after merging.
parnn_train_df = (
    parnn_train.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_parnn'})
)
print(len(parnn_train_df))

lstm_train_df = (
    lstm_train.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_lstm'})
)
print(len(lstm_train_df))

bilstm_train_df = (
    bilstm_train.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_bilstm'})
)
print(len(bilstm_train_df))

cgcn_train_df = (
    cgcn_train.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_cgcn'})
)
print(len(cgcn_train_df))

gcn_train_df = (
    gcn_train.apply(change_score, axis=1)[['sentence_id', 'confidence']]
    .rename(columns={'confidence': 'confidence_gcn'})
)
print(len(gcn_train_df))

# Five-model ensemble: join on sentence_id, then average the signed scores.
dfconf_tr = (
    parnn_train_df
    .merge(lstm_train_df)
    .merge(bilstm_train_df)
    .merge(cgcn_train_df)
    .merge(gcn_train_df)
)
dfconf_tr['confidence'] = (
    dfconf_tr['confidence_parnn'] + dfconf_tr['confidence_lstm'] + dfconf_tr['confidence_bilstm']
    + dfconf_tr['confidence_cgcn'] + dfconf_tr['confidence_gcn']
) / 5

dfconf_tr

In [None]:
# Rank train instances by ensemble confidence, highest first.
sorted_dfconf_tr = dfconf_tr.sort_values('confidence', ascending=False)
sorted_dfconf_tr

In [None]:
# Budget curves for the confidence ranking on the train split. Membership in
# `retacred_train` is computed once and accumulated, which gives the same
# values as one inner merge on `sentence_id` per budget value.
is_noisy = sorted_dfconf_tr['sentence_id'].isin(retacred_train['sentence_id']).to_numpy()
n_rows = len(is_noisy)
hits_so_far = np.cumsum(is_noisy)

# Hits among the first i rows, for i = 0 .. n-1.
re_tr_freq_c = np.concatenate(([0], hits_so_far))[:n_rows].tolist()

print(len(re_tr_freq_c))

# Hits among the first i rows divided by i, for i = 1 .. n-1.
re_tr_per_c = (hits_so_far[:n_rows - 1] / np.arange(1, n_rows)).tolist()

print(len(re_tr_per_c))

# Jaccard similarity (in %) of the ranking's hits with itself, i = 5 .. n-1.
# Both sides are the same set, so this is the reference curve.
conf_hits = set()
re_tr_intersection_c = []
for i, (sid, noisy) in enumerate(zip(sorted_dfconf_tr['sentence_id'].tolist(), is_noisy)):
    # Here `conf_hits` covers exactly the first i ranked rows.
    if i >= 5:
        re_tr_intersection_c.append(jaccard_similarity(conf_hits, conf_hits)*100)
    if noisy:
        conf_hits.add(sid)

print(len(re_tr_intersection_c))

In [None]:
# Rows of the confidence ranking whose sentence_id appears in `retacred_train`.
pd.merge(retacred_train, sorted_dfconf_tr)

### Distance between ground truth and prediction

In [None]:
# Per-model `dp` (ground truth to prediction distance) on the train split,
# keyed by sentence_id and renamed so the columns stay distinct after merging.
parnn_train_df = parnn_train[['sentence_id', 'dp']].rename(columns={'dp': 'dp_parnn'})
print(len(parnn_train_df))

lstm_train_df = lstm_train[['sentence_id', 'dp']].rename(columns={'dp': 'dp_lstm'})
print(len(lstm_train_df))

bilstm_train_df = bilstm_train[['sentence_id', 'dp']].rename(columns={'dp': 'dp_bilstm'})
print(len(bilstm_train_df))

cgcn_train_df = cgcn_train[['sentence_id', 'dp']].rename(columns={'dp': 'dp_cgcn'})
print(len(cgcn_train_df))

gcn_train_df = gcn_train[['sentence_id', 'dp']].rename(columns={'dp': 'dp_gcn'})
print(len(gcn_train_df))

# Five-model ensemble: join on sentence_id, then average the distances.
dfpd_tr = (
    parnn_train_df
    .merge(lstm_train_df)
    .merge(bilstm_train_df)
    .merge(cgcn_train_df)
    .merge(gcn_train_df)
)
dfpd_tr['dp'] = (
    dfpd_tr['dp_parnn'] + dfpd_tr['dp_lstm'] + dfpd_tr['dp_bilstm']
    + dfpd_tr['dp_cgcn'] + dfpd_tr['dp_gcn']
) / 5

dfpd_tr

In [None]:
# Rank the training sentences by the averaged `dp` column, largest value first.
sorted_dfpd_tr = dfpd_tr.sort_values('dp', ascending=False)
sorted_dfpd_tr

In [None]:
# Budget sweep over the `dp` ranking (train split).
#
# The size of `retacred_train.merge(sorted_dfpd_tr[:k])` is the number of rows
# among the first k ranked rows whose sentence_id occurs in `retacred_train`, so
# one membership test plus a cumulative sum yields the value for every budget k
# without running one merge per budget.
# NOTE(review): the equivalence assumes `sentence_id` is the only column the two
# frames share and that ids are unique in `retacred_train` -- confirm.
n_ranked = len(sorted_dfpd_tr)
is_hit = sorted_dfpd_tr['sentence_id'].isin(retacred_train['sentence_id']).to_numpy()
hits_in_top = np.concatenate(([0], np.cumsum(is_hit)))  # hits_in_top[k]: hits in the first k rows

# Absolute number of hits for budgets 0 .. n_ranked-1 (same range as before).
re_tr_freq_p = hits_in_top[:n_ranked].tolist()
print(len(re_tr_freq_p))

# Hits divided by budget, for budgets 1 .. n_ranked-1.
re_tr_per_p = (hits_in_top[1:n_ranked] / np.arange(1, n_ranked)).tolist()
print(len(re_tr_per_p))

# Jaccard similarity (in percent) between the `dp` selection and the confidence
# selection at the same budget, for budgets 5 .. n_ranked-1. The merges are kept
# because `jaccard_similarity` is defined elsewhere and may depend on the exact
# lists it receives.
re_tr_intersection_p = []
for budget in range(5, n_ranked):
    dp_ids = list(retacred_train.merge(sorted_dfpd_tr[:budget])['sentence_id'])
    conf_ids = list(retacred_train.merge(sorted_dfconf_tr[:budget])['sentence_id'])
    re_tr_intersection_p.append(jaccard_similarity(dp_ids, conf_ids) * 100)

print(len(re_tr_intersection_p))

In [None]:
retacred_train.merge(sorted_dfpd_tr)

### Distance between ground truth and LCA

In [None]:
# One two-column frame per model: sentence_id plus that model's `dl` value,
# renamed so the five frames can be joined side by side.
parnn_train_df = parnn_train[['sentence_id', 'dl']].rename(columns={'dl': 'dl_parnn'})
print(len(parnn_train_df))

lstm_train_df = lstm_train[['sentence_id', 'dl']].rename(columns={'dl': 'dl_lstm'})
print(len(lstm_train_df))

bilstm_train_df = bilstm_train[['sentence_id', 'dl']].rename(columns={'dl': 'dl_bilstm'})
print(len(bilstm_train_df))

cgcn_train_df = cgcn_train[['sentence_id', 'dl']].rename(columns={'dl': 'dl_cgcn'})
print(len(cgcn_train_df))

gcn_train_df = gcn_train[['sentence_id', 'dl']].rename(columns={'dl': 'dl_gcn'})
print(len(gcn_train_df))

# Join the five frames on their shared column and average the per-model values.
dflca_tr = (
    parnn_train_df
    .merge(lstm_train_df)
    .merge(bilstm_train_df)
    .merge(cgcn_train_df)
    .merge(gcn_train_df)
)
dl_cols = ['dl_parnn', 'dl_lstm', 'dl_bilstm', 'dl_cgcn', 'dl_gcn']
# Left-to-right sum starting from the first column, then the mean over 5 models.
dflca_tr['dl'] = sum((dflca_tr[col] for col in dl_cols[1:]), dflca_tr[dl_cols[0]]) / 5

dflca_tr

In [None]:
# Rank the training sentences by the averaged `dl` column, largest value first.
sorted_dflca_tr = dflca_tr.sort_values('dl', ascending=False)
sorted_dflca_tr

In [None]:
# Budget sweep over the `dl` ranking (train split).
#
# The size of `retacred_train.merge(sorted_dflca_tr[:k])` is the number of rows
# among the first k ranked rows whose sentence_id occurs in `retacred_train`, so
# one membership test plus a cumulative sum yields the value for every budget k
# without running one merge per budget.
# NOTE(review): the equivalence assumes `sentence_id` is the only column the two
# frames share and that ids are unique in `retacred_train` -- confirm.
n_ranked = len(sorted_dflca_tr)
is_hit = sorted_dflca_tr['sentence_id'].isin(retacred_train['sentence_id']).to_numpy()
hits_in_top = np.concatenate(([0], np.cumsum(is_hit)))  # hits_in_top[k]: hits in the first k rows

# Absolute number of hits for budgets 0 .. n_ranked-1 (same range as before).
re_tr_freq_l = hits_in_top[:n_ranked].tolist()
print(len(re_tr_freq_l))

# Hits divided by budget, for budgets 1 .. n_ranked-1.
re_tr_per_l = (hits_in_top[1:n_ranked] / np.arange(1, n_ranked)).tolist()
print(len(re_tr_per_l))

# Jaccard similarity (in percent) between the `dl` selection and the confidence
# selection at the same budget, for budgets 5 .. n_ranked-1. The merges are kept
# because `jaccard_similarity` is defined elsewhere and may depend on the exact
# lists it receives.
re_tr_intersection_l = []
for budget in range(5, n_ranked):
    dl_ids = list(retacred_train.merge(sorted_dflca_tr[:budget])['sentence_id'])
    conf_ids = list(retacred_train.merge(sorted_dfconf_tr[:budget])['sentence_id'])
    re_tr_intersection_l.append(jaccard_similarity(dl_ids, conf_ids) * 100)

print(len(re_tr_intersection_l))

In [None]:
retacred_train.merge(sorted_dflca_tr)

## Ratio of the ground-truth-to-LCA distance to the ground-truth-to-root distance

What percentage of the correct path from the root is correctly predicted, starting from the ground truth?

In [None]:
# For each model, add an `lratio` column (dl / dr) to the model's own frame and
# take a two-column view of it under a model-specific name.
# NOTE: the assignment writes `lratio` into parnn_train, lstm_train, ... in
# place, exactly as before; rows where dr is 0 yield inf/NaN rather than an error.
parnn_train['lratio'] = parnn_train['dl'] / parnn_train['dr']
parnn_tr_df = parnn_train[['sentence_id', 'lratio']].rename(columns={'lratio': 'lratio_parnn'})
print(len(parnn_tr_df))

lstm_train['lratio'] = lstm_train['dl'] / lstm_train['dr']
lstm_tr_df = lstm_train[['sentence_id', 'lratio']].rename(columns={'lratio': 'lratio_lstm'})
print(len(lstm_tr_df))

bilstm_train['lratio'] = bilstm_train['dl'] / bilstm_train['dr']
bilstm_tr_df = bilstm_train[['sentence_id', 'lratio']].rename(columns={'lratio': 'lratio_bilstm'})
print(len(bilstm_tr_df))

cgcn_train['lratio'] = cgcn_train['dl'] / cgcn_train['dr']
cgcn_tr_df = cgcn_train[['sentence_id', 'lratio']].rename(columns={'lratio': 'lratio_cgcn'})
print(len(cgcn_tr_df))

gcn_train['lratio'] = gcn_train['dl'] / gcn_train['dr']
gcn_tr_df = gcn_train[['sentence_id', 'lratio']].rename(columns={'lratio': 'lratio_gcn'})
print(len(gcn_tr_df))

# Join the five frames on their shared column and average the per-model ratios.
dfrl_tr = (
    parnn_tr_df
    .merge(lstm_tr_df)
    .merge(bilstm_tr_df)
    .merge(cgcn_tr_df)
    .merge(gcn_tr_df)
)
lratio_cols = ['lratio_parnn', 'lratio_lstm', 'lratio_bilstm', 'lratio_cgcn', 'lratio_gcn']
# Left-to-right sum starting from the first column, then the mean over 5 models.
dfrl_tr['lratio'] = sum((dfrl_tr[col] for col in lratio_cols[1:]), dfrl_tr[lratio_cols[0]]) / 5

dfrl_tr

In [None]:
# Rank the training sentences by the averaged `lratio` column, largest value first.
sorted_dfrl_tr = dfrl_tr.sort_values('lratio', ascending=False)
sorted_dfrl_tr

In [None]:
# Budget sweep over the `lratio` ranking (train split).
#
# The size of `retacred_train.merge(sorted_dfrl_tr[:k])` is the number of rows
# among the first k ranked rows whose sentence_id occurs in `retacred_train`, so
# one membership test plus a cumulative sum yields the value for every budget k
# without running one merge per budget.
# NOTE(review): the equivalence assumes `sentence_id` is the only column the two
# frames share and that ids are unique in `retacred_train` -- confirm.
n_ranked = len(sorted_dfrl_tr)
is_hit = sorted_dfrl_tr['sentence_id'].isin(retacred_train['sentence_id']).to_numpy()
hits_in_top = np.concatenate(([0], np.cumsum(is_hit)))  # hits_in_top[k]: hits in the first k rows

# Absolute number of hits for budgets 0 .. n_ranked-1 (same range as before).
re_tr_freq_rl = hits_in_top[:n_ranked].tolist()
print(len(re_tr_freq_rl))

# Hits divided by budget, for budgets 1 .. n_ranked-1.
re_tr_per_rl = (hits_in_top[1:n_ranked] / np.arange(1, n_ranked)).tolist()
print(len(re_tr_per_rl))

# Jaccard similarity (in percent) between the `lratio` selection and the
# confidence selection at the same budget, for budgets 5 .. n_ranked-1. The
# merges are kept because `jaccard_similarity` is defined elsewhere and may
# depend on the exact lists it receives.
re_tr_intersection_rl = []
for budget in range(5, n_ranked):
    rl_ids = list(retacred_train.merge(sorted_dfrl_tr[:budget])['sentence_id'])
    conf_ids = list(retacred_train.merge(sorted_dfconf_tr[:budget])['sentence_id'])
    re_tr_intersection_rl.append(jaccard_similarity(rl_ids, conf_ids) * 100)

print(len(re_tr_intersection_rl))

In [None]:
retacred_train.merge(sorted_dfrl_tr)

## Visualizations on Train data

In [None]:
# 2x2 summary figure for the train split, saved as 'train-analysis'.
bold_large = {'fontsize': 'large', 'fontweight': 'bold'}
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 16))

# Top-left: absolute number of hits per budget for the four rankings.
freq = pd.DataFrame({'Confidence': re_tr_freq_c, 'GD': re_tr_freq_p,
                     'LD': re_tr_freq_l, 'RL': re_tr_freq_rl})
sns.lineplot(data=freq, dashes=False, ax=ax1)
ax1.set_ylabel('Number of sentences common with LC', **bold_large)
ax1.set_xlabel('Reannotation Budget', **bold_large)
ax1.set_title('Train Dataset', **bold_large)

# Top-right: the same counts as a percentage of 13923.
# NOTE(review): 13923 is presumably the size of the reference set for the
# train split -- confirm and move to a named constant.
freq = freq / 13923 * 100
sns.lineplot(data=freq, dashes=False, ax=ax2)
ax2.set_ylabel('Percentage of sentences common with LC', **bold_large)
ax2.set_xlabel('Reannotation Budget', **bold_large)
ax2.set_title('Train Dataset', **bold_large)
ax2.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

# Bottom-left: hits divided by budget, scaled to percent.
freq = pd.DataFrame({'Confidence': re_tr_per_c, 'GD': re_tr_per_p,
                     'LD': re_tr_per_l, 'RL': re_tr_per_rl})
freq = freq * 100
sns.lineplot(data=freq, dashes=False, ax=ax3)
ax3.set_ylabel('Ratio of sentences common with LC and Reannotation Budget', **bold_large)
ax3.set_xlabel('Reannotation Budget', **bold_large)
ax3.set_title('Train Dataset', **bold_large)
ax3.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

# Bottom-right: Jaccard similarity of each ranking's selection vs. the confidence selection.
intersection = pd.DataFrame({'Confidence vs Confidence': re_tr_intersection_c,
                             'GD vs Confidence': re_tr_intersection_p,
                             'LD vs Confidence': re_tr_intersection_l,
                             'RL vs Confidence': re_tr_intersection_rl})
sns.lineplot(data=intersection, dashes=False, ax=ax4)
ax4.set_ylabel('Jaccard Similarity', **bold_large)
ax4.set_xlabel('Reannotation Budget', **bold_large)
ax4.set_title('Train Dataset', **bold_large)
ax4.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

fig.savefig('train-analysis')

## Visualization

In [None]:
# Side-by-side hit counts per budget for the test and dev splits,
# saved as 'retacred_common'.
freq = pd.DataFrame({'Confidence': re_freq_c, 'GD': re_freq_p,
                     'LD': re_freq_l, 'RL': re_freq_rl})
dev_freq = pd.DataFrame({'Confidence': re_dev_freq_c, 'GD': re_dev_freq_p,
                         'LD': re_dev_freq_l, 'RL': re_dev_freq_rl})

bold_large = {'fontsize': 'large', 'fontweight': 'bold'}
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: test split.
sns.lineplot(data=freq, ax=ax1)
ax1.set_ylabel('Number of sentences common with LC', **bold_large)
ax1.set_xlabel('Reannotation Budget', **bold_large)
ax1.set_title('Test Dataset', **bold_large)

# Right panel: dev split (no y-label, as before).
sns.lineplot(data=dev_freq, ax=ax2)
ax2.set_xlabel('Reannotation Budget', **bold_large)
ax2.set_title('Dev Dataset', **bold_large)

fig.savefig('retacred_common')

In [None]:
# Hit counts per budget expressed as percentages, test vs. dev,
# saved as 'retacred_common_percentage'.
# NOTE(review): 3936 and 5326 are presumably the sizes of the reference sets
# for the test and dev splits -- confirm and move to named constants.
freq = pd.DataFrame({'TACRev': re_freq_c, 'GD': re_freq_p,
                     'LD': re_freq_l, 'ReTACRED': re_freq_re})
freq = freq / 3936 * 100
dev_freq = pd.DataFrame({'TACRev': re_dev_freq_c, 'GD': re_dev_freq_p,
                         'LD': re_dev_freq_l, 'ReTACRED': re_dev_freq_re})
dev_freq = dev_freq / 5326 * 100

bold_12 = {'fontsize': 12, 'fontweight': 'bold'}
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: test split.
sns.lineplot(data=freq, ax=ax1)
ax1.set_ylabel('Percentage of sentences common with Set N', **bold_12)
ax1.set_xlabel('Reannotation Budget', **bold_12)
ax1.set_title('Test Dataset', **bold_12)
ax1.legend(fontsize=12)
ax1.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

# Right panel: dev split (no y-label, as before).
sns.lineplot(data=dev_freq, ax=ax2)
ax2.set_xlabel('Reannotation Budget', **bold_12)
ax2.set_title('Dev Dataset', **bold_12)
ax2.legend(fontsize=12)
ax2.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

fig.savefig('retacred_common_percentage')

In [None]:
# Hits-per-budget ratio for the test and dev splits, saved as 'retacred_percentage'.
#
# Fix: the ratio frames used to be overwritten right after construction
# (`per = freq`, `dev_per = dev_freq`), so this figure silently re-plotted the
# data of the previous cell. The ratios are now scaled to percent, matching the
# PercentFormatter(xmax=100) axis and the equivalent train panel (`freq * 100`).
per = pd.DataFrame({'Confidence': re_per_c, 'GD': re_per_p, 'LD': re_per_l, 'RL': re_per_rl})
per = per * 100
dev_per = pd.DataFrame({'Confidence': re_dev_per_c, 'GD': re_dev_per_p,
                        'LD': re_dev_per_l, 'RL': re_dev_per_rl})
dev_per = dev_per * 100

plt.figure(figsize=(10,5))

# Left panel: test split.
ax1 = plt.subplot(1,2,1)
ax1 = sns.lineplot(data=per)
plt.ylabel('Ratio of sentences common with LC and Reannotation Budget', fontsize='large', fontweight='bold')
plt.xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
plt.title('Test Dataset', fontsize='large', fontweight='bold')
ax1.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

# Right panel: dev split (no y-label, as before).
ax2 = plt.subplot(1,2,2)
ax2 = sns.lineplot(data=dev_per)
plt.xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
plt.title('Dev Dataset', fontsize='large', fontweight='bold')
ax2.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

plt.savefig('retacred_percentage')

In [None]:
# Jaccard similarity of each ranking's selection vs. the first ranking, per
# budget, for the test and dev splits; saved as 'retacred_intersection'.
intersection = pd.DataFrame({'TACRev vs TACRev': re_intersection_c,
                             'GD vs TACRev': re_intersection_p,
                             'LD vs TACRev': re_intersection_l})
dev_intersection = pd.DataFrame({'TACRev vs TACRev': re_dev_intersection_c,
                                 'GD vs TACRev': re_dev_intersection_p,
                                 'LD vs TACRev': re_dev_intersection_l})

bold_16 = {'fontsize': 16, 'fontweight': 'bold'}
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: test split.
sns.lineplot(data=intersection, ax=ax1)
ax1.set_ylabel('Jaccard Similarity', **bold_16)
ax1.set_xlabel('Reannotation Budget', **bold_16)
ax1.set_title('Test Dataset', **bold_16)
ax1.legend(fontsize=12)
ax1.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

# Right panel: dev split (no y-label, as before).
sns.lineplot(data=dev_intersection, ax=ax2)
ax2.set_xlabel('Reannotation Budget', **bold_16)
ax2.set_title('Dev Dataset', **bold_16)
ax2.legend(fontsize=12)
ax2.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

fig.savefig('retacred_intersection')

## Identifying label change in TACRED (model predictions)

In [None]:
# Relation label string -> integer class id. 'no_relation' is id 0.
# The cells below use this table to convert the string labels read from the
# Re-TACRED id2label files into integer ids.
LABEL_TO_ID = {'no_relation': 0, 'org:members': 1, 'per:siblings': 2, 'per:spouse': 3, 'org:country_of_branch': 4, 'per:country_of_death': 5, 'per:parents': 6, 'per:stateorprovinces_of_residence': 7, 'org:top_members/employees': 8, 'org:dissolved': 9, 'org:number_of_employees/members': 10, 'per:stateorprovince_of_death': 11, 'per:origin': 12, 'per:children': 13, 'org:political/religious_affiliation': 14, 'per:city_of_birth': 15, 'per:title': 16, 'org:shareholders': 17, 'per:employee_of': 18, 'org:member_of': 19, 'org:founded_by': 20, 'per:countries_of_residence': 21, 'per:other_family': 22, 'per:religion': 23, 'per:identity': 24, 'per:date_of_birth': 25, 'org:city_of_branch': 26, 'org:alternate_names': 27, 'org:website': 28, 'per:cause_of_death': 29, 'org:stateorprovince_of_branch': 30, 'per:schools_attended': 31, 'per:country_of_birth': 32, 'per:date_of_death': 33, 'per:city_of_death': 34, 'org:founded': 35, 'per:cities_of_residence': 36, 'per:age': 37, 'per:charges': 38, 'per:stateorprovince_of_birth': 39}

### Loading ReTACRED

In [None]:
# Re-TACRED labels (as integer ids) for the sentences currently listed in
# `retacred_test`, then rebuild `retacred_test` with a `relation` column.
# The file is opened in a `with` block so the handle is closed, and the id
# filter is a set built once instead of a list rebuilt for every key.
with open('./retacred_patch/test_id2label.json') as label_file:
    retest_labels = json.load(label_file)

flagged_test_ids = set(retacred_test.sentence_id)
retest_dct = {sid: LABEL_TO_ID[label] for sid, label in retest_labels.items() if sid in flagged_test_ids}
retacred_test = pd.DataFrame({'sentence_id':list(retest_dct.keys()), 'relation':list(retest_dct.values())})

retacred_test

In [None]:
print(len(retacred_test))

In [None]:
# Re-TACRED labels (as integer ids) for the sentences currently listed in
# `retacred_dev`, then rebuild `retacred_dev` with a `relation` column.
# The file is opened in a `with` block so the handle is closed, and the id
# filter is a set built once instead of a list rebuilt for every key.
with open('./retacred_patch/dev_id2label.json') as label_file:
    redev_labels = json.load(label_file)

flagged_dev_ids = set(retacred_dev.sentence_id)
redev_dct = {sid: LABEL_TO_ID[label] for sid, label in redev_labels.items() if sid in flagged_dev_ids}
retacred_dev = pd.DataFrame({'sentence_id':list(redev_dct.keys()), 'relation':list(redev_dct.values())})

retacred_dev

In [None]:
print(len(retacred_dev))

In [None]:
# Re-TACRED labels (as integer ids) for the sentences currently listed in
# `retacred_train`, then rebuild `retacred_train` with a `relation` column.
# The file is opened in a `with` block so the handle is closed, and the id
# filter is a set built once instead of a list rebuilt for every key.
with open('./retacred_patch/train_id2label.json') as label_file:
    retrain_labels = json.load(label_file)

flagged_train_ids = set(retacred_train.sentence_id)
retrain_dct = {sid: LABEL_TO_ID[label] for sid, label in retrain_labels.items() if sid in flagged_train_ids}
retacred_train = pd.DataFrame({'sentence_id':list(retrain_dct.keys()), 'relation':list(retrain_dct.values())})

retacred_train

In [None]:
print(len(retacred_train))

### Analysis for Test dataset

In [None]:
# Cumulative model agreement along the confidence ranking (test split):
# match_conf[k-1] is the number of sentences among the top k of `sorted_dfconf`
# that are listed in `retacred_test` and whose Re-TACRED label equals the
# prediction of at least one of the five models.
#
# Fix: the previous loop took the "new" sentence from the tail of
# `retacred_test.merge(sorted_dfconf[:i])`, but an inner merge keeps the row
# order of the left frame, so the tail was generally not the sentence added at
# budget i. The ranking is now walked directly. This also removes one merge per
# budget, five full-frame scans per lookup, `int()` on a one-element Series and
# the per-iteration progress prints.
model_preds = [dict(zip(frame.sentence_id, frame.prediction))
               for frame in (parnn_test, lstm_test, bilstm_test, cgcn_test, gcn_test)]
reannotated_ids = set(retacred_test.sentence_id)

match_conf = []
count = 0
for sid in sorted_dfconf.sentence_id:
    if sid in reannotated_ids:
        reannotation = retest_dct[sid]
        if any(int(preds[sid]) == reannotation for preds in model_preds):
            count += 1
    match_conf.append(count)

In [None]:
print(len(match_conf))

In [None]:
# Cumulative model agreement along the `dp` ranking (test split):
# match_dpred[k-1] is the number of sentences among the top k of `sorted_dfpd`
# that are listed in `retacred_test` and whose Re-TACRED label equals the
# prediction of at least one of the five models.
#
# Fix: the previous loop took the "new" sentence from the tail of
# `retacred_test.merge(sorted_dfpd[:i])`, but an inner merge keeps the row
# order of the left frame, so the tail was generally not the sentence added at
# budget i. The ranking is now walked directly. This also removes one merge per
# budget, five full-frame scans per lookup, `int()` on a one-element Series and
# the per-iteration progress prints. The commented-out earlier draft was dropped.
model_preds = [dict(zip(frame.sentence_id, frame.prediction))
               for frame in (parnn_test, lstm_test, bilstm_test, cgcn_test, gcn_test)]
reannotated_ids = set(retacred_test.sentence_id)

match_dpred = []
count = 0
for sid in sorted_dfpd.sentence_id:
    if sid in reannotated_ids:
        reannotation = retest_dct[sid]
        if any(int(preds[sid]) == reannotation for preds in model_preds):
            count += 1
    match_dpred.append(count)

In [None]:
print(len(match_dpred))

In [None]:
# Cumulative model agreement along the `dl` ranking (test split):
# match_dlca[k-1] is the number of sentences among the top k of `sorted_dflca`
# that are listed in `retacred_test` and whose Re-TACRED label equals the
# prediction of at least one of the five models.
#
# Fix: the previous loop took the "new" sentence from the tail of
# `retacred_test.merge(sorted_dflca[:i])`, but an inner merge keeps the row
# order of the left frame, so the tail was generally not the sentence added at
# budget i. The ranking is now walked directly. This also removes one merge per
# budget, five full-frame scans per lookup, `int()` on a one-element Series and
# the per-iteration progress prints. The commented-out earlier draft was dropped.
model_preds = [dict(zip(frame.sentence_id, frame.prediction))
               for frame in (parnn_test, lstm_test, bilstm_test, cgcn_test, gcn_test)]
reannotated_ids = set(retacred_test.sentence_id)

match_dlca = []
count = 0
for sid in sorted_dflca.sentence_id:
    if sid in reannotated_ids:
        reannotation = retest_dct[sid]
        if any(int(preds[sid]) == reannotation for preds in model_preds):
            count += 1
    match_dlca.append(count)

In [None]:
print(len(match_dlca))

In [None]:
# Cumulative model agreement along the `lratio` ranking (test split):
# match_rl[k-1] is the number of sentences among the top k of `sorted_dfrl`
# that are listed in `retacred_test` and whose Re-TACRED label equals the
# prediction of at least one of the five models.
#
# Fix: the previous loop took the "new" sentence from the tail of
# `retacred_test.merge(sorted_dfrl[:i])`, but an inner merge keeps the row
# order of the left frame, so the tail was generally not the sentence added at
# budget i. The ranking is now walked directly. This also removes one merge per
# budget, five full-frame scans per lookup, `int()` on a one-element Series and
# the per-iteration progress prints. The commented-out earlier draft was dropped.
model_preds = [dict(zip(frame.sentence_id, frame.prediction))
               for frame in (parnn_test, lstm_test, bilstm_test, cgcn_test, gcn_test)]
reannotated_ids = set(retacred_test.sentence_id)

match_rl = []
count = 0
for sid in sorted_dfrl.sentence_id:
    if sid in reannotated_ids:
        reannotation = retest_dct[sid]
        if any(int(preds[sid]) == reannotation for preds in model_preds):
            count += 1
    match_rl.append(count)

In [None]:
print(len(match_rl))

### Analyzing Dev Data

In [None]:
# Cumulative model agreement along the confidence ranking (dev split):
# match_conf_dev[k-1] is the number of sentences among the top k of
# `sorted_dfconf_dev` that are listed in `retacred_dev` and whose Re-TACRED
# label equals the prediction of at least one of the five models.
#
# Fix: the previous loop took the "new" sentence from the tail of
# `retacred_dev.merge(sorted_dfconf_dev[:i])`, but an inner merge keeps the row
# order of the left frame, so the tail was generally not the sentence added at
# budget i. The ranking is now walked directly. This also removes one merge per
# budget, five full-frame scans per lookup, `int()` on a one-element Series and
# the per-iteration progress prints. The commented-out earlier draft was dropped.
model_preds_dev = [dict(zip(frame.sentence_id, frame.prediction))
                   for frame in (parnn_dev, lstm_dev, bilstm_dev, cgcn_dev, gcn_dev)]
reannotated_dev_ids = set(retacred_dev.sentence_id)

match_conf_dev = []
count = 0
for sid in sorted_dfconf_dev.sentence_id:
    if sid in reannotated_dev_ids:
        reannotation = redev_dct[sid]
        if any(int(preds[sid]) == reannotation for preds in model_preds_dev):
            count += 1
    match_conf_dev.append(count)

In [None]:
print(len(match_conf_dev))

In [None]:
# Cumulative model agreement along the `dp` ranking (dev split):
# match_dpred_dev[k-1] is the number of sentences among the top k of
# `sorted_dfpd_dev` that are listed in `retacred_dev` and whose Re-TACRED
# label equals the prediction of at least one of the five models.
#
# Fix: the previous loop took the "new" sentence from the tail of
# `retacred_dev.merge(sorted_dfpd_dev[:i])`, but an inner merge keeps the row
# order of the left frame, so the tail was generally not the sentence added at
# budget i. The ranking is now walked directly. This also removes one merge per
# budget, five full-frame scans per lookup, `int()` on a one-element Series and
# the per-iteration progress prints. The commented-out earlier draft was dropped.
model_preds_dev = [dict(zip(frame.sentence_id, frame.prediction))
                   for frame in (parnn_dev, lstm_dev, bilstm_dev, cgcn_dev, gcn_dev)]
reannotated_dev_ids = set(retacred_dev.sentence_id)

match_dpred_dev = []
count = 0
for sid in sorted_dfpd_dev.sentence_id:
    if sid in reannotated_dev_ids:
        reannotation = redev_dct[sid]
        if any(int(preds[sid]) == reannotation for preds in model_preds_dev):
            count += 1
    match_dpred_dev.append(count)

In [None]:
print(len(match_dpred_dev))

In [None]:
# Cumulative model agreement along the `dl` ranking (dev split):
# match_dlca_dev[k-1] is the number of sentences among the top k of
# `sorted_dflca_dev` that are listed in `retacred_dev` and whose Re-TACRED
# label equals the prediction of at least one of the five models.
#
# Fix: the previous loop took the "new" sentence from the tail of
# `retacred_dev.merge(sorted_dflca_dev[:i])`, but an inner merge keeps the row
# order of the left frame, so the tail was generally not the sentence added at
# budget i. The ranking is now walked directly. This also removes one merge per
# budget, five full-frame scans per lookup, `int()` on a one-element Series and
# the per-iteration progress prints. The commented-out earlier draft was dropped.
model_preds_dev = [dict(zip(frame.sentence_id, frame.prediction))
                   for frame in (parnn_dev, lstm_dev, bilstm_dev, cgcn_dev, gcn_dev)]
reannotated_dev_ids = set(retacred_dev.sentence_id)

match_dlca_dev = []
count = 0
for sid in sorted_dflca_dev.sentence_id:
    if sid in reannotated_dev_ids:
        reannotation = redev_dct[sid]
        if any(int(preds[sid]) == reannotation for preds in model_preds_dev):
            count += 1
    match_dlca_dev.append(count)

In [None]:
print(len(match_dlca_dev))

In [None]:
# Cumulative model agreement along the `lratio` ranking (dev split):
# match_rl_dev[k-1] is the number of sentences among the top k of
# `sorted_dfrl_dev` that are listed in `retacred_dev` and whose Re-TACRED
# label equals the prediction of at least one of the five models.
#
# Fix: the previous loop took the "new" sentence from the tail of
# `retacred_dev.merge(sorted_dfrl_dev[:i])`, but an inner merge keeps the row
# order of the left frame, so the tail was generally not the sentence added at
# budget i. The ranking is now walked directly. This also removes one merge per
# budget, five full-frame scans per lookup, `int()` on a one-element Series and
# the per-iteration progress prints. The commented-out earlier draft was dropped.
model_preds_dev = [dict(zip(frame.sentence_id, frame.prediction))
                   for frame in (parnn_dev, lstm_dev, bilstm_dev, cgcn_dev, gcn_dev)]
reannotated_dev_ids = set(retacred_dev.sentence_id)

match_rl_dev = []
count = 0
for sid in sorted_dfrl_dev.sentence_id:
    if sid in reannotated_dev_ids:
        reannotation = redev_dct[sid]
        if any(int(preds[sid]) == reannotation for preds in model_preds_dev):
            count += 1
    match_rl_dev.append(count)

In [None]:
print(len(match_rl_dev))

### Visualizations

In [None]:
# Cumulative model-agreement curves for the four rankings, test vs. dev,
# saved as 'retacred_match'.
match = pd.DataFrame({'Confidence': match_conf, 'GD': match_dpred,
                      'LD': match_dlca, 'RL': match_rl})
dev_match = pd.DataFrame({'Confidence': match_conf_dev, 'GD': match_dpred_dev,
                          'LD': match_dlca_dev, 'RL': match_rl_dev})

bold_large = {'fontsize': 'large', 'fontweight': 'bold'}
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: test split.
sns.lineplot(data=match, dashes=False, ax=ax1)
ax1.set_ylabel('Model Agreement', **bold_large)
ax1.set_xlabel('Reannotation Budget', **bold_large)
ax1.set_title('Test Dataset', **bold_large)

# Right panel: dev split (no y-label, as before).
sns.lineplot(data=dev_match, dashes=False, ax=ax2)
ax2.set_xlabel('Reannotation Budget', **bold_large)
ax2.set_title('Dev Dataset', **bold_large)

fig.savefig('retacred_match')

## Model Performance analysis after Re-Annotation

In [None]:
def calculate_f1(labels, predictions):
    """Return the micro-averaged F1 over non-zero classes, as a percentage.

    Label id 0 is treated as the negative class: a prediction of 0 is not
    counted as a predicted positive, and a gold label of 0 is not counted as
    a gold positive. (Presumably 0 is the "no relation" id -- confirm against
    LABEL_TO_ID.)

    Args:
        labels: iterable of gold label ids.
        predictions: iterable of predicted label ids, paired positionally
            with ``labels``.

    Returns:
        F1 * 100 rounded to two decimals, or 0.0 when nothing was predicted
        correctly (including the cases with no gold positives or no predicted
        positives, which would otherwise divide by zero).
    """
    num_predicted_labels = 0
    num_gold_labels = 0
    num_correct_labels = 0
    for label, prediction in zip(labels, predictions):
        if prediction != 0:
            num_predicted_labels += 1
        if label != 0:
            num_gold_labels += 1
            if prediction == label:
                num_correct_labels += 1

    # A correct positive implies at least one gold and one predicted positive,
    # so this single guard makes both divisions below safe.
    if num_correct_labels == 0:
        return 0.0

    precision = num_correct_labels / num_predicted_labels
    recall = num_correct_labels / num_gold_labels
    return round((2 * precision * recall / (precision + recall)) * 100, 2)

### PARNN

In [None]:
# sentence_id -> ground_truth / prediction lookups built from parnn_test.
parnn_labels = dict(zip(parnn_test.sentence_id, parnn_test.ground_truth))
parnn_preds = dict(zip(parnn_test.sentence_id, parnn_test.prediction))

#### Random

In [None]:
# PARNN F1 as a function of reannotation budget, visiting sentences in tacred_test order.
sids = list(tacred_test.sentence_id)
labels = [parnn_labels[sid] for sid in sids]
predictions = [parnn_preds[sid] for sid in sids]
pf1_re = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    pf1_re.append(f1)
    print(i, f1)

#### Confidence

In [None]:
# PARNN F1 as a function of reannotation budget, visiting sentences in sorted_dfconf order.
sids = list(sorted_dfconf.sentence_id)
labels = [parnn_labels[sid] for sid in sids]
predictions = [parnn_preds[sid] for sid in sids]
pf1_conf = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    pf1_conf.append(f1)
    print(i, f1)

#### distance between ground_truth and prediction

In [None]:
# PARNN F1 as a function of reannotation budget, visiting sentences in sorted_dfpd order.
sids = list(sorted_dfpd.sentence_id)
labels = [parnn_labels[sid] for sid in sids]
predictions = [parnn_preds[sid] for sid in sids]
pf1_pd = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    pf1_pd.append(f1)
    print(i, f1)

#### distance between ground_truth and LCA

In [None]:
# PARNN F1 as a function of reannotation budget, visiting sentences in sorted_dflca order.
sids = list(sorted_dflca.sentence_id)
labels = [parnn_labels[sid] for sid in sids]
predictions = [parnn_preds[sid] for sid in sids]
pf1_lca = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    pf1_lca.append(f1)
    print(i, f1)

#### Ratio

In [None]:
# PARNN F1 as a function of reannotation budget, visiting sentences in sorted_dfrl order.
sids = list(sorted_dfrl.sentence_id)
labels = [parnn_labels[sid] for sid in sids]
predictions = [parnn_preds[sid] for sid in sids]
pf1_ratio = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    pf1_ratio.append(f1)
    print(i, f1)

In [None]:
# Compare four reannotation orderings for PARNN on one set of axes.
# The confidence curve is labelled 'TACRev', matching the other model plots.
f1s = pd.DataFrame({'TACRev':pf1_conf, 'ReTACRED':pf1_re, 'GD':pf1_pd, 'LD':pf1_lca})

fig, ax1 = plt.subplots(figsize=(6, 6))
sns.lineplot(data=f1s, dashes=False, ax=ax1)
ax1.set_ylabel('F1 Score', fontsize='large', fontweight='bold')
ax1.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax1.set_title('PARNN', fontsize='large', fontweight='bold')

fig.savefig('parnn-f1')

### CGCN

In [None]:
# sentence_id -> ground_truth / prediction lookups built from cgcn_test.
cgcn_labels = dict(zip(cgcn_test.sentence_id, cgcn_test.ground_truth))
cgcn_preds = dict(zip(cgcn_test.sentence_id, cgcn_test.prediction))

#### Random

In [None]:
# CGCN F1 as a function of reannotation budget, visiting sentences in tacred_test order.
sids = list(tacred_test.sentence_id)
labels = [cgcn_labels[sid] for sid in sids]
predictions = [cgcn_preds[sid] for sid in sids]
cf1_re = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    cf1_re.append(f1)
    print(i, f1)

#### Confidence

In [None]:
# CGCN F1 as a function of reannotation budget, visiting sentences in sorted_dfconf order.
sids = list(sorted_dfconf.sentence_id)
labels = [cgcn_labels[sid] for sid in sids]
predictions = [cgcn_preds[sid] for sid in sids]
cf1_conf = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    cf1_conf.append(f1)
    print(i, f1)

#### distance between ground_truth and prediction

In [None]:
# CGCN F1 as a function of reannotation budget, visiting sentences in sorted_dfpd order.
sids = list(sorted_dfpd.sentence_id)
labels = [cgcn_labels[sid] for sid in sids]
predictions = [cgcn_preds[sid] for sid in sids]
cf1_pd = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    cf1_pd.append(f1)
    print(i, f1)

#### distance between ground truth and LCA

In [None]:
# CGCN F1 as a function of reannotation budget, visiting sentences in sorted_dflca order.
sids = list(sorted_dflca.sentence_id)
labels = [cgcn_labels[sid] for sid in sids]
predictions = [cgcn_preds[sid] for sid in sids]
cf1_lca = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    cf1_lca.append(f1)
    print(i, f1)

#### Ratio

In [None]:
# CGCN F1 as a function of reannotation budget, visiting sentences in sorted_dfrl order.
sids = list(sorted_dfrl.sentence_id)
labels = [cgcn_labels[sid] for sid in sids]
predictions = [cgcn_preds[sid] for sid in sids]
cf1_ratio = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    cf1_ratio.append(f1)
    print(i, f1)

In [None]:
# Compare four reannotation orderings for CGCN on one set of axes.
f1s = pd.DataFrame({'TACRev':cf1_conf, 'ReTACRED':cf1_re, 'GD':cf1_pd, 'LD':cf1_lca})

fig, ax1 = plt.subplots(figsize=(6, 6))
sns.lineplot(data=f1s, dashes=False, ax=ax1)
ax1.set_ylabel('F1 Score', fontsize='large', fontweight='bold')
ax1.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax1.set_title('CGCN', fontsize='large', fontweight='bold')

fig.savefig('cgcn-f1')

### GCN

In [None]:
# sentence_id -> ground_truth / prediction lookups built from gcn_test.
gcn_labels = dict(zip(gcn_test.sentence_id, gcn_test.ground_truth))
gcn_preds = dict(zip(gcn_test.sentence_id, gcn_test.prediction))

#### Random

In [None]:
# GCN F1 as a function of reannotation budget, visiting sentences in tacred_test order.
sids = list(tacred_test.sentence_id)
labels = [gcn_labels[sid] for sid in sids]
predictions = [gcn_preds[sid] for sid in sids]
gf1_re = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    gf1_re.append(f1)
    print(i, f1)

#### Confidence

In [None]:
# GCN F1 as a function of reannotation budget, visiting sentences in sorted_dfconf order.
sids = list(sorted_dfconf.sentence_id)
labels = [gcn_labels[sid] for sid in sids]
predictions = [gcn_preds[sid] for sid in sids]
gf1_conf = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    gf1_conf.append(f1)
    print(i, f1)

#### distance between ground truth and prediction

In [None]:
# GCN F1 as a function of reannotation budget, visiting sentences in sorted_dfpd order.
sids = list(sorted_dfpd.sentence_id)
labels = [gcn_labels[sid] for sid in sids]
predictions = [gcn_preds[sid] for sid in sids]
gf1_pd = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    gf1_pd.append(f1)
    print(i, f1)

#### distance between ground truth and LCA

In [None]:
# GCN F1 as a function of reannotation budget, visiting sentences in sorted_dflca order.
sids = list(sorted_dflca.sentence_id)
labels = [gcn_labels[sid] for sid in sids]
predictions = [gcn_preds[sid] for sid in sids]
gf1_lca = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    gf1_lca.append(f1)
    print(i, f1)

#### Ratio

In [None]:
# GCN F1 as a function of reannotation budget, visiting sentences in sorted_dfrl order.
sids = list(sorted_dfrl.sentence_id)
labels = [gcn_labels[sid] for sid in sids]
predictions = [gcn_preds[sid] for sid in sids]
gf1_ratio = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    gf1_ratio.append(f1)
    print(i, f1)

In [None]:
# Compare four reannotation orderings for GCN on one set of axes.
f1s = pd.DataFrame({'TACRev':gf1_conf, 'ReTACRED':gf1_re, 'GD':gf1_pd, 'LD':gf1_lca})

fig, ax1 = plt.subplots(figsize=(6, 6))
sns.lineplot(data=f1s, dashes=False, ax=ax1)
ax1.set_ylabel('F1 Score', fontsize='large', fontweight='bold')
ax1.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax1.set_title('GCN', fontsize='large', fontweight='bold')

fig.savefig('gcn-f1')

### LSTM

In [None]:
# sentence_id -> ground_truth / prediction lookups built from lstm_test.
lstm_labels = dict(zip(lstm_test.sentence_id, lstm_test.ground_truth))
lstm_preds = dict(zip(lstm_test.sentence_id, lstm_test.prediction))

In [None]:
# LSTM F1 as a function of reannotation budget, visiting sentences in tacred_test order.
sids = list(tacred_test.sentence_id)
labels = [lstm_labels[sid] for sid in sids]
predictions = [lstm_preds[sid] for sid in sids]
lf1_re = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    lf1_re.append(f1)
    print(i, f1)

In [None]:
# LSTM F1 as a function of reannotation budget, visiting sentences in sorted_dfconf order.
sids = list(sorted_dfconf.sentence_id)
labels = [lstm_labels[sid] for sid in sids]
predictions = [lstm_preds[sid] for sid in sids]
lf1_conf = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    lf1_conf.append(f1)
    print(i, f1)

In [None]:
# LSTM F1 as a function of reannotation budget, visiting sentences in sorted_dfpd order.
sids = list(sorted_dfpd.sentence_id)
labels = [lstm_labels[sid] for sid in sids]
predictions = [lstm_preds[sid] for sid in sids]
lf1_pd = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    lf1_pd.append(f1)
    print(i, f1)

In [None]:
# LSTM F1 as a function of reannotation budget, visiting sentences in sorted_dflca order.
sids = list(sorted_dflca.sentence_id)
labels = [lstm_labels[sid] for sid in sids]
predictions = [lstm_preds[sid] for sid in sids]
lf1_lca = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    lf1_lca.append(f1)
    print(i, f1)

In [None]:
# LSTM F1 as a function of reannotation budget, visiting sentences in sorted_dfrl order.
sids = list(sorted_dfrl.sentence_id)
labels = [lstm_labels[sid] for sid in sids]
predictions = [lstm_preds[sid] for sid in sids]
lf1_ratio = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    lf1_ratio.append(f1)
    print(i, f1)

In [None]:
# Compare four reannotation orderings for LSTM on one set of axes.
f1s = pd.DataFrame({'TACRev':lf1_conf, 'ReTACRED':lf1_re, 'GD':lf1_pd, 'LD':lf1_lca})

fig, ax1 = plt.subplots(figsize=(6, 6))
sns.lineplot(data=f1s, dashes=False, ax=ax1)
ax1.set_ylabel('F1 Score', fontsize='large', fontweight='bold')
ax1.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax1.set_title('LSTM', fontsize='large', fontweight='bold')

fig.savefig('lstm-f1')

### BiLSTM

In [None]:
# sentence_id -> ground_truth / prediction lookups built from bilstm_test.
bilstm_labels = dict(zip(bilstm_test.sentence_id, bilstm_test.ground_truth))
bilstm_preds = dict(zip(bilstm_test.sentence_id, bilstm_test.prediction))

In [None]:
# BiLSTM F1 as a function of reannotation budget, visiting sentences in tacred_test order.
sids = list(tacred_test.sentence_id)
labels = [bilstm_labels[sid] for sid in sids]
predictions = [bilstm_preds[sid] for sid in sids]
bf1_re = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    bf1_re.append(f1)
    print(i, f1)

In [None]:
# BiLSTM F1 as a function of reannotation budget, visiting sentences in sorted_dfconf order.
sids = list(sorted_dfconf.sentence_id)
labels = [bilstm_labels[sid] for sid in sids]
predictions = [bilstm_preds[sid] for sid in sids]
bf1_conf = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    bf1_conf.append(f1)
    print(i, f1)

In [None]:
# BiLSTM F1 as a function of reannotation budget, visiting sentences in sorted_dfpd order.
sids = list(sorted_dfpd.sentence_id)
labels = [bilstm_labels[sid] for sid in sids]
predictions = [bilstm_preds[sid] for sid in sids]
bf1_pd = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    bf1_pd.append(f1)
    print(i, f1)

In [None]:
# BiLSTM F1 as a function of reannotation budget, visiting sentences in sorted_dflca order.
sids = list(sorted_dflca.sentence_id)
labels = [bilstm_labels[sid] for sid in sids]
predictions = [bilstm_preds[sid] for sid in sids]
bf1_lca = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    bf1_lca.append(f1)
    print(i, f1)

In [None]:
# BiLSTM F1 as a function of reannotation budget, visiting sentences in sorted_dfrl order.
sids = list(sorted_dfrl.sentence_id)
labels = [bilstm_labels[sid] for sid in sids]
predictions = [bilstm_preds[sid] for sid in sids]
bf1_ratio = []
for i, sid in enumerate(sids, start=1):
    # Budget i adds exactly one sentence (sid); earlier ones were already
    # relabelled in previous iterations, so only position i-1 can change.
    if sid in re_test:
        labels[i - 1] = LABEL_TO_ID[re_test[sid]]
    f1 = calculate_f1(labels, predictions)
    bf1_ratio.append(f1)
    print(i, f1)

In [None]:
# Compare four reannotation orderings for BiLSTM on one set of axes.
f1s = pd.DataFrame({'TACRev':bf1_conf, 'ReTACRED':bf1_re, 'GD':bf1_pd, 'LD':bf1_lca})

fig, ax1 = plt.subplots(figsize=(6, 6))
sns.lineplot(data=f1s, dashes=False, ax=ax1)
ax1.set_ylabel('F1 Score', fontsize='large', fontweight='bold')
ax1.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax1.set_title('BiLSTM', fontsize='large', fontweight='bold')

fig.savefig('bilstm-f1')

## Visualization

In [None]:
# All five models under the confidence-based ordering (the *_conf curves).
f1s = pd.DataFrame({'PARNN':pf1_conf, 'CGCN':cf1_conf, 'GCN':gf1_conf, 'LSTM':lf1_conf, 'BiLSTM':bf1_conf})

fig, ax1 = plt.subplots(figsize=(6, 6))
sns.lineplot(data=f1s, dashes=False, ax=ax1)
ax1.set_ylabel('F1 Score', fontsize='large', fontweight='bold')
ax1.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax1.set_title('TACRev', fontsize='large', fontweight='bold')

fig.savefig('f1-tacrev')

In [None]:
# All five models under the tacred_test ordering (the *_re curves).
f1s = pd.DataFrame({'PARNN':pf1_re, 'CGCN':cf1_re, 'GCN':gf1_re, 'LSTM':lf1_re, 'BiLSTM':bf1_re})

fig, ax1 = plt.subplots(figsize=(6, 6))
sns.lineplot(data=f1s, dashes=False, ax=ax1)
ax1.set_ylabel('F1 Score', fontsize='large', fontweight='bold')
ax1.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax1.set_title('ReTACRED', fontsize='large', fontweight='bold')

fig.savefig('f1-retacred')

In [None]:
# All five models under the sorted_dfpd ordering (the *_pd curves).
f1s = pd.DataFrame({'PARNN':pf1_pd, 'CGCN':cf1_pd, 'GCN':gf1_pd, 'LSTM':lf1_pd, 'BiLSTM':bf1_pd})

fig, ax1 = plt.subplots(figsize=(6, 6))
sns.lineplot(data=f1s, dashes=False, ax=ax1)
ax1.set_ylabel('F1 Score', fontsize='large', fontweight='bold')
ax1.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax1.set_title('Graph Distance', fontsize='large', fontweight='bold')

fig.savefig('f1-pd')

In [None]:
# All five models under the sorted_dflca ordering (the *_lca curves).
f1s = pd.DataFrame({'PARNN':pf1_lca, 'CGCN':cf1_lca, 'GCN':gf1_lca, 'LSTM':lf1_lca, 'BiLSTM':bf1_lca})

fig, ax1 = plt.subplots(figsize=(6, 6))
sns.lineplot(data=f1s, dashes=False, ax=ax1)
ax1.set_ylabel('F1 Score', fontsize='large', fontweight='bold')
ax1.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax1.set_title('LCA Distance', fontsize='large', fontweight='bold')

fig.savefig('f1-lca')

In [None]:
# 2x2 grid: one panel per reannotation ordering, one line per model.
fig, axes = plt.subplots(2, 2, figsize=(20, 20))
ax1, ax2, ax3, ax4 = axes.flat

# (panel title, {model name: F1 curve}) in row-major panel order.
panels = [
    ('TACRev', {'PARNN':pf1_conf, 'CGCN':cf1_conf, 'GCN':gf1_conf, 'LSTM':lf1_conf, 'BiLSTM':bf1_conf}),
    ('ReTACRED', {'PARNN':pf1_re, 'CGCN':cf1_re, 'GCN':gf1_re, 'LSTM':lf1_re, 'BiLSTM':bf1_re}),
    ('Graph Distance', {'PARNN':pf1_pd, 'CGCN':cf1_pd, 'GCN':gf1_pd, 'LSTM':lf1_pd, 'BiLSTM':bf1_pd}),
    ('LCA Distance', {'PARNN':pf1_lca, 'CGCN':cf1_lca, 'GCN':gf1_lca, 'LSTM':lf1_lca, 'BiLSTM':bf1_lca}),
]

for panel_ax, (panel_title, curves) in zip((ax1, ax2, ax3, ax4), panels):
    f1s = pd.DataFrame(curves)
    sns.lineplot(data=f1s, dashes=False, ax=panel_ax)
    panel_ax.set_ylabel('F1 Score', fontsize=16, fontweight='bold')
    panel_ax.set_xlabel('Reannotation Budget', fontsize=16, fontweight='bold')
    panel_ax.set_title(panel_title, fontsize=16, fontweight='bold')
    panel_ax.legend(fontsize=18)

fig.savefig('f1')

### confidence x lratio

In [None]:
def _lrc_scores(model_df, model_name):
    """Return sentence_id plus ``(dl / dr) * confidence`` for one model.

    The result is a new two-column frame (``sentence_id``,
    ``lrc_<model_name>``) that keeps ``model_df``'s index. It is built
    separately so that ``model_df`` itself is not modified.
    """
    return pd.DataFrame({
        'sentence_id': model_df['sentence_id'],
        'lrc_{}'.format(model_name): (model_df.dl / model_df.dr) * model_df.confidence,
    })


parnn_test_df = _lrc_scores(parnn_test, 'parnn')
print(len(parnn_test_df))

lstm_test_df = _lrc_scores(lstm_test, 'lstm')
print(len(lstm_test_df))

bilstm_test_df = _lrc_scores(bilstm_test, 'bilstm')
print(len(bilstm_test_df))

cgcn_test_df = _lrc_scores(cgcn_test, 'cgcn')
print(len(cgcn_test_df))

gcn_test_df = _lrc_scores(gcn_test, 'gcn')
print(len(gcn_test_df))

# sentence_id is the only column the per-model frames share, so the default
# merge joins on it; the ensemble score is the plain mean over the five models.
dflrc = parnn_test_df.merge(lstm_test_df).merge(bilstm_test_df).merge(cgcn_test_df).merge(gcn_test_df)
dflrc['lrc'] = (dflrc['lrc_parnn'] + dflrc['lrc_lstm'] + dflrc['lrc_bilstm']
                  + dflrc['lrc_cgcn'] + dflrc['lrc_gcn']) / 5

dflrc

In [None]:
# Rank sentences from highest to lowest averaged lrc score.
sorted_dflrc = dflrc.sort_values('lrc', ascending=False)
sorted_dflrc

In [None]:
# Flag, in ranked order, which sentences are in retacred_test, then take a
# running total instead of re-merging the two frames for every budget.
# Equivalent to len(retacred_test.merge(sorted_dflrc[:i])) as long as
# retacred_test has unique sentence_ids and shares only that column.
is_relabelled = sorted_dflrc['sentence_id'].isin(retacred_test['sentence_id'])
# hits_in_top[i] == number of flagged sentences among the first i ranked rows.
hits_in_top = [0] + is_relabelled.astype(int).cumsum().tolist()
n_ranked = len(sorted_dflrc)

# Hit count at budgets 0 .. n_ranked-1.
re_freq_lrc = hits_in_top[:n_ranked]

print(len(re_freq_lrc))

# Hit rate (hits / budget) at budgets 1 .. n_ranked-1.
re_per_lrc = [hits_in_top[i] / i for i in range(1, n_ranked)]

print(len(re_per_lrc))

# Overlap with the confidence ranking at the same budget, starting at budget 5.
# The id lists are still produced by merge because jaccard_similarity is
# defined elsewhere and may depend on their exact form.
re_intersection_lrc = []
for i in range(5, n_ranked):
    s = list(retacred_test.merge(sorted_dflrc[:i])['sentence_id'])
    c = list(retacred_test.merge(sorted_dfconf[:i])['sentence_id'])
    re_intersection_lrc.append(jaccard_similarity(s, c)*100)

print(len(re_intersection_lrc))

In [None]:
# Rows of sorted_dflrc whose sentence_id also appears in retacred_test.
pd.merge(retacred_test, sorted_dflrc)

### square(confidence) x lratio

In [None]:
def _lrcs_scores(model_df, model_name):
    """Return sentence_id plus ``(dl / dr) * confidence**2`` for one model.

    The result is a new two-column frame (``sentence_id``,
    ``lrcs_<model_name>``) that keeps ``model_df``'s index. It is built
    separately so that ``model_df`` itself is not modified.
    """
    return pd.DataFrame({
        'sentence_id': model_df['sentence_id'],
        'lrcs_{}'.format(model_name): (model_df.dl / model_df.dr) * np.power(model_df.confidence, 2),
    })


parnn_test_df = _lrcs_scores(parnn_test, 'parnn')
print(len(parnn_test_df))

lstm_test_df = _lrcs_scores(lstm_test, 'lstm')
print(len(lstm_test_df))

bilstm_test_df = _lrcs_scores(bilstm_test, 'bilstm')
print(len(bilstm_test_df))

cgcn_test_df = _lrcs_scores(cgcn_test, 'cgcn')
print(len(cgcn_test_df))

gcn_test_df = _lrcs_scores(gcn_test, 'gcn')
print(len(gcn_test_df))

# sentence_id is the only column the per-model frames share, so the default
# merge joins on it; the ensemble score is the plain mean over the five models.
dflrcs = parnn_test_df.merge(lstm_test_df).merge(bilstm_test_df).merge(cgcn_test_df).merge(gcn_test_df)
dflrcs['lrcs'] = (dflrcs['lrcs_parnn'] + dflrcs['lrcs_lstm'] + dflrcs['lrcs_bilstm']
                  + dflrcs['lrcs_cgcn'] + dflrcs['lrcs_gcn']) / 5

dflrcs

In [None]:
# Rank sentences from highest to lowest averaged lrcs score.
sorted_dflrcs = dflrcs.sort_values('lrcs', ascending=False)
sorted_dflrcs

In [None]:
# Flag, in ranked order, which sentences are in retacred_test, then take a
# running total instead of re-merging the two frames for every budget.
# Equivalent to len(retacred_test.merge(sorted_dflrcs[:i])) as long as
# retacred_test has unique sentence_ids and shares only that column.
is_relabelled = sorted_dflrcs['sentence_id'].isin(retacred_test['sentence_id'])
# hits_in_top[i] == number of flagged sentences among the first i ranked rows.
hits_in_top = [0] + is_relabelled.astype(int).cumsum().tolist()
n_ranked = len(sorted_dflrcs)

# Hit count at budgets 0 .. n_ranked-1.
re_freq_lrcs = hits_in_top[:n_ranked]

print(len(re_freq_lrcs))

# Hit rate (hits / budget) at budgets 1 .. n_ranked-1.
re_per_lrcs = [hits_in_top[i] / i for i in range(1, n_ranked)]

print(len(re_per_lrcs))

# Overlap with the confidence ranking at the same budget, starting at budget 5.
# The id lists are still produced by merge because jaccard_similarity is
# defined elsewhere and may depend on their exact form.
re_intersection_lrcs = []
for i in range(5, n_ranked):
    s = list(retacred_test.merge(sorted_dflrcs[:i])['sentence_id'])
    c = list(retacred_test.merge(sorted_dfconf[:i])['sentence_id'])
    re_intersection_lrcs.append(jaccard_similarity(s, c)*100)

print(len(re_intersection_lrcs))

In [None]:
# Rows of sorted_dflrcs whose sentence_id also appears in retacred_test.
pd.merge(retacred_test, sorted_dflrcs)

### sqrt(confidence) x lratio

In [None]:
def _lrcsq_scores(model_df, model_name):
    """Return sentence_id plus ``(dl / dr) * sqrt(confidence)`` for one model.

    The result is a new two-column frame (``sentence_id``,
    ``lrcsq_<model_name>``) that keeps ``model_df``'s index. It is built
    separately so that ``model_df`` itself is not modified.
    """
    return pd.DataFrame({
        'sentence_id': model_df['sentence_id'],
        'lrcsq_{}'.format(model_name): (model_df.dl / model_df.dr) * np.sqrt(model_df.confidence),
    })


parnn_test_df = _lrcsq_scores(parnn_test, 'parnn')
print(len(parnn_test_df))

lstm_test_df = _lrcsq_scores(lstm_test, 'lstm')
print(len(lstm_test_df))

bilstm_test_df = _lrcsq_scores(bilstm_test, 'bilstm')
print(len(bilstm_test_df))

cgcn_test_df = _lrcsq_scores(cgcn_test, 'cgcn')
print(len(cgcn_test_df))

gcn_test_df = _lrcsq_scores(gcn_test, 'gcn')
print(len(gcn_test_df))

# sentence_id is the only column the per-model frames share, so the default
# merge joins on it; the ensemble score is the plain mean over the five models.
dflrcsq = parnn_test_df.merge(lstm_test_df).merge(bilstm_test_df).merge(cgcn_test_df).merge(gcn_test_df)
dflrcsq['lrcsq'] = (dflrcsq['lrcsq_parnn'] + dflrcsq['lrcsq_lstm'] + dflrcsq['lrcsq_bilstm']
                  + dflrcsq['lrcsq_cgcn'] + dflrcsq['lrcsq_gcn']) / 5

dflrcsq

In [None]:
# Rank sentences from highest to lowest averaged lrcsq score.
sorted_dflrcsq = dflrcsq.sort_values('lrcsq', ascending=False)
sorted_dflrcsq

In [None]:
# Flag, in ranked order, which sentences are in retacred_test, then take a
# running total instead of re-merging the two frames for every budget.
# Equivalent to len(retacred_test.merge(sorted_dflrcsq[:i])) as long as
# retacred_test has unique sentence_ids and shares only that column.
is_relabelled = sorted_dflrcsq['sentence_id'].isin(retacred_test['sentence_id'])
# hits_in_top[i] == number of flagged sentences among the first i ranked rows.
hits_in_top = [0] + is_relabelled.astype(int).cumsum().tolist()
n_ranked = len(sorted_dflrcsq)

# Hit count at budgets 0 .. n_ranked-1.
re_freq_lrcsq = hits_in_top[:n_ranked]

print(len(re_freq_lrcsq))

# Hit rate (hits / budget) at budgets 1 .. n_ranked-1.
re_per_lrcsq = [hits_in_top[i] / i for i in range(1, n_ranked)]

print(len(re_per_lrcsq))

# Overlap with the confidence ranking at the same budget, starting at budget 5.
# The id lists are still produced by merge because jaccard_similarity is
# defined elsewhere and may depend on their exact form.
re_intersection_lrcsq = []
for i in range(5, n_ranked):
    s = list(retacred_test.merge(sorted_dflrcsq[:i])['sentence_id'])
    c = list(retacred_test.merge(sorted_dfconf[:i])['sentence_id'])
    re_intersection_lrcsq.append(jaccard_similarity(s, c)*100)

print(len(re_intersection_lrcsq))

In [None]:
# Rows of sorted_dflrcsq whose sentence_id also appears in retacred_test.
pd.merge(retacred_test, sorted_dflrcsq)

In [None]:
# Hit curve for a selection that mixes two rankings (sorted_dfrl and sorted_dfconf).
# The selection is recomputed only when i is a multiple of step_size; for every
# other i the most recent count is appended again, so re_freq_ens has one entry
# per row of sorted_dfconf.
re_freq_ens = []
n_dp = 0
step_size = 200
prev_s1 = prev_s2 = list()
# Ids in retacred_test, as a set for the intersection below.
r = set(list(retacred_test['sentence_id']))
for i in range(len(sorted_dfconf)):
    # s1 and s2 start out as the SAME empty list object. s1 is rebound to a copy
    # before it is extended; s2 is only rebound inside the while loop.
    s1 = s2 = list()
    if i % step_size == 0:
        # j = i/2 ids are taken from the head of sorted_dfrl.
        j = int(i/2)
        s1 = prev_s1[:]
        s1 += list(sorted_dfrl[len(prev_s1):j]['sentence_id'])
        print(len(prev_s1), len(s1))
        k=0
        # Extend the sorted_dfconf prefix one row at a time (end index j+k) until
        # the union of both selections holds 2*j distinct ids, or the end index
        # reaches len(sorted_dfconf). Each pass restarts from prev_s2.
        while len(set(s1+s2)) < 2*j and (j+k < len(sorted_dfconf)):
            s2 = prev_s2[:]
            s2 += list(sorted_dfconf[len(prev_s2):j+k]['sentence_id'])
            print(i, j, len(prev_s2), k, "s1 + s2  :: ", len(set(s1+s2)))
            k += 1
        s = set(s1 + s2)
        # Number of selected ids that are in retacred_test.
        n_dp= len(r.intersection(s))
        print(i, n_dp)
        # Carry both selections forward so the next step only appends new rows.
        prev_s1 = s1
        prev_s2 = s2
        print(len(prev_s1), len(prev_s2))
        print("")
    re_freq_ens.append(n_dp)
    
    
print(len(re_freq_ens))

In [None]:
# Display the parnn_test frame for inspection.
parnn_test

In [None]:
def _percentile_scores(model_df, model_name):
    """Return sentence_id plus a rank-based combined score for one model.

    Rows are ranked once by ``confidence`` and once by ``lratio`` (both
    descending). Each rank is turned into a percentile in (0, 1]: the top row
    gets n/n == 1.0 and the last row gets 1/n. The combined score is
    ``lratio_percentile * conf_percentile**2`` and is returned in a column
    named ``percentile_<model_name>``. ``model_df`` is not modified.
    """
    n_rows = len(model_df)
    # Position 0 is the best-ranked row. Using (n - position) keeps the
    # percentile at or below 1.0; (n - position + 1) would give (n+1)/n for the top row.
    rank_percentile = (n_rows - np.arange(n_rows)) / n_rows

    ranked = model_df.sort_values(by=['confidence'], ascending=False).reset_index(drop=True)
    ranked['conf_percentile'] = rank_percentile
    # Re-rank by lratio; conf_percentile travels with its row.
    ranked = ranked.sort_values(by=['lratio'], ascending=False).reset_index(drop=True)
    ranked['lratio_percentile'] = rank_percentile

    return pd.DataFrame({
        'sentence_id': ranked['sentence_id'],
        'percentile_{}'.format(model_name): ranked.lratio_percentile * np.power(ranked.conf_percentile, 2),
    })


parnn_test_df = _percentile_scores(parnn_test, 'parnn')
lstm_test_df = _percentile_scores(lstm_test, 'lstm')
bilstm_test_df = _percentile_scores(bilstm_test, 'bilstm')
cgcn_test_df = _percentile_scores(cgcn_test, 'cgcn')
gcn_test_df = _percentile_scores(gcn_test, 'gcn')

# sentence_id is the only column the per-model frames share, so the default
# merge joins on it; the ensemble score is the plain mean over the five models.
dfpercentile = parnn_test_df.merge(lstm_test_df).merge(bilstm_test_df).merge(cgcn_test_df).merge(gcn_test_df)
dfpercentile['percentile'] = (dfpercentile['percentile_parnn'] + dfpercentile['percentile_lstm'] + dfpercentile['percentile_bilstm']
                  + dfpercentile['percentile_cgcn'] + dfpercentile['percentile_gcn']) / 5

dfpercentile

In [None]:
# Rank sentences from highest to lowest averaged percentile score.
sorted_dfpercentile = dfpercentile.sort_values('percentile', ascending=False)
sorted_dfpercentile

In [None]:
# Flag, in ranked order, which sentences are in retacred_test, then take a
# running total instead of re-merging the two frames for every budget.
# Equivalent to len(retacred_test.merge(sorted_dfpercentile[:i])) as long as
# retacred_test has unique sentence_ids and shares only that column.
is_relabelled = sorted_dfpercentile['sentence_id'].isin(retacred_test['sentence_id'])
# hits_in_top[i] == number of flagged sentences among the first i ranked rows.
hits_in_top = [0] + is_relabelled.astype(int).cumsum().tolist()
n_ranked = len(sorted_dfpercentile)

# Hit count at budgets 0 .. n_ranked-1.
re_freq_percentile = hits_in_top[:n_ranked]

print(len(re_freq_percentile))

# Hit rate (hits / budget) at budgets 1 .. n_ranked-1.
re_per_percentile = [hits_in_top[i] / i for i in range(1, n_ranked)]

print(len(re_per_percentile))

# Overlap with the confidence ranking at the same budget, starting at budget 5.
# The id lists are still produced by merge because jaccard_similarity is
# defined elsewhere and may depend on their exact form.
re_intersection_percentile = []
for i in range(5, n_ranked):
    s = list(retacred_test.merge(sorted_dfpercentile[:i])['sentence_id'])
    c = list(retacred_test.merge(sorted_dfconf[:i])['sentence_id'])
    re_intersection_percentile.append(jaccard_similarity(s, c)*100)

print(len(re_intersection_percentile))

### Visualization

In [None]:
# 2x2 comparison of the ranking strategies on the test split.
fig, axes = plt.subplots(2, 2, figsize=(20, 16))
ax1, ax2, ax3, ax4 = axes.flat

# Panel 1: raw hit counts per budget.
freq = pd.DataFrame({'Confidence':re_freq_c, 'Ratio':re_freq_rl, 'Ratio*Conf':re_freq_lrc, 'Ratio*Square(Conf)':re_freq_lrcs,
                     'Ratio*Sqrt(Conf)':re_freq_lrcsq, 'PERCENTILE':re_freq_percentile})
sns.lineplot(data=freq, dashes=False, ax=ax1)
ax1.set_ylabel('Number of sentences common with LC', fontsize='large', fontweight='bold')
ax1.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax1.set_title('Test Dataset', fontsize='large', fontweight='bold')
# ax1.set_yticks(np.arange(0, 700, 70))

# Panel 2: the same counts as a percentage of 3936.
# NOTE(review): 3936 is presumably the total number of relabelled test sentences -- confirm.
freq = freq / 3936 * 100
sns.lineplot(data=freq, dashes=False, ax=ax2)
ax2.set_ylabel('Percentage of sentences common with LC', fontsize='large', fontweight='bold')
ax2.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax2.set_title('Test Dataset', fontsize='large', fontweight='bold')
ax2.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

# Panel 3: hits divided by budget, shown as a percentage.
freq = pd.DataFrame({'Confidence':re_per_c, 'Ratio':re_per_rl, 'Ratio*Conf':re_per_lrc, 'Ratio*Square(Conf)':re_per_lrcs,
                     'Ratio*Sqrt(Conf)':re_per_lrcsq, 'PERCENTILE':re_per_percentile})
freq = freq*100
sns.lineplot(data=freq, dashes=False, ax=ax3)
ax3.set_ylabel('Ratio of sentences common with LC and Reannotation Budget', fontsize='large', fontweight='bold')
ax3.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax3.set_title('Test Dataset', fontsize='large', fontweight='bold')
ax3.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

# Panel 4: Jaccard similarity of each ranking's hits against the confidence ranking.
intersection = pd.DataFrame({'Confidence Vs Confidence':re_intersection_c, 'Ratio Vs Confidence':re_intersection_rl, 'Ratio*Conf Vs Confidence':re_intersection_lrc,
                             'Ratio*Square(Conf) Vs Confidence':re_intersection_lrcs, 'Ratio*Sqrt(Conf) Vs Confidence':re_intersection_lrcsq,
                             'PERCENTILE':re_intersection_percentile})
sns.lineplot(data=intersection, dashes=False, ax=ax4)
ax4.set_ylabel('Jaccard Similarity', fontsize='large', fontweight='bold')
ax4.set_xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
ax4.set_title('Test Dataset', fontsize='large', fontweight='bold')
ax4.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

# fig.savefig('ratio-confidence-test-analysis')

In [None]:
# Preview the first 20 rows of sorted_dflrcsq
# (presumably the Ratio*Sqrt(Conf) ranking, going by the `lrcsq` suffix -- confirm).
sorted_dflrcsq.head(20)

In [None]:
# Preview the first 20 rows of sorted_dflrcs
# (presumably the Ratio*Square(Conf) ranking, going by the `lrcs` suffix -- confirm).
sorted_dflrcs.head(20)

In [None]:
# Preview the first 20 rows of sorted_dflrc
# (presumably the Ratio*Conf ranking, going by the `lrc` suffix -- confirm).
sorted_dflrc.head(20)

In [None]:
# Preview the first 20 rows of sorted_dfrl
# (presumably the Ratio ranking, going by the `rl` suffix -- confirm).
sorted_dfrl.head(20)

## Rough

In [None]:
# NOTE(review): disabled scratch code, kept for reference only. When enabled, for
# every sentence id in tt_dict it counts how many of the five prediction frames
# (parnn_test, lstm_test, bilstm_test, cgcn_test, gcn_test) contain that id with a
# `prediction` equal to tt_dict[sid], printing each match as it is found.
# `rank` is initialised but never filled in.
# If revived: `list(<frame>.sentence_id)` is rebuilt on every loop iteration;
# precompute a set of ids (or a sid -> prediction dict) per frame once, before the loop.
# rank = []
# for sid in tt_dict:
#     cnt = 0
# #     print(sid, tt_dict[sid])
#     if sid in list(parnn_test.sentence_id):
#         if tt_dict[sid] == int(parnn_test[parnn_test.sentence_id == sid]['prediction']):
#             cnt+=1
#             print(sid, tt_dict[sid], int(parnn_test[parnn_test.sentence_id == sid]['prediction']), cnt)
    
#     if sid in list(lstm_test.sentence_id):
#         if tt_dict[sid] == int(lstm_test[lstm_test.sentence_id == sid]['prediction']):
#             cnt+=1
#             print(sid, tt_dict[sid], int(lstm_test[lstm_test.sentence_id == sid]['prediction']), cnt)
            
#     if sid in list(bilstm_test.sentence_id):
#         if tt_dict[sid] == int(bilstm_test[bilstm_test.sentence_id == sid]['prediction']):
#             cnt+=1
#             print(sid, tt_dict[sid], int(bilstm_test[bilstm_test.sentence_id == sid]['prediction']), cnt)
            
#     if sid in list(cgcn_test.sentence_id):
#         if tt_dict[sid] == int(cgcn_test[cgcn_test.sentence_id == sid]['prediction']):
#             cnt+=1
#             print(sid, tt_dict[sid], int(cgcn_test[cgcn_test.sentence_id == sid]['prediction']), cnt)
    
#     if sid in list(gcn_test.sentence_id):
#         if tt_dict[sid] == int(gcn_test[gcn_test.sentence_id == sid]['prediction']):
#             cnt+=1
#             print(sid, tt_dict[sid], int(gcn_test[gcn_test.sentence_id == sid]['prediction']), cnt)
        

In [None]:
# Load the Re-TACRED train-split id2label JSON and report how many entries it has.
# A context manager is used so the file handle is closed as soon as parsing ends
# (the bare `json.load(open(...))` form leaves it open until garbage collection).
# The encoding is pinned to UTF-8 so decoding does not depend on the platform locale.
with open('./../dataset/tacred/json/Re-TACRED/train_id2label.json', encoding='utf-8') as re_train_file:
    re_train = json.load(re_train_file)
print("Total Number of instances in ReTACRED-Reduced train set  :: {}".format(len(re_train)))

