In [None]:
import pandas as pd
import json
import textstat as txt
from itertools import groupby

import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# from sklearn.metrics import jaccard_score

# Loading TACRED dataset

In [None]:
# Load the three TACRED splits. Each file is opened in a context manager so the
# handle is closed as soon as the JSON has been parsed.
with open('./../dataset/tacred/train.json') as train_file:
    train_data = json.load(train_file)
print("Number of Training instances :: {}".format(len(train_data)))

with open('./../dataset/tacred/dev.json') as dev_file:
    dev_data = json.load(dev_file)
print("Number of Dev instances :: {}".format(len(dev_data)))

with open('./../dataset/tacred/test.json') as test_file:
    test_data = json.load(test_file)
print("Number of Test instances :: {}".format(len(test_data)))

# Sentence Attributes

### Sentence Length

In [None]:
def generate_sentence_length_mapping(dataset):
    """Pair every example id with the number of tokens in that example.

    Args:
        dataset: iterable of dicts that carry an 'id' and a 'token' list.

    Returns:
        DataFrame with columns 'sentence_id' and 'sentence_len', one row per
        example, in input order. The two column lengths are also printed.
    """
    id_len_pairs = [(example['id'], len(example['token'])) for example in dataset]
    sentence_ids = [pair[0] for pair in id_len_pairs]
    token_counts = [pair[1] for pair in id_len_pairs]

    assert len(sentence_ids) == len(token_counts)
    print(len(sentence_ids), len(token_counts) )
    return pd.DataFrame({'sentence_id': sentence_ids, 'sentence_len': token_counts})

In [None]:
# Token-count features for the two evaluation splits (test first, then dev).
df_test_len, df_dev_len = [
    generate_sentence_length_mapping(split) for split in (test_data, dev_data)
]

In [None]:
def convert_token(token):
    """ Map a PTB bracket escape (e.g. -LRB-) to its literal bracket.

    Matching is case-insensitive; any other token is returned unchanged.
    """
    ptb_brackets = {
        '-lrb-': '(',
        '-rrb-': ')',
        '-lsb-': '[',
        '-rsb-': ']',
        '-lcb-': '{',
        '-rcb-': '}',
    }
    return ptb_brackets.get(token.lower(), token)

### Flesch-Kincaid Grade readability score

In [None]:
def generate_flesch_kincaid_grade(dataset):
    """Score every example with textstat's ``flesch_kincaid_grade``.

    The tokens of an example are passed through ``convert_token`` and joined
    with single spaces before scoring.

    Returns:
        DataFrame with columns 'sentence_id' and 'f_k_score', one row per
        example, in input order. The two column lengths are also printed.
    """
    sentence_ids = []
    grades = []
    for example in dataset:
        example_id = example['id']
        plain_tokens = [convert_token(tok) for tok in example['token']]
        sentence_ids.append(example_id)
        grades.append(txt.flesch_kincaid_grade(' '.join(plain_tokens)))

    assert len(sentence_ids) == len(grades)
    print(len(sentence_ids), len(grades))
    return pd.DataFrame({'sentence_id': sentence_ids, 'f_k_score': grades})

In [None]:
# Readability features for the two evaluation splits (test first, then dev).
df_test_fk, df_dev_fk = [
    generate_flesch_kincaid_grade(split) for split in (test_data, dev_data)
]

### Entity specific features

In [None]:
def generate_entity_specific_attribute(dataset):
    """Compute entity-related features for every example.

    Args:
        dataset: iterable of dicts with the keys 'id', 'subj_start',
            'subj_end', 'obj_start', 'obj_end' (inclusive token indices) and
            'stanford_ner' (one tag per token, 'O' for non-entities).

    Returns:
        DataFrame with one row per example and the columns
        'sentence_id', 'entities_distance' (tokens strictly between the
        subject and object spans; 0 when the spans touch or overlap) and
        'entities_count' (number of maximal runs of one identical non-'O'
        NER tag). The three column lengths are also printed.
    """
    s_ids = []
    distance = []
    e_count = []
    for eg in dataset:
        s_ids.append(eg['id'])
        s_start, s_end = eg['subj_start'], eg['subj_end']
        o_start, o_end = eg['obj_start'], eg['obj_end']

        # For disjoint spans exactly one of the two gaps is >= 0. Clamping at 0
        # gives overlapping spans a defined distance instead of reusing the
        # previous example's value (or failing on the first example).
        distance.append(max(o_start - s_end - 1, s_start - o_end - 1, 0))

        # One entity per run of consecutive identical tags. Unlike a manual
        # scan that only closes a run on the next token, this also counts a
        # run that reaches the end of the sentence.
        e_count.append(sum(1 for tag, _ in groupby(eg['stanford_ner']) if tag != 'O'))

    assert len(s_ids) == len(distance)
    assert len(s_ids) == len(e_count)
    print(len(s_ids), len(distance), len(e_count))
    df = pd.DataFrame({'sentence_id':s_ids, 'entities_distance':distance, 'entities_count':e_count})
    return df
            

In [None]:
# Entity features for the two evaluation splits (test first, then dev).
df_test_ef, df_dev_ef = [
    generate_entity_specific_attribute(split) for split in (test_data, dev_data)
]

## Loading TACREV dataset

In [None]:
# Read the TACREV test patch; the context manager closes the file handle.
with open('./tacrev_patch/test_patch.json') as patch_file:
    tacrev_test = json.load(patch_file)

# Ids of the test sentences listed in the patch.
sentence_id = [item['id'] for item in tacrev_test]

df_test = pd.DataFrame({'sentence_id':sentence_id})
# df_test

In [None]:
# Read the TACREV dev patch; the context manager closes the file handle.
with open('./tacrev_patch/dev_patch.json') as patch_file:
    tacrev_dev = json.load(patch_file)

# Ids of the dev sentences listed in the patch.
sentence_id = [item['id'] for item in tacrev_dev]

df_dev = pd.DataFrame({'sentence_id':sentence_id})
# df_dev

# Jaccard Similarity Score

In [None]:
def jaccard_similarity(list1, list2):
    """Jaccard similarity |A & B| / |A | B| of two collections, as a float.

    Duplicates are ignored because both inputs are converted to sets. Two
    empty inputs are treated as identical and score 1.0 (instead of raising
    ZeroDivisionError).
    """
    s1 = set(list1)
    s2 = set(list2)
    union = s1 | s2
    if not union:
        # Empty vs. empty: no element disagrees, so report full similarity.
        return 1.0
    return len(s1 & s2) / len(union)

## Confidence for test data

In [None]:
# Per-model confidence on the test split, one renamed column per model.
conf_frames = []
for tsv_name, conf_column in (
    ('new_parnn_test.tsv', 'confidence_parnn'),
    ('new_lstm_test.tsv', 'confidence_lstm'),
    ('new_cgcn_test.tsv', 'confidence_gcn'),
):
    model_conf = pd.read_csv(tsv_name, sep='\t')[['sentence_id', 'confidence']]
    model_conf = model_conf.rename(columns={'confidence': conf_column})
    print(len(model_conf))
    conf_frames.append(model_conf)
df1, df2, df3 = conf_frames

dfconf = df1.merge(df2).merge(df3)
# Ensemble confidence = plain mean of the three model confidences.
dfconf['confidence'] = (dfconf['confidence_parnn'] + dfconf['confidence_lstm'] + dfconf['confidence_gcn']) / 3
# Attach sentence length, readability and entity features.
for feature_frame in (df_test_len, df_test_fk, df_test_ef):
    dfconf = dfconf.merge(feature_frame)
print('\n')
print(len(dfconf))
dfconf

In [None]:
# Rank test sentences from highest to lowest mean confidence.
sorted_dfconf = dfconf.sort_values(by='confidence', ascending=False)
sorted_dfconf

In [None]:
# freq_c[i] = len(df_test.merge(sorted_dfconf[:i])): the number of TACREV-patch
# matches among the first i rows of the confidence ranking.
# Every ranked row contributes as many merge rows as its id occurs in df_test, so
# a running sum gives the same counts without re-merging a growing slice on each
# iteration (which is quadratic in the number of rows).
patch_multiplicity = df_test['sentence_id'].value_counts()
rows_matched = sorted_dfconf['sentence_id'].map(patch_multiplicity).fillna(0).astype(int)
# Entry i covers rows [0, i), hence the leading 0 and the dropped final total.
freq_c = ([0] + rows_matched.cumsum().tolist())[:len(sorted_dfconf)]
print(len(freq_c))

In [None]:
# Baseline curve: the confidence ranking compared with itself at every budget
# from 5 rows upward.
intersection_c = []
for i in range(5,len(sorted_dfconf)):
    # Both sides are the same selection, so merge once and reuse the result.
    s = list(df_test.merge(sorted_dfconf[:i])['sentence_id'])
    c = s
    intersection_c.append(jaccard_similarity(s, c)*100)
print(len(intersection_c))

In [None]:
# TACREV-patched test sentences with their confidence and feature columns.
pd.merge(df_test, sorted_dfconf)

## Confidence for dev data

In [None]:
# Per-model confidence on the dev split, one renamed column per model.
conf_frames = []
for tsv_name, conf_column in (
    ('new_parnn_dev.tsv', 'confidence_parnn'),
    ('new_lstm_dev.tsv', 'confidence_lstm'),
    ('new_cgcn_dev.tsv', 'confidence_gcn'),
):
    model_conf = pd.read_csv(tsv_name, sep='\t')[['sentence_id', 'confidence']]
    model_conf = model_conf.rename(columns={'confidence': conf_column})
    print(len(model_conf))
    conf_frames.append(model_conf)
df1, df2, df3 = conf_frames

dfconf_dev = df1.merge(df2).merge(df3)
# Ensemble confidence = plain mean of the three model confidences.
dfconf_dev['confidence'] = (dfconf_dev['confidence_parnn'] + dfconf_dev['confidence_lstm'] + dfconf_dev['confidence_gcn']) / 3
# Attach sentence length, readability and entity features.
for feature_frame in (df_dev_len, df_dev_fk, df_dev_ef):
    dfconf_dev = dfconf_dev.merge(feature_frame)
print('\n')
print(len(dfconf_dev))
dfconf_dev

In [None]:
# Rank dev sentences from highest to lowest mean confidence.
sorted_dfconf_dev = dfconf_dev.sort_values(by='confidence', ascending=False)

In [None]:
# dev_freq_c[i] = len(df_dev.merge(sorted_dfconf_dev[:i])): the number of
# TACREV-patch matches among the first i rows of the dev confidence ranking.
# Every ranked row contributes as many merge rows as its id occurs in df_dev, so
# a running sum gives the same counts without re-merging a growing slice on each
# iteration (which is quadratic in the number of rows).
patch_multiplicity = df_dev['sentence_id'].value_counts()
rows_matched = sorted_dfconf_dev['sentence_id'].map(patch_multiplicity).fillna(0).astype(int)
# Entry i covers rows [0, i), hence the leading 0 and the dropped final total.
dev_freq_c = ([0] + rows_matched.cumsum().tolist())[:len(sorted_dfconf_dev)]
print(len(dev_freq_c))

In [None]:
# Baseline curve: the dev confidence ranking compared with itself at every
# budget from 5 rows upward.
dev_intersection_c = []
for i in range(5, len(sorted_dfconf_dev)):
    # Both sides are the same selection, so merge once and reuse the result.
    s = list(df_dev.merge(sorted_dfconf_dev[:i])['sentence_id'])
    c = s
    dev_intersection_c.append(jaccard_similarity(s, c)*100)
print(len(dev_intersection_c))

In [None]:
# TACREV-patched dev sentences with their confidence and feature columns.
pd.merge(df_dev, sorted_dfconf_dev)

## Prediction distance from ground truth on test data

In [None]:
# Per-model 'd_prediction' on the test split, one renamed column per model.
distance_frames = []
for tsv_name, distance_column in (
    ('new_parnn_test.tsv', 'd_prediction_parnn'),
    ('new_lstm_test.tsv', 'd_prediction_lstm'),
    ('new_cgcn_test.tsv', 'd_prediction_gcn'),
):
    model_distance = pd.read_csv(tsv_name, sep='\t')[['sentence_id', 'd_prediction']]
    model_distance = model_distance.rename(columns={'d_prediction': distance_column})
    print(len(model_distance))
    distance_frames.append(model_distance)
df1, df2, df3 = distance_frames

df_pd = df1.merge(df2).merge(df3)
# Ensemble value = plain mean of the three per-model distances.
df_pd['d_prediction'] = (df_pd['d_prediction_parnn'] + df_pd['d_prediction_lstm'] + df_pd['d_prediction_gcn']) / 3
# Attach sentence length, readability and entity features.
for feature_frame in (df_test_len, df_test_fk, df_test_ef):
    df_pd = df_pd.merge(feature_frame)
print('\n')
print(len(df_pd))
df_pd

In [None]:
# Rank test sentences from largest to smallest mean 'd_prediction'.
sorted_df_pd = df_pd.sort_values(by='d_prediction', ascending=False)

In [None]:
# freq_p[i] = len(df_test.merge(sorted_df_pd[:i])): the number of TACREV-patch
# matches among the first i rows of the d_prediction ranking.
# Every ranked row contributes as many merge rows as its id occurs in df_test, so
# a running sum gives the same counts without re-merging a growing slice on each
# iteration (which is quadratic in the number of rows).
patch_multiplicity = df_test['sentence_id'].value_counts()
rows_matched = sorted_df_pd['sentence_id'].map(patch_multiplicity).fillna(0).astype(int)
# Entry i covers rows [0, i), hence the leading 0 and the dropped final total.
freq_p = ([0] + rows_matched.cumsum().tolist())[:len(sorted_df_pd)]
print(len(freq_p))

In [None]:
# Jaccard overlap (in %) between the TACREV hits selected by the d_prediction
# ranking and by the confidence ranking, for every budget from 5 rows upward.
intersection_p = [
    jaccard_similarity(
        df_test.merge(sorted_df_pd[:budget])['sentence_id'].tolist(),
        df_test.merge(sorted_dfconf[:budget])['sentence_id'].tolist(),
    ) * 100
    for budget in range(5, len(sorted_df_pd))
]
print(len(intersection_p))

In [None]:
# TACREV-patched test sentences with their d_prediction and feature columns.
pd.merge(df_test, sorted_df_pd)

## Prediction distance from ground truth on dev data

In [None]:
# Per-model 'd_prediction' on the dev split, one renamed column per model.
distance_frames = []
for tsv_name, distance_column in (
    ('new_parnn_dev.tsv', 'd_prediction_parnn'),
    ('new_lstm_dev.tsv', 'd_prediction_lstm'),
    ('new_cgcn_dev.tsv', 'd_prediction_gcn'),
):
    model_distance = pd.read_csv(tsv_name, sep='\t')[['sentence_id', 'd_prediction']]
    model_distance = model_distance.rename(columns={'d_prediction': distance_column})
    print(len(model_distance))
    distance_frames.append(model_distance)
df1, df2, df3 = distance_frames

df_pd_dev = df1.merge(df2).merge(df3)
# Ensemble value = plain mean of the three per-model distances.
df_pd_dev['d_prediction'] = (df_pd_dev['d_prediction_parnn'] + df_pd_dev['d_prediction_lstm'] + df_pd_dev['d_prediction_gcn']) / 3
# Attach sentence length, readability and entity features.
for feature_frame in (df_dev_len, df_dev_fk, df_dev_ef):
    df_pd_dev = df_pd_dev.merge(feature_frame)
print('\n')
print(len(df_pd_dev))
df_pd_dev

In [None]:
# Rank dev sentences from largest to smallest mean 'd_prediction'.
sorted_df_pd_dev = df_pd_dev.sort_values(by='d_prediction', ascending=False)

In [None]:
# dev_freq_p[i] = len(df_dev.merge(sorted_df_pd_dev[:i])): the number of
# TACREV-patch matches among the first i rows of the dev d_prediction ranking.
# Every ranked row contributes as many merge rows as its id occurs in df_dev, so
# a running sum gives the same counts without re-merging a growing slice on each
# iteration (which is quadratic in the number of rows).
patch_multiplicity = df_dev['sentence_id'].value_counts()
rows_matched = sorted_df_pd_dev['sentence_id'].map(patch_multiplicity).fillna(0).astype(int)
# Entry i covers rows [0, i), hence the leading 0 and the dropped final total.
dev_freq_p = ([0] + rows_matched.cumsum().tolist())[:len(sorted_df_pd_dev)]
print(len(dev_freq_p))

In [None]:
# Jaccard overlap (in %) between the TACREV hits selected by the dev
# d_prediction ranking and by the dev confidence ranking, for every budget
# from 5 rows upward.
dev_intersection_p = [
    jaccard_similarity(
        df_dev.merge(sorted_df_pd_dev[:budget])['sentence_id'].tolist(),
        df_dev.merge(sorted_dfconf_dev[:budget])['sentence_id'].tolist(),
    ) * 100
    for budget in range(5, len(sorted_df_pd_dev))
]
print(len(dev_intersection_p))

In [None]:
# TACREV-patched dev sentences with their d_prediction and feature columns.
pd.merge(df_dev, sorted_df_pd_dev)

## Prediction distance from LCA on test data

In [None]:
# Per-model 'd_lca' on the test split, one renamed column per model.
lca_frames = []
for tsv_name, lca_column in (
    ('new_parnn_test.tsv', 'd_lca_parnn'),
    ('new_lstm_test.tsv', 'd_lca_lstm'),
    ('new_cgcn_test.tsv', 'd_lca_gcn'),
):
    model_lca = pd.read_csv(tsv_name, sep='\t')[['sentence_id', 'd_lca']]
    model_lca = model_lca.rename(columns={'d_lca': lca_column})
    print(len(model_lca))
    lca_frames.append(model_lca)
df1, df2, df3 = lca_frames

dflca = df1.merge(df2).merge(df3)
# Ensemble value = plain mean of the three per-model distances.
dflca['d_lca'] = (dflca['d_lca_parnn'] + dflca['d_lca_lstm'] + dflca['d_lca_gcn']) / 3
# Attach sentence length, readability and entity features.
for feature_frame in (df_test_len, df_test_fk, df_test_ef):
    dflca = dflca.merge(feature_frame)
print('\n')
print(len(dflca))
dflca

In [None]:
# Rank test sentences from largest to smallest mean 'd_lca'.
sorted_dflca = dflca.sort_values(by='d_lca', ascending=False)

In [None]:
# freq_l[i] = len(df_test.merge(sorted_dflca[:i])): the number of TACREV-patch
# matches among the first i rows of the d_lca ranking.
# Every ranked row contributes as many merge rows as its id occurs in df_test, so
# a running sum gives the same counts without re-merging a growing slice on each
# iteration (which is quadratic in the number of rows).
patch_multiplicity = df_test['sentence_id'].value_counts()
rows_matched = sorted_dflca['sentence_id'].map(patch_multiplicity).fillna(0).astype(int)
# Entry i covers rows [0, i), hence the leading 0 and the dropped final total.
freq_l = ([0] + rows_matched.cumsum().tolist())[:len(sorted_dflca)]
print(len(freq_l))

In [None]:
# Jaccard overlap (in %) between the TACREV hits selected by the d_lca ranking
# and by the confidence ranking, for every budget from 5 rows upward.
intersection_l = [
    jaccard_similarity(
        df_test.merge(sorted_dflca[:budget])['sentence_id'].tolist(),
        df_test.merge(sorted_dfconf[:budget])['sentence_id'].tolist(),
    ) * 100
    for budget in range(5, len(sorted_dflca))
]
print(len(intersection_l))

In [None]:
# TACREV-patched test sentences with their d_lca and feature columns.
pd.merge(df_test, sorted_dflca)

## Prediction distance from LCA on dev data

In [None]:
# Per-model 'd_lca' on the dev split, one renamed column per model.
lca_frames = []
for tsv_name, lca_column in (
    ('new_parnn_dev.tsv', 'd_lca_parnn'),
    ('new_lstm_dev.tsv', 'd_lca_lstm'),
    ('new_cgcn_dev.tsv', 'd_lca_gcn'),
):
    model_lca = pd.read_csv(tsv_name, sep='\t')[['sentence_id', 'd_lca']]
    model_lca = model_lca.rename(columns={'d_lca': lca_column})
    print(len(model_lca))
    lca_frames.append(model_lca)
df1, df2, df3 = lca_frames

dflca_dev = df1.merge(df2).merge(df3)
# Ensemble value = plain mean of the three per-model distances.
dflca_dev['d_lca'] = (dflca_dev['d_lca_parnn'] + dflca_dev['d_lca_lstm'] + dflca_dev['d_lca_gcn']) / 3
# Attach sentence length, readability and entity features.
for feature_frame in (df_dev_len, df_dev_fk, df_dev_ef):
    dflca_dev = dflca_dev.merge(feature_frame)
print('\n')
print(len(dflca_dev))
dflca_dev

In [None]:
# Rank dev sentences from largest to smallest mean 'd_lca'.
sorted_dflca_dev = dflca_dev.sort_values(by='d_lca', ascending=False)

In [None]:
# dev_freq_l[i] = len(df_dev.merge(sorted_dflca_dev[:i])): the number of
# TACREV-patch matches among the first i rows of the dev d_lca ranking.
# Every ranked row contributes as many merge rows as its id occurs in df_dev, so
# a running sum gives the same counts without re-merging a growing slice on each
# iteration (which is quadratic in the number of rows).
patch_multiplicity = df_dev['sentence_id'].value_counts()
rows_matched = sorted_dflca_dev['sentence_id'].map(patch_multiplicity).fillna(0).astype(int)
# Entry i covers rows [0, i), hence the leading 0 and the dropped final total.
dev_freq_l = ([0] + rows_matched.cumsum().tolist())[:len(sorted_dflca_dev)]
print(len(dev_freq_l))

In [None]:
# Jaccard overlap (in %) between the TACREV hits selected by the dev d_lca
# ranking and by the dev confidence ranking, for every budget from 5 rows upward.
dev_intersection_l = [
    jaccard_similarity(
        df_dev.merge(sorted_dflca_dev[:budget])['sentence_id'].tolist(),
        df_dev.merge(sorted_dfconf_dev[:budget])['sentence_id'].tolist(),
    ) * 100
    for budget in range(5, len(sorted_dflca_dev))
]
print(len(dev_intersection_l))

In [None]:
# TACREV-patched dev sentences with their d_lca and feature columns.
pd.merge(df_dev, sorted_dflca_dev)

## TESTING

In [None]:
# Number of distinct test sentence ids shared by the TACREV patch and the ranking.
pd.merge(df_test, sorted_dflca)['sentence_id'].nunique()

In [None]:
# Number of distinct dev sentence ids shared by the TACREV patch and the ranking.
pd.merge(df_dev, sorted_dflca_dev)['sentence_id'].nunique()

# Ratio of Distance on test

In [None]:
# df1 = pd.read_csv('new_parnn_test.tsv', sep='\t')
# df1['d_l/d_p'] = df1['d_lca'] / df1['d_prediction']
# df1 = df1.loc[:, ['sentence_id', 'd_l/d_p']]
# df1 = df1.rename(columns={'d_l/d_p':'d_l/d_p_parnn'})

# df2 = pd.read_csv('new_lstm_test.tsv', sep='\t')
# df2['d_l/d_p'] = df2['d_lca'] / df2['d_prediction']
# df2 = df2.loc[:, ['sentence_id', 'd_l/d_p']]
# df2 = df2.rename(columns={'d_l/d_p':'d_l/d_p_lstm'})

# df3 = pd.read_csv('new_cgcn_test.tsv', sep='\t')
# df3['d_l/d_p'] = df3['d_lca'] / df3['d_prediction']
# df3 = df3.loc[:, ['sentence_id', 'd_l/d_p']]
# df3 = df3.rename(columns={'d_l/d_p':'d_l/d_p_gcn'})

# df = df1.merge(df2).merge(df3)
# df['d_l/d_p'] = (df['d_l/d_p_parnn'] + df['d_l/d_p_lstm'] + df['d_l/d_p_gcn']) / 3
# df = df.merge(df_test_len)
# df = df.merge(df_test_fk)
# df = df.merge(df_test_ef)
# df

In [None]:
# sorted_dfratio = df.sort_values(by=['d_l/d_p'], ascending=False)

In [None]:
# freq_r = []
# for i in range(len(sorted_dfratio)):
#     n_dp = len(df_test.merge(sorted_dfratio[:i]))
#     freq_r.append(n_dp)
# print(len(freq_r))

In [None]:
# intersection_r = []
# for i in range(5, len(sorted_dfratio)):
#     s = list(df_test.merge(sorted_dfratio[:i])['sentence_id'])
#     c = list(df_test.merge(sorted_dfconf[:i])['sentence_id'])
# #     print(s,c)
# #     print(jaccard_similarity(s, c)*100)
#     intersection_r.append(jaccard_similarity(s, c)*100)
# print(len(intersection_r))

# Ratio of distance on dev

In [None]:
# df1 = pd.read_csv('parnn_dev.tsv', sep='\t')
# df1['d_l/d_p'] = df1['d_lca'] / df1['d_prediction']
# df1 = df1.loc[:, ['sentence_id', 'd_l/d_p']]
# df1 = df1.rename(columns={'d_l/d_p':'d_l/d_p_parnn'})

# df2 = pd.read_csv('lstm_dev.tsv', sep='\t')
# df2['d_l/d_p'] = df2['d_lca'] / df2['d_prediction']
# df2 = df2.loc[:, ['sentence_id', 'd_l/d_p']]
# df2 = df2.rename(columns={'d_l/d_p':'d_l/d_p_lstm'})

# df3 = pd.read_csv('gcn_dev.tsv', sep='\t')
# df3['d_l/d_p'] = df3['d_lca'] / df3['d_prediction']
# df3 = df3.loc[:, ['sentence_id', 'd_l/d_p']]
# df3 = df3.rename(columns={'d_l/d_p':'d_l/d_p_gcn'})

# df = df1.merge(df2).merge(df3)
# df['d_l/d_p'] = (df['d_l/d_p_parnn'] + df['d_l/d_p_lstm'] + df['d_l/d_p_gcn']) / 3
# df = df.merge(df_dev_len)
# df = df.merge(df_dev_fk)
# df = df.merge(df_dev_ef)
# df

In [None]:
# df.to_csv('d_ratio_dev.tsv', sep='\t')

In [None]:
# sorted_df = df.sort_values(by=['d_l/d_p'], ascending=False)

In [None]:
# df_dev.merge(sorted_df[:3000])

### Visualising Strategies

In [None]:
import matplotlib.ticker as ticker

In [None]:
# Cumulative TACREV-hit counts per ranking strategy, one column per strategy
# (Confidence / GD = d_prediction ranking / LD = d_lca ranking).
freq = pd.DataFrame({'Confidence':freq_c, 'GD':freq_p, 'LD':freq_l})
dev_freq = pd.DataFrame({'Confidence':dev_freq_c, 'GD':dev_freq_p, 'LD':dev_freq_l})
plt.figure(figsize=(10,5))
# Left panel: test split.
ax1 = plt.subplot(1,2,1)
ax1 = sns.lineplot(data=freq)
plt.ylabel('% of sentences common with LC', fontsize='large', fontweight='bold')
plt.xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
plt.title('Test Dataset', fontsize='large', fontweight='bold')
# Ticks every 67.1 counts = 10% steps of 671.
# NOTE(review): 671 is presumably the number of test sentences that overlap
# with the TACREV patch - confirm and derive it from the data instead.
plt.yticks(np.arange(0, 672, 67.1))
plt.grid()
# Show raw counts as a percentage of 671.
ax1.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=671))

# Right panel: dev split (no grid and no y-label are set here).
ax2 = plt.subplot(1,2,2)
ax2 = sns.lineplot(data=dev_freq)
# plt.ylabel('Common with TACREV')
plt.xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
plt.title('Dev Dataset', fontsize='large', fontweight='bold')
# Ticks every 106.1 counts = 10% steps of 1061.
# NOTE(review): 1061 is presumably the dev counterpart of 671 - confirm.
plt.yticks(np.arange(0, 1062, 106.1))
ax2.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1061))

# No extension given, so matplotlib picks its default output format.
plt.savefig('common')

In [None]:
# Jaccard overlap curves (already scaled to 0-100), one column per ranking pair.
intersection = pd.DataFrame({'Confidence vs Confidence':intersection_c, 'GD vs Confidence':intersection_p, 'LD vs Confidence':intersection_l})
dev_intersection = pd.DataFrame({'Confidence vs Confidence':dev_intersection_c, 'GD vs Confidence':dev_intersection_p, 'LD vs Confidence':dev_intersection_l})
plt.figure(figsize=(10,5))
# Left panel: test split.
ax1 = plt.subplot(1,2,1)
ax1 = sns.lineplot(data=intersection)
plt.ylabel('Jaccard Similarity', fontsize='large', fontweight='bold')
plt.xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
plt.title('Test Dataset', fontsize='large', fontweight='bold')
# plt.yticks(np.arange(0, 643, 64.2))
# Values are percentages already, so 100 maps to "100%".
ax1.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

# Right panel: dev split.
ax2 = plt.subplot(1,2,2)
ax2 = sns.lineplot(data=dev_intersection)
# plt.ylabel('Common with TACREV')
plt.xlabel('Reannotation Budget', fontsize='large', fontweight='bold')
plt.title('Dev Dataset', fontsize='large', fontweight='bold')
# plt.yticks(np.arange(0, 1050, 104.9))
ax2.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

# The x axis is the row index of the frames; the curves start at a budget of 5,
# so index 0 corresponds to the first 5 ranked rows.
plt.savefig('intersection')

In [None]:
# plt.figure()
# plt.scatter(range(len(freq)), np.array(freq.confidence), marker='o')
# plt.scatter(range(len(freq)), np.array(freq.d_prediction), marker='+')
# plt.scatter(range(len(freq)), np.array(freq.d_lca), marker='x')

# Visualization of correct and incorrect predictions

## Test Data

In [None]:
def _load_with_test_features(tsv_name, correctness, model):
    """Read one test-split result file, tag it, and attach the sentence features."""
    tagged = pd.read_csv(tsv_name, sep='\t')
    tagged['correctness'] = correctness
    tagged['model'] = model
    for feature_frame in (df_test_len, df_test_fk, df_test_ef):
        tagged = tagged.merge(feature_frame)
    return tagged


# correctness=0 for the 'new_<model>_test.tsv' rows and 1 for the
# 'new_crct_<model>_test.tsv' rows.
# NOTE(review): presumably these hold the incorrect / correct predictions - confirm.
df1 = _load_with_test_features('new_parnn_test.tsv', 0, 'parnn')
df1c = _load_with_test_features('new_crct_parnn_test.tsv', 1, 'parnn')

df2 = _load_with_test_features('new_lstm_test.tsv', 0, 'lstm')
df2c = _load_with_test_features('new_crct_lstm_test.tsv', 1, 'lstm')

df3 = _load_with_test_features('new_cgcn_test.tsv', 0, 'cgcn')
df3c = _load_with_test_features('new_crct_cgcn_test.tsv', 1, 'cgcn')

In [None]:
# PARNN rows where prediction and ground truth differ and neither label is 0,
# shown with the largest 'd_lca' first.
is_mismatch = df1.ground_truth != df1.prediction
both_nonzero = (df1.ground_truth != 0) & (df1.prediction != 0)
d = df1[is_mismatch & both_nonzero]
d.sort_values(by='d_lca', ascending=False)

In [None]:
# Stack the correctness=0 and correctness=1 rows of each model into one frame.
# pd.concat replaces DataFrame.append, which no longer exists in pandas >= 2.0.
data1 = pd.concat([df1, df1c], ignore_index=True)
data2 = pd.concat([df2, df2c], ignore_index=True)
data3 = pd.concat([df3, df3c], ignore_index=True)

In [None]:
# Sentence length by correctness flag, one panel per model (test split).
fig = plt.figure(figsize=(15,4))
for panel_no, (model_frame, model_title) in enumerate(
        [(data1, 'PARNN'), (data2, 'LSTM'), (data3, 'CGCN')], start=1):
    plt.subplot(1, 3, panel_no)
    sns.violinplot(data=model_frame, x='correctness', y='sentence_len')
    plt.title(model_title)

In [None]:
# Sentence-length histograms: correctness=0 rows vs correctness=1 rows,
# one panel per model (test split).
fig = plt.figure(figsize=(15,4))

length_bins = [10,20,30,40,50,60,70,80,90,100]
model_panels = [(df1, df1c, 'PARNN'), (df2, df2c, 'LSTM'), (df3, df3c, 'CGCN')]
for panel_no, (flag0_frame, flag1_frame, model_title) in enumerate(model_panels, start=1):
    plt.subplot(1, 3, panel_no)
    plt.hist([np.array(flag0_frame.sentence_len), np.array(flag1_frame.sentence_len)], bins=length_bins, label=['0', '1'])
    plt.title(model_title)
    plt.xlabel('Sentence length')
    plt.legend()
    if panel_no == 1:
        # Only the left-most panel carries the y-axis label.
        plt.ylabel('Frequency')

In [None]:
# Flesch-Kincaid grade by correctness flag, one panel per model (test split).
fig = plt.figure(figsize=(15,4))
for panel_no, (model_frame, model_title) in enumerate(
        [(data1, 'PARNN'), (data2, 'LSTM'), (data3, 'CGCN')], start=1):
    plt.subplot(1, 3, panel_no)
    sns.violinplot(data=model_frame, x='correctness', y='f_k_score')
    plt.title(model_title)

In [None]:
# Subject-object token distance by correctness flag, one panel per model (test split).
fig = plt.figure(figsize=(15,4))
for panel_no, (model_frame, model_title) in enumerate(
        [(data1, 'PARNN'), (data2, 'LSTM'), (data3, 'CGCN')], start=1):
    plt.subplot(1, 3, panel_no)
    sns.violinplot(data=model_frame, x='correctness', y='entities_distance')
    plt.title(model_title)

In [None]:
# Entity count by correctness flag, one panel per model (test split).
fig = plt.figure(figsize=(15,4))
for panel_no, (model_frame, model_title) in enumerate(
        [(data1, 'PARNN'), (data2, 'LSTM'), (data3, 'CGCN')], start=1):
    plt.subplot(1, 3, panel_no)
    sns.violinplot(data=model_frame, x='correctness', y='entities_count')
    plt.title(model_title)

## Dev data

In [None]:
def _load_with_dev_features(tsv_name, correctness, model):
    """Read one dev-split result file, tag it, and attach the sentence features."""
    tagged = pd.read_csv(tsv_name, sep='\t')
    tagged['correctness'] = correctness
    tagged['model'] = model
    for feature_frame in (df_dev_len, df_dev_fk, df_dev_ef):
        tagged = tagged.merge(feature_frame)
    return tagged


# correctness=0 for the 'new_<model>_dev.tsv' rows and 1 for the
# 'new_crct_<model>_dev.tsv' rows.
# NOTE(review): presumably these hold the incorrect / correct predictions - confirm.
df1_dev = _load_with_dev_features('new_parnn_dev.tsv', 0, 'parnn')
df1c_dev = _load_with_dev_features('new_crct_parnn_dev.tsv', 1, 'parnn')

df2_dev = _load_with_dev_features('new_lstm_dev.tsv', 0, 'lstm')
df2c_dev = _load_with_dev_features('new_crct_lstm_dev.tsv', 1, 'lstm')

df3_dev = _load_with_dev_features('new_cgcn_dev.tsv', 0, 'cgcn')
df3c_dev = _load_with_dev_features('new_crct_cgcn_dev.tsv', 1, 'cgcn')

In [None]:
# Stack the correctness=0 and correctness=1 rows of each model into one frame.
# pd.concat replaces DataFrame.append, which no longer exists in pandas >= 2.0.
data1_dev = pd.concat([df1_dev, df1c_dev], ignore_index=True)
data2_dev = pd.concat([df2_dev, df2c_dev], ignore_index=True)
data3_dev = pd.concat([df3_dev, df3c_dev], ignore_index=True)

In [None]:
# Sentence length by correctness flag, one panel per model (dev split).
fig = plt.figure(figsize=(15,4))
for panel_no, (model_frame, model_title) in enumerate(
        [(data1_dev, 'PARNN'), (data2_dev, 'LSTM'), (data3_dev, 'CGCN')], start=1):
    plt.subplot(1, 3, panel_no)
    sns.violinplot(data=model_frame, x='correctness', y='sentence_len')
    plt.title(model_title)

In [None]:
# Flesch-Kincaid grade by correctness flag, one panel per model (dev split).
fig = plt.figure(figsize=(15,4))
for panel_no, (model_frame, model_title) in enumerate(
        [(data1_dev, 'PARNN'), (data2_dev, 'LSTM'), (data3_dev, 'CGCN')], start=1):
    plt.subplot(1, 3, panel_no)
    sns.violinplot(data=model_frame, x='correctness', y='f_k_score')
    plt.title(model_title)

In [None]:
# Subject-object token distance by correctness flag, one panel per model (dev split).
fig = plt.figure(figsize=(15,4))
for panel_no, (model_frame, model_title) in enumerate(
        [(data1_dev, 'PARNN'), (data2_dev, 'LSTM'), (data3_dev, 'CGCN')], start=1):
    plt.subplot(1, 3, panel_no)
    sns.violinplot(data=model_frame, x='correctness', y='entities_distance')
    plt.title(model_title)

In [None]:
# Entity count by correctness flag, one panel per model (dev split).
fig = plt.figure(figsize=(15,4))
for panel_no, (model_frame, model_title) in enumerate(
        [(data1_dev, 'PARNN'), (data2_dev, 'LSTM'), (data3_dev, 'CGCN')], start=1):
    plt.subplot(1, 3, panel_no)
    sns.violinplot(data=model_frame, x='correctness', y='entities_count')
    plt.title(model_title)

# Identifying label change in TACREV

In [None]:
# Relation label -> integer id, used to turn the TACREV 'relation' strings into ids.
# Id 0 is 'no_relation'; ids 1-42 are 'per:'/'org:' relation labels.
# NOTE(review): ids 43-59 ('org:location_of_headquarters' ... 'per-loc' ... 'root')
# look like coarser grouping labels rather than TACRED relations - confirm.
LABEL_TO_ID = {'no_relation': 0, 'per:title': 1, 'org:top_members/employees': 2, 'per:employee_of': 3, 'org:alternate_names': 4, 'org:country_of_headquarters': 5, 'per:countries_of_residence': 6, 'org:city_of_headquarters': 7, 'per:cities_of_residence': 8, 'per:age': 9, 'per:stateorprovinces_of_residence': 10, 'per:origin': 11, 'org:subsidiaries': 12, 'org:parents': 13, 'per:spouse': 14, 'org:stateorprovince_of_headquarters': 15, 'per:children': 16, 'per:other_family': 17, 'per:alternate_names': 18, 'org:members': 19, 'per:siblings': 20, 'per:schools_attended': 21, 'per:parents': 22, 'per:date_of_death': 23, 'org:member_of': 24, 'org:founded_by': 25, 'org:website': 26, 'per:cause_of_death': 27, 'org:political/religious_affiliation': 28, 'org:founded': 29, 'per:city_of_death': 30, 'org:shareholders': 31, 'org:number_of_employees/members': 32, 'per:date_of_birth': 33, 'per:city_of_birth': 34, 'per:charges': 35, 'per:stateorprovince_of_death': 36, 'per:religion': 37, 'per:stateorprovince_of_birth': 38, 'per:country_of_birth': 39, 'org:dissolved': 40, 'per:country_of_death': 41, 'per:nationality': 42, 'org:location_of_headquarters':43, 'per:location_of_birth':44, 'per:location_of_death':45, 'per:location_of_residence':46, 'per:family':47, 'per-per':48, 'per-org':49, 'per-misc':50, 'per-loc':51, 'org-per':52, 'org-org':53, 'org-misc':54, 'org-loc':55, 'per':56, 'org':57, 'relation':58, 'root':59}

## Test Data

In [None]:
# TACREV test patch as a frame: relation strings become ids, 'id' becomes 'sentence_id'.
tacrev_test = (
    pd.read_json('./tacrev_patch/test_patch.json')
    .assign(relation=lambda patch: [LABEL_TO_ID[label] for label in patch['relation']])
    .rename(columns={'id': 'sentence_id'})
)
tacrev_test

In [None]:
# sentence_id -> relation id from the TACREV test patch.
tt_dict = {
    patched_id: relation_id
    for patched_id, relation_id in zip(tacrev_test.sentence_id, tacrev_test.relation)
}
tt_dict

In [None]:
# Walk the confidence ranking; for each TACREV-patched sentence where at least
# one model's prediction equals the TACREV relation, record the running share
# (in %) of such sentences among all patched sentences seen so far.
# Requires df1/df2/df3 to hold test-split frames that include a 'prediction' column.
match_conf = []
count = 0
for i, sid in enumerate(sorted_dfconf.sentence_id, start=1):
    if sid not in tt_dict:
        # Not in the patch: skip before doing the per-model lookups.
        continue
    reannotation = tt_dict[sid]
    # .iloc[0] instead of int(Series): converting a one-element Series with
    # int() is deprecated in recent pandas.
    parnn_pred = int(df1.loc[df1['sentence_id'] == sid, 'prediction'].iloc[0])
    lstm_pred = int(df2.loc[df2['sentence_id'] == sid, 'prediction'].iloc[0])
    cgcn_pred = int(df3.loc[df3['sentence_id'] == sid, 'prediction'].iloc[0])
    if reannotation in (parnn_pred, lstm_pred, cgcn_pred):
        count += 1
        # Merge once per hit and reuse the size for both the print and the list.
        n_patched = len(df_test.merge(sorted_dfconf[:i]))
        agreement = count/n_patched*100
        print(i, reannotation, count, n_patched, agreement)
        match_conf.append(agreement)

In [None]:
# Number of recorded agreement points for the confidence ranking.
len(match_conf)

In [None]:
# Walk the d_prediction ranking; for each TACREV-patched sentence where at least
# one model's prediction equals the TACREV relation, record the running share
# (in %) of such sentences among all patched sentences seen so far.
# Requires df1/df2/df3 to hold test-split frames that include a 'prediction' column.
match_dpred = []
count = 0
for i, sid in enumerate(sorted_df_pd.sentence_id, start=1):
    if sid not in tt_dict:
        # Not in the patch: skip before doing the per-model lookups.
        continue
    reannotation = tt_dict[sid]
    # .iloc[0] instead of int(Series): converting a one-element Series with
    # int() is deprecated in recent pandas.
    parnn_pred = int(df1.loc[df1['sentence_id'] == sid, 'prediction'].iloc[0])
    lstm_pred = int(df2.loc[df2['sentence_id'] == sid, 'prediction'].iloc[0])
    cgcn_pred = int(df3.loc[df3['sentence_id'] == sid, 'prediction'].iloc[0])
    if reannotation in (parnn_pred, lstm_pred, cgcn_pred):
        count += 1
        # Merge once per hit and reuse the size for both the print and the list.
        n_patched = len(df_test.merge(sorted_df_pd[:i]))
        agreement = count/n_patched*100
        print(i, reannotation, count, n_patched, agreement)
        match_dpred.append(agreement)

In [None]:
# Number of recorded agreement points for the d_prediction ranking.
len(match_dpred)

In [None]:
# Walk the d_lca ranking; for each TACREV-patched sentence where at least one
# model's prediction equals the TACREV relation, record the running share
# (in %) of such sentences among all patched sentences seen so far.
# Requires df1/df2/df3 to hold test-split frames that include a 'prediction' column.
match_dlca = []
count = 0
for i, sid in enumerate(sorted_dflca.sentence_id, start=1):
    if sid not in tt_dict:
        # Not in the patch: skip before doing the per-model lookups.
        continue
    reannotation = tt_dict[sid]
    # .iloc[0] instead of int(Series): converting a one-element Series with
    # int() is deprecated in recent pandas.
    parnn_pred = int(df1.loc[df1['sentence_id'] == sid, 'prediction'].iloc[0])
    lstm_pred = int(df2.loc[df2['sentence_id'] == sid, 'prediction'].iloc[0])
    cgcn_pred = int(df3.loc[df3['sentence_id'] == sid, 'prediction'].iloc[0])
    if reannotation in (parnn_pred, lstm_pred, cgcn_pred):
        count += 1
        # Merge once per hit and reuse the size for both the print and the list.
        n_patched = len(df_test.merge(sorted_dflca[:i]))
        agreement = count/n_patched*100
        print(i, reannotation, count, n_patched, agreement)
        match_dlca.append(agreement)

In [None]:
# Number of recorded agreement points for the d_lca ranking.
len(match_dlca)

## Dev Data

In [None]:
# TACREV dev patch as a frame: relation strings become ids, 'id' becomes 'sentence_id'.
tacrev_dev = (
    pd.read_json('./tacrev_patch/dev_patch.json')
    .assign(relation=lambda patch: [LABEL_TO_ID[label] for label in patch['relation']])
    .rename(columns={'id': 'sentence_id'})
)
tacrev_dev

In [None]:
# sentence_id -> relation id from the TACREV dev patch.
td_dict = {
    patched_id: relation_id
    for patched_id, relation_id in zip(tacrev_dev.sentence_id, tacrev_dev.relation)
}
td_dict

In [None]:
# Walk the dev confidence ranking; for each TACREV-patched sentence where at
# least one model's prediction equals the TACREV relation, record the running
# share (in %) of such sentences among all patched sentences seen so far.
match_conf_dev = []
count = 0
for i, sid in enumerate(sorted_dfconf_dev.sentence_id, start=1):
    if sid not in td_dict:
        # Not in the patch: skip before doing the per-model lookups.
        continue
    reannotation = td_dict[sid]
    # .iloc[0] instead of int(Series): converting a one-element Series with
    # int() is deprecated in recent pandas.
    parnn_pred = int(df1_dev.loc[df1_dev['sentence_id'] == sid, 'prediction'].iloc[0])
    lstm_pred = int(df2_dev.loc[df2_dev['sentence_id'] == sid, 'prediction'].iloc[0])
    cgcn_pred = int(df3_dev.loc[df3_dev['sentence_id'] == sid, 'prediction'].iloc[0])
    if reannotation in (parnn_pred, lstm_pred, cgcn_pred):
        count += 1
        # Merge once per hit and reuse the size for both the print and the list.
        n_patched = len(df_dev.merge(sorted_dfconf_dev[:i]))
        agreement = count/n_patched*100
        print(i, reannotation, count, n_patched, agreement)
        match_conf_dev.append(agreement)

In [None]:
# Number of recorded agreement points for the dev confidence ranking.
len(match_conf_dev)

In [None]:
# Walk the dev d_prediction ranking; for each TACREV-patched sentence where at
# least one model's prediction equals the TACREV relation, record the running
# share (in %) of such sentences among all patched sentences seen so far.
match_dpred_dev = []
count = 0
for i, sid in enumerate(sorted_df_pd_dev.sentence_id, start=1):
    if sid not in td_dict:
        # Not in the patch: skip before doing the per-model lookups.
        continue
    reannotation = td_dict[sid]
    # .iloc[0] instead of int(Series): converting a one-element Series with
    # int() is deprecated in recent pandas.
    parnn_pred = int(df1_dev.loc[df1_dev['sentence_id'] == sid, 'prediction'].iloc[0])
    lstm_pred = int(df2_dev.loc[df2_dev['sentence_id'] == sid, 'prediction'].iloc[0])
    cgcn_pred = int(df3_dev.loc[df3_dev['sentence_id'] == sid, 'prediction'].iloc[0])
    if reannotation in (parnn_pred, lstm_pred, cgcn_pred):
        count += 1
        # Merge once per hit and reuse the size for both the print and the list.
        n_patched = len(df_dev.merge(sorted_df_pd_dev[:i]))
        agreement = count/n_patched*100
        print(i, reannotation, count, n_patched, agreement)
        match_dpred_dev.append(agreement)

In [None]:
# Number of recorded agreement points for the dev d_prediction ranking.
len(match_dpred_dev)

In [None]:
# Walk the dev d_lca ranking; for each TACREV-patched sentence where at least
# one model's prediction equals the TACREV relation, record the running share
# (in %) of such sentences among all patched sentences seen so far.
match_dlca_dev = []
count = 0
for i, sid in enumerate(sorted_dflca_dev.sentence_id, start=1):
    if sid not in td_dict:
        # Not in the patch: skip before doing the per-model lookups.
        continue
    reannotation = td_dict[sid]
    # .iloc[0] instead of int(Series): converting a one-element Series with
    # int() is deprecated in recent pandas.
    parnn_pred = int(df1_dev.loc[df1_dev['sentence_id'] == sid, 'prediction'].iloc[0])
    lstm_pred = int(df2_dev.loc[df2_dev['sentence_id'] == sid, 'prediction'].iloc[0])
    cgcn_pred = int(df3_dev.loc[df3_dev['sentence_id'] == sid, 'prediction'].iloc[0])
    if reannotation in (parnn_pred, lstm_pred, cgcn_pred):
        count += 1
        # Merge once per hit and reuse the size for both the print and the list.
        n_patched = len(df_dev.merge(sorted_dflca_dev[:i]))
        agreement = count/n_patched*100
        print(i, reannotation, count, n_patched, agreement)
        match_dlca_dev.append(agreement)

In [None]:
# Number of agreement percentages collected for the dev ranking built from
# sorted_dflca_dev (one entry per sentence where a model matched).
len(match_dlca_dev)

In [None]:
# Agreement curves for the three rankings, test split on the left and dev
# split on the right, written to the file 'match'.
match = pd.DataFrame({'Confidence':match_conf, 'GD':match_dpred, 'LD':match_dlca})
dev_match = pd.DataFrame({'Confidence':match_conf_dev, 'GD':match_dpred_dev, 'LD':match_dlca_dev})

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,5))
label_style = dict(fontsize='large', fontweight='bold')

# NOTE(review): `ticker` has to be bound earlier in the notebook (presumably
# matplotlib.ticker) -- confirm the import exists before this cell runs.
sns.lineplot(data=match, ax=ax1)
ax1.set_ylabel('Model Agreement', **label_style)
ax1.set_xlabel('Reannotation Budget', **label_style)
ax1.set_title('Test Dataset', **label_style)
ax1.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

# The right-hand panel carries no y-label of its own.
sns.lineplot(data=dev_match, ax=ax2)
ax2.set_xlabel('Reannotation Budget', **label_style)
ax2.set_title('Dev Dataset', **label_style)
ax2.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=100))

fig.savefig('match')

## Train Data

### Top 100 based on confidence score

In [None]:
# Per-model confidence on the train split. Each result file is reduced to
# (sentence_id, confidence) and the confidence column is renamed per model so
# the three frames can be merged side by side.
confidence_sources = [
    ('./original_dataset_results_12_2_2021/parnn_train.tsv', 'confidence_parnn'),
    ('./original_dataset_results_12_2_2021/lstm_train.tsv', 'confidence_lstm'),
    ('./original_dataset_results_12_2_2021/gcn_train.tsv', 'confidence_gcn'),
]
df1, df2, df3 = [
    pd.read_csv(tsv_path, sep='\t')[['sentence_id', 'confidence']].rename(columns={'confidence': per_model_name})
    for tsv_path, per_model_name in confidence_sources
]

# Merge on the shared column(s), then average the three model confidences.
dfconf = df1.merge(df2).merge(df3)
confidence_total = dfconf['confidence_parnn'] + dfconf['confidence_lstm'] + dfconf['confidence_gcn']
dfconf['confidence'] = confidence_total / 3
dfconf

In [None]:
# Order train sentences from highest to lowest mean confidence; keep the first 100.
sorted_dfconf = dfconf.sort_values('confidence', ascending=False)
conf_100 = sorted_dfconf.iloc[:100]
conf_100

### Top 100 based on distance of prediction from ground truth

In [None]:
# Per-model d_prediction on the train split. Each result file is reduced to
# (sentence_id, d_prediction) and the value column is renamed per model so the
# three frames can be merged side by side.
d_prediction_sources = [
    ('./original_dataset_results_12_2_2021/parnn_train.tsv', 'd_prediction_parnn'),
    ('./original_dataset_results_12_2_2021/lstm_train.tsv', 'd_prediction_lstm'),
    ('./original_dataset_results_12_2_2021/gcn_train.tsv', 'd_prediction_gcn'),
]
df1, df2, df3 = [
    pd.read_csv(tsv_path, sep='\t')[['sentence_id', 'd_prediction']].rename(columns={'d_prediction': per_model_name})
    for tsv_path, per_model_name in d_prediction_sources
]

# Merge on the shared column(s), then average the three per-model values.
dfdpred = df1.merge(df2).merge(df3)
d_prediction_total = dfdpred['d_prediction_parnn'] + dfdpred['d_prediction_lstm'] + dfdpred['d_prediction_gcn']
dfdpred['d_prediction'] = d_prediction_total / 3
dfdpred

In [None]:
# Order train sentences from largest to smallest mean d_prediction; keep the first 100.
sorted_dfdpred = dfdpred.sort_values('d_prediction', ascending=False)
dpred_100 = sorted_dfdpred.iloc[:100]
dpred_100

### Top 100 based on distance of lca from ground truth

In [None]:
# Per-model d_lca on the train split. Each result file is reduced to
# (sentence_id, d_lca) and the value column is renamed per model so the three
# frames can be merged side by side.
d_lca_sources = [
    ('./original_dataset_results_12_2_2021/parnn_train.tsv', 'd_lca_parnn'),
    ('./original_dataset_results_12_2_2021/lstm_train.tsv', 'd_lca_lstm'),
    ('./original_dataset_results_12_2_2021/gcn_train.tsv', 'd_lca_gcn'),
]
df1, df2, df3 = [
    pd.read_csv(tsv_path, sep='\t')[['sentence_id', 'd_lca']].rename(columns={'d_lca': per_model_name})
    for tsv_path, per_model_name in d_lca_sources
]

# Merge on the shared column(s), then average the three per-model values.
dfdlca = df1.merge(df2).merge(df3)
d_lca_total = dfdlca['d_lca_parnn'] + dfdlca['d_lca_lstm'] + dfdlca['d_lca_gcn']
dfdlca['d_lca'] = d_lca_total / 3
dfdlca

In [None]:
# Order train sentences from largest to smallest mean d_lca; keep the first 100.
sorted_dfdlca = dfdlca.sort_values('d_lca', ascending=False)
dlca_100 = sorted_dfdlca.iloc[:100]
dlca_100

### Common Sentences

In [None]:
# Sentence ids of each top-100 selection, as plain lists for membership checks.
conf_list, dpred_list, dlca_list = (
    list(top_100['sentence_id']) for top_100 in (conf_100, dpred_100, dlca_100)
)

In [None]:
# Manual review of the train sentences listed in conf_list: show each one with
# its subject/object spans and its labelled relation, and count the ones the
# annotator rejects.
c_conf = 0

for eg in train_data:
    print('###',eg['id'])
    if eg['id'] not in conf_list:
        continue
    print(eg['id'])
    print(' '.join(eg['token']))
    # The slice end is +1 so that the token at subj_end / obj_end is included.
    subj_span = eg['token'][eg['subj_start']:eg['subj_end']+1]
    obj_span = eg['token'][eg['obj_start']:eg['obj_end']+1]
    print((subj_span, obj_span))
    print(eg['relation'])
    correctness = input("Is relation correct? (Y/N)")
    # Normalise the answer so a lowercase or space-padded "n" is not silently
    # treated as "correct".
    if correctness.strip().upper() == 'N':
        c_conf += 1

In [None]:
# Display the rejection counter accumulated while reviewing the conf_list sentences.
c_conf

In [None]:
# Manual review of the train sentences listed in dpred_list: show each one with
# its subject/object spans and its labelled relation, and count the ones the
# annotator rejects.
c_dpred = 0
c = 1  # 1-based position of the sentence currently shown to the annotator
for eg in train_data:
    print('###',eg['id'])
    if eg['id'] not in dpred_list:
        continue
    print(c)
    c += 1
    print(eg['id'])
    print(' '.join(eg['token']))
    # The slice end is +1 so that the token at subj_end / obj_end is included.
    subj_span = eg['token'][eg['subj_start']:eg['subj_end']+1]
    obj_span = eg['token'][eg['obj_start']:eg['obj_end']+1]
    print((subj_span, obj_span))
    print(eg['relation'])
    correctness = input("Is relation correct? (Y/N)")
    # Normalise the answer so a lowercase or space-padded "n" is not silently
    # treated as "correct".
    if correctness.strip().upper() == 'N':
        # Rejections in this pass belong to c_dpred; bumping c_conf here would
        # leave c_dpred at zero and inflate the confidence-ranking count.
        c_dpred += 1

In [None]:
# Display the c_dpred counter initialised for the dpred_list review.
c_dpred

In [None]:
# Manual review of the train sentences listed in dlca_list: show each one with
# its subject/object spans and its labelled relation, and count the ones the
# annotator rejects.
c_dlca = 0
c = 1  # 1-based position of the sentence currently shown to the annotator
for eg in train_data:
    print('###',eg['id'])
    if eg['id'] not in dlca_list:
        continue
    print(c)
    c += 1
    print(eg['id'])
    print(' '.join(eg['token']))
    # The slice end is +1 so that the token at subj_end / obj_end is included.
    subj_span = eg['token'][eg['subj_start']:eg['subj_end']+1]
    obj_span = eg['token'][eg['obj_start']:eg['obj_end']+1]
    print((subj_span, obj_span))
    print(eg['relation'])
    correctness = input("Is relation correct? (Y/N)")
    # Normalise the answer so a lowercase or space-padded "n" is not silently
    # treated as "correct".
    if correctness.strip().upper() == 'N':
        c_dlca += 1

In [None]:
# Display the rejection counter accumulated while reviewing the dlca_list sentences.
c_dlca