In [1]:
import pandas as pd
import json
import textstat as txt
from itertools import groupby
import pickle
import csv

import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the three TACRED splits. Each file is opened in a `with` block so the
# handle is closed right after parsing instead of being left to the garbage
# collector.
with open('./../dataset/tacred/json/train.json') as split_file:
    train_data = json.load(split_file)
print("Number of Training instances :: {}".format(len(train_data)))

with open('./../dataset/tacred/json/dev.json') as split_file:
    dev_data = json.load(split_file)
print("Number of Dev instances :: {}".format(len(dev_data)))

with open('./../dataset/tacred/json/test.json') as split_file:
    test_data = json.load(split_file)
print("Number of Test instances :: {}".format(len(test_data)))

Number of Training instances :: 68124
Number of Dev instances :: 22631
Number of Test instances :: 15509


In [30]:
# Ids kept by the hierarchy-based filter, keyed by split (see the recorded
# `hierarchy_filtered.keys()` output further down).
# NOTE(review): this is an absolute, machine-specific path, and pickle.load can
# execute arbitrary code - only load a file you trust.
with open('/home/akshay/re_rc/ACL-SIGIR2021/filtered_ids.pkl', 'rb') as pkl_file:
    hierarchy_filtered = pickle.load(pkl_file)

In [None]:
def convert_token(token):
    """Translate a PTB bracket escape into the bracket character it stands for.

    The comparison is case-insensitive. Any token that is not one of the six
    bracket escapes is returned unchanged.
    """
    ptb_brackets = {
        '-lrb-': '(',
        '-rrb-': ')',
        '-lsb-': '[',
        '-rsb-': ']',
        '-lcb-': '{',
        '-rcb-': '}',
    }
    return ptb_brackets.get(token.lower(), token)

In [None]:
def print_details(sid, data=train_data):
    """Print the fields of every example in ``data`` whose ``id`` equals ``sid``.

    ``data`` defaults to whatever ``train_data`` was bound to when this cell
    ran. For each match the function prints the id, the space-joined tokens
    (PTB brackets converted), the token count, the subject and object token
    spans, their index bounds, the relation, the NER tags and both argument
    types, then a dashed separator. Nothing is printed when no example
    matches; nothing is returned.
    """
    for example in data:
        if example['id'] != sid:
            continue
        print(example['id'])
        tokens = example['token']
        print(' '.join(convert_token(tok) for tok in tokens))
        print(len(tokens))
        # Span bounds are inclusive, hence the +1 on the slice ends.
        subject_span = tokens[example['subj_start']:example['subj_end'] + 1]
        object_span = tokens[example['obj_start']:example['obj_end'] + 1]
        print((subject_span, object_span))
        print(example['subj_start'], example['subj_end'], example['obj_start'], example['obj_end'])
        for field in ('relation', 'stanford_ner', 'subj_type', 'obj_type'):
            print(example[field])
        print("-----" * 20)

In [None]:
def generate_flesch_kincaid_grade(dataset):
    """Score every example's sentence with ``txt.flesch_kincaid_grade``.

    The sentence is the example's tokens joined by single spaces after PTB
    bracket conversion. Prints the two list lengths and returns a DataFrame
    with columns ``sentence_id`` and ``f_k_score``, one row per example in
    input order.
    """
    rows = []
    for example in dataset:
        sentence_id = example['id']
        text = ' '.join(map(convert_token, example['token']))
        rows.append((sentence_id, txt.flesch_kincaid_grade(text)))

    ids = [sentence_id for sentence_id, _ in rows]
    grades = [grade for _, grade in rows]
    assert len(ids) == len(grades)
    print(len(ids), len(grades))
    return pd.DataFrame({'sentence_id': ids, 'f_k_score': grades})

In [None]:
# Flesch-Kincaid grade for every training sentence (one row per example).
df_train_fk = generate_flesch_kincaid_grade(train_data)

In [None]:
def generate_sentence_length_mapping(dataset):
    """Map every example id to the number of tokens in its sentence.

    Prints the two list lengths and returns a DataFrame with columns
    ``sentence_id`` and ``sentence_len``, one row per example in input order.
    """
    pairs = [(example['id'], len(example['token'])) for example in dataset]
    ids = [sentence_id for sentence_id, _ in pairs]
    lengths = [n_tokens for _, n_tokens in pairs]

    assert len(ids) == len(lengths)
    print(len(ids), len(lengths))
    return pd.DataFrame({'sentence_id': ids, 'sentence_len': lengths})

In [None]:
# Token counts per sentence for the train and dev splits.
df_train_len = generate_sentence_length_mapping(train_data)
df_dev_len = generate_sentence_length_mapping(dev_data)

In [None]:
# Preview the first rows of the train sentence-length table.
df_train_len.head()

In [None]:
# Preview the first rows of the dev sentence-length table.
df_dev_len.head()

In [None]:
def get_coarse_type(et):
    """Collapse a fine-grained entity type into its coarse type.

    City, country and state/province types become ``'LOCATION'``; the listed
    miscellaneous types become ``'MISC'``; every other value is returned as is.
    """
    coarse_groups = (
        ('LOCATION', ('CITY', 'COUNTRY', 'STATE_OR_PROVINCE')),
        ('MISC', ('URL', 'NATIONALITY', 'TITLE', 'CRIMINAL_CHARGE', 'RELIGION',
                  'DURATION', 'IDEOLOGY', 'CAUSE_OF_DEATH')),
    )
    for coarse, fine_types in coarse_groups:
        if et in fine_types:
            return coarse
    return et

In [None]:
def get_similar_entity_count(ets, st, ot):
    """Count entities in ``ets`` that share a coarse type with either argument.

    ``st`` and ``ot`` are coarsened with ``get_coarse_type`` before the
    comparison. Two is subtracted from the number of matches and the result
    is floored at zero.
    """
    subj_coarse = get_coarse_type(st)
    obj_coarse = get_coarse_type(ot)
    matches = 0
    for entity_type in ets:
        if entity_type == subj_coarse or entity_type == obj_coarse:
            matches += 1
    surplus = matches - 2
    return surplus if surplus > 0 else 0

In [None]:
def generate_entity_specific_attribute(dataset):
    """Build per-sentence entity features for the examples in ``dataset``.

    Each example must provide ``id``, ``subj_start``/``subj_end``,
    ``obj_start``/``obj_end`` (inclusive token indices), ``subj_type``,
    ``obj_type`` and ``stanford_ner`` (one tag per token).

    Prints the lengths of the id, distance and count lists and returns a
    DataFrame with one row per example and the columns:

    * ``sentence_id``        - the example id;
    * ``entities_distance``  - number of tokens strictly between the subject
      and object spans (0 when they are adjacent or overlap);
    * ``entities_count``     - number of maximal runs of identical non-``'O'``
      tags in ``stanford_ner``;
    * ``sim_arg_type_count`` - ``get_similar_entity_count`` over those runs;
    * ``same_args``          - 1 when ``subj_type == obj_type``, else 0.
    """
    s_ids = []
    distance = []
    e_count = []
    sim_ent_count = []
    same_arg = []
    for eg in dataset:
        s_ids.append(eg['id'])
        s_start, s_end = eg['subj_start'], eg['subj_end']
        o_start, o_end = eg['obj_start'], eg['obj_end']
        s_type, o_type = eg['subj_type'], eg['obj_type']

        # Tokens between the two arguments, whichever comes first. The clamp
        # covers overlapping spans: previously neither branch fired, so the
        # distance was either undefined (first row) or silently reused from
        # the previous example.
        gap = max(o_start - s_end, s_start - o_end) - 1
        distance.append(max(0, gap))

        # One entity per run of equal consecutive NER tags, skipping 'O'.
        # groupby also emits the final run, so an entity that ends on the
        # last token is counted (the hand-rolled scan never flushed it).
        etypes = [tag for tag, _run in groupby(eg['stanford_ner']) if tag != 'O']
        e_count.append(len(etypes))

        # Number of entities with a type similar to the arguments
        sim_ent_count.append(get_similar_entity_count(etypes, s_type, o_type))

        # Same argument types
        same_arg.append(1 if s_type == o_type else 0)

    assert len(s_ids) == len(distance)
    assert len(s_ids) == len(e_count)
    print(len(s_ids), len(distance), len(e_count))
    df = pd.DataFrame({'sentence_id':s_ids, 'entities_distance':distance, 'entities_count':e_count,
                       'sim_arg_type_count':sim_ent_count, 'same_args':same_arg})
    return df

In [None]:
# Entity-level features for the train and dev splits.
df_train_ef = generate_entity_specific_attribute(train_data)
df_dev_ef = generate_entity_specific_attribute(dev_data)

In [None]:
# Preview the first rows of the train entity-feature table.
df_train_ef.head()

In [None]:
# Preview the first rows of the dev entity-feature table.
df_dev_ef.head()

In [None]:
# Combine the entity features with the sentence lengths. With no `on=` the
# merge joins on the columns the two frames share, and `sentence_id` is the
# only column both generator functions emit. The Flesch-Kincaid scores in
# df_train_fk are deliberately left out of this table.
df_train_attr = pd.merge(df_train_ef, df_train_len)
df_train_attr.head()

In [None]:
# Dev-split attribute table: entity features joined with sentence lengths on
# the columns the two frames share.
df_dev_attr = pd.merge(df_dev_ef, df_dev_len)
df_dev_attr.head()

### Noisy Instances for Elimination

In [3]:
# Noisy ids stored in Pallneg_noisy_ids.pkl, split into train and dev.
# The `with` block closes the file handle once the pickle is read.
with open('./Pallneg_noisy_ids.pkl', 'rb') as pkl_file:
    pnoisy = pickle.load(pkl_file)
parnn_train_noisy = pnoisy['train']
parnn_dev_noisy = pnoisy['dev']

In [4]:
# Noisy ids stored in allneg_noisy_ids.pkl, split into train and dev.
# The `with` block closes the file handle once the pickle is read.
with open('./allneg_noisy_ids.pkl', 'rb') as pkl_file:
    cnoisy = pickle.load(pkl_file)
cgcn_train_noisy = cnoisy['train']
cgcn_dev_noisy = cnoisy['dev']

In [5]:
# Ids present in both noisy lists, per split.
common_train = set(parnn_train_noisy) & set(cgcn_train_noisy)
print(len(cgcn_train_noisy), len(parnn_train_noisy), len(common_train))

common_dev = set(parnn_dev_noisy) & set(cgcn_dev_noisy)
print(len(cgcn_dev_noisy), len(parnn_dev_noisy), len(common_dev))

1878 1568 797
969 852 454


In [None]:
# Attribute rows of the training sentences that are in common_train.
is_noisy_train = df_train_attr['sentence_id'].isin(common_train)
noisy_train = df_train_attr.loc[is_noisy_train]
noisy_train.describe()

In [None]:
# Attribute rows of the training sentences that are NOT in common_train.
is_clean_train = ~df_train_attr['sentence_id'].isin(common_train)
rest_train = df_train_attr.loc[is_clean_train]
rest_train.describe()

In [None]:
l = len(noisy_train)
print("Number of common (PALSTM & CGCN) noisy train instances  ::  {}".format(l))

# Each entry pairs an output line with the row filter it reports on. Although
# the labels say "Number of instances", the printed value is the share of the
# l noisy rows that pass the filter, as a percentage rounded to two decimals.
report = [
    ("Number of instances with entities distance equals to 1  :: {}",
     lambda frame: frame.entities_distance == 1),
    ("Number of instances with entities distance greater than 8  :: {}",
     lambda frame: frame.entities_distance > 8),
    ("Number of instances with entities count greater than 5  :: {}",
     lambda frame: frame.entities_count > 5),
    ("Number of instances with entities count greater than 3  :: {}",
     lambda frame: frame.entities_count > 3),
    ("Number of instances with more than 1 entity similar to arguements' type :: {}",
     lambda frame: frame.sim_arg_type_count > 1),
    ("Number of instances with both the arguement of same type type :: {}",
     lambda frame: frame.same_args == 1),
    ("Number of instances with length of sentence greater than 30 :: {}",
     lambda frame: frame.sentence_len > 30),
]
for line, matches in report:
    hits = len(noisy_train[matches(noisy_train)])
    print(line.format(round(hits / l * 100, 2)))

In [None]:
# Attribute rows of the dev sentences that are in common_dev.
is_noisy_dev = df_dev_attr['sentence_id'].isin(common_dev)
noisy_dev = df_dev_attr.loc[is_noisy_dev]
noisy_dev.describe()

In [None]:
# Attribute rows of the dev sentences that are NOT in common_dev.
is_clean_dev = ~df_dev_attr['sentence_id'].isin(common_dev)
rest_dev = df_dev_attr.loc[is_clean_dev]
rest_dev.describe()

In [None]:
l = len(noisy_dev)
print("Number of common (PALSTM & CGCN) noisy dev instances  ::  {}".format(l))

# Each entry pairs an output line with the row filter it reports on. Although
# the labels say "Number of instances", the printed value is the share of the
# l noisy rows that pass the filter, as a percentage rounded to two decimals.
report = [
    ("Number of instances with entities distance equals to 1  :: {}",
     lambda frame: frame.entities_distance == 1),
    ("Number of instances with entities distance greater than 8  :: {}",
     lambda frame: frame.entities_distance > 8),
    ("Number of instances with entities count greater than 5  :: {}",
     lambda frame: frame.entities_count > 5),
    ("Number of instances with entities count greater than 3  :: {}",
     lambda frame: frame.entities_count > 3),
    ("Number of instances with more than 1 entity similar to arguements' type :: {}",
     lambda frame: frame.sim_arg_type_count > 1),
    ("Number of instances with both the arguement of same type type :: {}",
     lambda frame: frame.same_args == 1),
    ("Number of instances with length of sentence greater than 30 :: {}",
     lambda frame: frame.sentence_len > 30),
]
for line, matches in report:
    hits = len(noisy_dev[matches(noisy_dev)])
    print(line.format(round(hits / l * 100, 2)))

In [None]:
sids = noisy_train.sentence_id.values
# Inspect up to 100 randomly chosen noisy training sentences. The sample size
# is capped at the number of ids because choice(..., replace=False) raises
# ValueError when asked for more items than exist. The draw is unseeded, so
# each run shows a different sample.
sample_size = min(100, len(sids))
indexes = np.random.choice(len(sids), size=sample_size, replace=False)

for i, index in enumerate(indexes, start=1):
    print(i)
    print_details(sids[index])

### Noisy Instances for Reannotation

In [27]:
# Relabel ids stored in Pallneg_relabel_ids.pkl, split into train and dev.
# This rebinds the pnoisy / parnn_*_noisy names used by the elimination
# section above. The `with` block closes the file handle after reading.
with open('./Pallneg_relabel_ids.pkl', 'rb') as pkl_file:
    pnoisy = pickle.load(pkl_file)
parnn_train_noisy = pnoisy['train']
parnn_dev_noisy = pnoisy['dev']

In [28]:
# Relabel ids stored in allneg_relabel_ids.pkl, split into train and dev.
# This rebinds the cnoisy / cgcn_*_noisy names used by the elimination
# section above. The `with` block closes the file handle after reading.
with open('./allneg_relabel_ids.pkl', 'rb') as pkl_file:
    cnoisy = pickle.load(pkl_file)
cgcn_train_noisy = cnoisy['train']
cgcn_dev_noisy = cnoisy['dev']

In [29]:
# Ids present in both relabel lists, per split.
common_train = set(parnn_train_noisy) & set(cgcn_train_noisy)
print(len(cgcn_train_noisy), len(parnn_train_noisy), len(common_train))

common_dev = set(parnn_dev_noisy) & set(cgcn_dev_noisy)
print(len(cgcn_dev_noisy), len(parnn_dev_noisy), len(common_dev))

1642 1354 694
843 768 403


In [None]:
# Attribute rows of the training sentences that are in common_train.
is_noisy_train = df_train_attr['sentence_id'].isin(common_train)
noisy_train = df_train_attr.loc[is_noisy_train]
noisy_train.describe()

In [None]:
# Attribute rows of the training sentences that are NOT in common_train.
is_clean_train = ~df_train_attr['sentence_id'].isin(common_train)
rest_train = df_train_attr.loc[is_clean_train]
rest_train.describe()

In [None]:
l = len(noisy_train)
print("Number of common (PALSTM & CGCN) noisy train instances  ::  {}".format(l))

# Each entry pairs an output line with the row filter it reports on. Although
# the labels say "Number of instances", the printed value is the share of the
# l noisy rows that pass the filter, as a percentage rounded to two decimals.
report = [
    ("Number of instances with entities distance equals to 1  :: {}",
     lambda frame: frame.entities_distance == 1),
    ("Number of instances with entities distance greater than 8  :: {}",
     lambda frame: frame.entities_distance > 8),
    ("Number of instances with entities count greater than 5  :: {}",
     lambda frame: frame.entities_count > 5),
    ("Number of instances with entities count greater than 3  :: {}",
     lambda frame: frame.entities_count > 3),
    ("Number of instances with more than 1 entity similar to arguements' type :: {}",
     lambda frame: frame.sim_arg_type_count > 1),
    ("Number of instances with both the arguement of same type type :: {}",
     lambda frame: frame.same_args == 1),
    ("Number of instances with length of sentence greater than 30 :: {}",
     lambda frame: frame.sentence_len > 30),
]
for line, matches in report:
    hits = len(noisy_train[matches(noisy_train)])
    print(line.format(round(hits / l * 100, 2)))

In [None]:
# Attribute rows of the dev sentences that are in common_dev.
is_noisy_dev = df_dev_attr['sentence_id'].isin(common_dev)
noisy_dev = df_dev_attr.loc[is_noisy_dev]
noisy_dev.describe()

In [None]:
# Attribute rows of the dev sentences that are NOT in common_dev.
is_clean_dev = ~df_dev_attr['sentence_id'].isin(common_dev)
rest_dev = df_dev_attr.loc[is_clean_dev]
rest_dev.describe()

In [None]:
l = len(noisy_dev)
print("Number of common (PALSTM & CGCN) noisy dev instances  ::  {}".format(l))

# Each entry pairs an output line with the row filter it reports on. Although
# the labels say "Number of instances", the printed value is the share of the
# l noisy rows that pass the filter, as a percentage rounded to two decimals.
report = [
    ("Number of instances with entities distance equals to 1  :: {}",
     lambda frame: frame.entities_distance == 1),
    ("Number of instances with entities distance greater than 8  :: {}",
     lambda frame: frame.entities_distance > 8),
    ("Number of instances with entities count greater than 5  :: {}",
     lambda frame: frame.entities_count > 5),
    ("Number of instances with entities count greater than 3  :: {}",
     lambda frame: frame.entities_count > 3),
    ("Number of instances with more than 1 entity similar to arguements' type :: {}",
     lambda frame: frame.sim_arg_type_count > 1),
    ("Number of instances with both the arguement of same type type :: {}",
     lambda frame: frame.same_args == 1),
    ("Number of instances with length of sentence greater than 30 :: {}",
     lambda frame: frame.sentence_len > 30),
]
for line, matches in report:
    hits = len(noisy_dev[matches(noisy_dev)])
    print(line.format(round(hits / l * 100, 2)))

In [None]:
# Show one hand-picked example; the lookup uses print_details' default data.
print_details('61b3a89f60d0777c9f0f')

In [None]:
# Show one hand-picked example; the lookup uses print_details' default data.
print_details('61b3aa9e363a44f45c48')

In [None]:
# Show one hand-picked example; the lookup uses print_details' default data.
print_details('61b3a65fb9b7111c4ca4')

In [None]:
sids = noisy_train.sentence_id.values
# Inspect up to 100 randomly chosen noisy training sentences. The sample size
# is capped at the number of ids because choice(..., replace=False) raises
# ValueError when asked for more items than exist. The draw is unseeded, so
# each run shows a different sample.
sample_size = min(100, len(sids))
indexes = np.random.choice(len(sids), size=sample_size, replace=False)

for i, index in enumerate(indexes, start=1):
    print(i)
    print_details(sids[index])

In [None]:
# Number of sentence ids currently held in `sids`.
len(sids)

In [None]:
sids = noisy_train.sentence_id.values

# Index the (subj_type, obj_type) pair(s) of every training example by id in
# one pass, so each noisy id becomes a dictionary lookup instead of a full
# scan of train_data. A list per id keeps the result identical even if an id
# occurs more than once.
arg_types_by_id = {}
for eg in train_data:
    arg_types_by_id.setdefault(eg['id'], []).append((eg['subj_type'], eg['obj_type']))

tmp = []
for sid in sids:
    tmp.extend(arg_types_by_id.get(sid, []))

In [None]:
from collections import Counter

In [None]:
# (subj_type, obj_type) pairs ranked from most to least frequent; ties keep
# first-seen order.
Counter(tmp).most_common()

## Effect on Predictions

### Eliminating noisy negative instances

In [None]:
# Per-relation statistics: cgcn.csv is the reference run, cgcn-elimination-k3.csv
# the run compared against it.
cgcn = pd.read_csv('./cgcn/results/prediction_stats/cgcn.csv', header=0)
cgcn_en = pd.read_csv('cgcn/results/prediction_stats/cgcn-elimination-k3.csv')

# The subtraction pairs rows by index label, so both files are assumed to list
# the relations in the same order - TODO confirm.
cgcn_en_diff = pd.DataFrame(
    {'relation': cgcn['relation'], 'f1_diff': cgcn_en['f1'].sub(cgcn['f1'])}
)
# cgcn_en_diff

In [None]:
# How many relations gained, lost or kept their f1 in cgcn_en_diff.
_delta = cgcn_en_diff['f1_diff']
print("Performance Improved  :: ", int((_delta > 0).sum()))
print("Performance Declined  :: ", int((_delta < 0).sum()))
print("Performance Remained Same  :: ", int((_delta == 0).sum()))

In [None]:
# Per-relation statistics: parnn.csv is the reference run,
# parnn-elimination-k3.csv the run compared against it.
parnn = pd.read_csv('./tacred/results/prediction_stats/parnn.csv', header=0)
parnn_en = pd.read_csv('tacred/results/prediction_stats/parnn-elimination-k3.csv')

# The subtraction pairs rows by index label, so both files are assumed to list
# the relations in the same order - TODO confirm.
parnn_en_diff = pd.DataFrame(
    {'relation': parnn['relation'], 'f1_diff': parnn_en['f1'].sub(parnn['f1'])}
)
# parnn_en_diff

In [None]:
# How many relations gained, lost or kept their f1 in parnn_en_diff.
_delta = parnn_en_diff['f1_diff']
print("Performance Improved  :: ", int((_delta > 0).sum()))
print("Performance Declined  :: ", int((_delta < 0).sum()))
print("Performance Remained Same  :: ", int((_delta == 0).sum()))

In [None]:
# Relations whose f1 moved the same way in both parnn_en_diff and cgcn_en_diff.
_p_rel, _p_delta = parnn_en_diff['relation'], parnn_en_diff['f1_diff']
_c_rel, _c_delta = cgcn_en_diff['relation'], cgcn_en_diff['f1_diff']

eni = set(_p_rel[_p_delta > 0]) & set(_c_rel[_c_delta > 0])
print("Performance Improved  :: ", eni)
end = set(_p_rel[_p_delta < 0]) & set(_c_rel[_c_delta < 0])
print("Performance Declined  :: ", end)
ens = set(_p_rel[_p_delta == 0]) & set(_c_rel[_c_delta == 0])
print("Performance Remained Same  :: ", ens)

In [None]:
# Sizes of the sets of relations that moved the same way in both diff tables.
for _label, _pick in (
    ("Performance Improved  :: ", lambda delta: delta > 0),
    ("Performance Declined  :: ", lambda delta: delta < 0),
    ("Performance Remained Same  :: ", lambda delta: delta == 0),
):
    _shared = (set(parnn_en_diff.relation[_pick(parnn_en_diff.f1_diff)])
               & set(cgcn_en_diff.relation[_pick(cgcn_en_diff.f1_diff)]))
    print(_label, len(_shared))

### Eliminating Noisy Negative & Positive Instances

In [None]:
# Per-relation statistics of the run in cgcn-elimination-posneg-k3.csv,
# compared against the `cgcn` reference table loaded earlier.
cgcn_enp = pd.read_csv('cgcn/results/prediction_stats/cgcn-elimination-posneg-k3.csv')

# Rows are paired by index label - same relation order assumed, TODO confirm.
cgcn_enp_diff = pd.DataFrame(
    {'relation': cgcn['relation'], 'f1_diff': cgcn_enp['f1'].sub(cgcn['f1'])}
)
# cgcn_enp_diff

In [None]:
# How many relations gained, lost or kept their f1 in cgcn_enp_diff.
_delta = cgcn_enp_diff['f1_diff']
print("Performance Improved  :: ", int((_delta > 0).sum()))
print("Performance Declined  :: ", int((_delta < 0).sum()))
print("Performance Remained Same  :: ", int((_delta == 0).sum()))

In [None]:
# Per-relation statistics of the run in parnn-elimination-posneg-k3.csv,
# compared against the `parnn` reference table loaded earlier.
parnn_enp = pd.read_csv('tacred/results/prediction_stats/parnn-elimination-posneg-k3.csv')

# Rows are paired by index label - same relation order assumed, TODO confirm.
parnn_enp_diff = pd.DataFrame(
    {'relation': parnn['relation'], 'f1_diff': parnn_enp['f1'].sub(parnn['f1'])}
)
# parnn_enp_diff

In [None]:
# How many relations gained, lost or kept their f1 in parnn_enp_diff.
_delta = parnn_enp_diff['f1_diff']
print("Performance Improved  :: ", int((_delta > 0).sum()))
print("Performance Declined  :: ", int((_delta < 0).sum()))
print("Performance Remained Same  :: ", int((_delta == 0).sum()))

In [None]:
# Relations whose f1 moved the same way in both parnn_enp_diff and cgcn_enp_diff.
_p_rel, _p_delta = parnn_enp_diff['relation'], parnn_enp_diff['f1_diff']
_c_rel, _c_delta = cgcn_enp_diff['relation'], cgcn_enp_diff['f1_diff']

enpi = set(_p_rel[_p_delta > 0]) & set(_c_rel[_c_delta > 0])
print("Performance Improved  :: ", enpi)
enpd = set(_p_rel[_p_delta < 0]) & set(_c_rel[_c_delta < 0])
print("Performance Declined  :: ", enpd)
enps = set(_p_rel[_p_delta == 0]) & set(_c_rel[_c_delta == 0])
print("Performance Remained Same  :: ", enps)

In [None]:
# Sizes of the sets of relations that moved the same way in both diff tables.
for _label, _pick in (
    ("Performance Improved  :: ", lambda delta: delta > 0),
    ("Performance Declined  :: ", lambda delta: delta < 0),
    ("Performance Remained Same  :: ", lambda delta: delta == 0),
):
    _shared = (set(parnn_enp_diff.relation[_pick(parnn_enp_diff.f1_diff)])
               & set(cgcn_enp_diff.relation[_pick(cgcn_enp_diff.f1_diff)]))
    print(_label, len(_shared))

### Reannotating Noisy Negative Instances

In [None]:
# Per-relation statistics of the run in cgcn-replace-neg.csv, compared against
# the `cgcn` reference table loaded earlier.
cgcn_rn = pd.read_csv('cgcn/results/prediction_stats/cgcn-replace-neg.csv')

# Rows are paired by index label - same relation order assumed, TODO confirm.
cgcn_rn_diff = pd.DataFrame(
    {'relation': cgcn['relation'], 'f1_diff': cgcn_rn['f1'].sub(cgcn['f1'])}
)
# cgcn_rn_diff

In [None]:
# How many relations gained, lost or kept their f1 in cgcn_rn_diff.
_delta = cgcn_rn_diff['f1_diff']
print("Performance Improved  :: ", int((_delta > 0).sum()))
print("Performance Declined  :: ", int((_delta < 0).sum()))
print("Performance Remained Same  :: ", int((_delta == 0).sum()))

In [None]:
# Per-relation statistics of the run in parnn-replace-neg.csv, compared against
# the `parnn` reference table loaded earlier.
parnn_rn = pd.read_csv('tacred/results/prediction_stats/parnn-replace-neg.csv')

# Rows are paired by index label - same relation order assumed, TODO confirm.
parnn_rn_diff = pd.DataFrame(
    {'relation': parnn['relation'], 'f1_diff': parnn_rn['f1'].sub(parnn['f1'])}
)
# parnn_rn_diff

In [None]:
# How many relations gained, lost or kept their f1 in parnn_rn_diff.
_delta = parnn_rn_diff['f1_diff']
print("Performance Improved  :: ", int((_delta > 0).sum()))
print("Performance Declined  :: ", int((_delta < 0).sum()))
print("Performance Remained Same  :: ", int((_delta == 0).sum()))

In [None]:
# Relations whose f1 moved the same way in both parnn_rn_diff and cgcn_rn_diff.
_p_rel, _p_delta = parnn_rn_diff['relation'], parnn_rn_diff['f1_diff']
_c_rel, _c_delta = cgcn_rn_diff['relation'], cgcn_rn_diff['f1_diff']

rni = set(_p_rel[_p_delta > 0]) & set(_c_rel[_c_delta > 0])
print("Performance Improved  :: ", rni)
rnd = set(_p_rel[_p_delta < 0]) & set(_c_rel[_c_delta < 0])
print("Performance Declined  :: ", rnd)
rns = set(_p_rel[_p_delta == 0]) & set(_c_rel[_c_delta == 0])
print("Performance Remained Same  :: ", rns)

In [None]:
# Relations with a positive f1_diff in cgcn_rn_diff, in table order.
list(cgcn_rn_diff.relation[cgcn_rn_diff.f1_diff > 0])

In [None]:
# Sizes of the sets of relations that moved the same way in both diff tables.
for _label, _pick in (
    ("Performance Improved  :: ", lambda delta: delta > 0),
    ("Performance Declined  :: ", lambda delta: delta < 0),
    ("Performance Remained Same  :: ", lambda delta: delta == 0),
):
    _shared = (set(parnn_rn_diff.relation[_pick(parnn_rn_diff.f1_diff)])
               & set(cgcn_rn_diff.relation[_pick(cgcn_rn_diff.f1_diff)]))
    print(_label, len(_shared))

### Reannotating Noisy Negative and Positive Instances

In [None]:
# Per-relation statistics of the run in cgcn-replace-posneg.csv, compared
# against the `cgcn` reference table loaded earlier.
cgcn_rnp = pd.read_csv('cgcn/results/prediction_stats/cgcn-replace-posneg.csv')

# Rows are paired by index label - same relation order assumed, TODO confirm.
cgcn_rnp_diff = pd.DataFrame(
    {'relation': cgcn['relation'], 'f1_diff': cgcn_rnp['f1'].sub(cgcn['f1'])}
)
# cgcn_rnp_diff

In [None]:
# How many relations gained, lost or kept their f1 in cgcn_rnp_diff.
_delta = cgcn_rnp_diff['f1_diff']
print("Performance Improved  :: ", int((_delta > 0).sum()))
print("Performance Declined  :: ", int((_delta < 0).sum()))
print("Performance Remained Same  :: ", int((_delta == 0).sum()))

In [None]:
# Per-relation statistics of the run in parnn-replace-posneg.csv, compared
# against the `parnn` reference table loaded earlier.
parnn_rnp = pd.read_csv('tacred/results/prediction_stats/parnn-replace-posneg.csv')

# Rows are paired by index label - same relation order assumed, TODO confirm.
parnn_rnp_diff = pd.DataFrame(
    {'relation': parnn['relation'], 'f1_diff': parnn_rnp['f1'].sub(parnn['f1'])}
)
# parnn_rnp_diff

In [None]:
# How many relations gained, lost or kept their f1 in parnn_rnp_diff.
_delta = parnn_rnp_diff['f1_diff']
print("Performance Improved  :: ", int((_delta > 0).sum()))
print("Performance Declined  :: ", int((_delta < 0).sum()))
print("Performance Remained Same  :: ", int((_delta == 0).sum()))

In [None]:
# Relations whose f1 moved the same way in both parnn_rnp_diff and cgcn_rnp_diff.
_p_rel, _p_delta = parnn_rnp_diff['relation'], parnn_rnp_diff['f1_diff']
_c_rel, _c_delta = cgcn_rnp_diff['relation'], cgcn_rnp_diff['f1_diff']

rnpi = set(_p_rel[_p_delta > 0]) & set(_c_rel[_c_delta > 0])
print("Performance Improved  :: ", rnpi)
rnpd = set(_p_rel[_p_delta < 0]) & set(_c_rel[_c_delta < 0])
print("Performance Declined  :: ", rnpd)
rnps = set(_p_rel[_p_delta == 0]) & set(_c_rel[_c_delta == 0])
print("Performance Remained Same  :: ", rnps)

In [None]:
# Sizes of the sets of relations that moved the same way in both diff tables.
for _label, _pick in (
    ("Performance Improved  :: ", lambda delta: delta > 0),
    ("Performance Declined  :: ", lambda delta: delta < 0),
    ("Performance Remained Same  :: ", lambda delta: delta == 0),
):
    _shared = (set(parnn_rnp_diff.relation[_pick(parnn_rnp_diff.f1_diff)])
               & set(cgcn_rnp_diff.relation[_pick(cgcn_rnp_diff.f1_diff)]))
    print(_label, len(_shared))

In [None]:
# Relations in the "improved" set of all four experiments.
imp = list(eni & enpi & rni & rnpi)
imp

In [None]:
# Relations in the "declined" set of all four experiments.
dec = list(end & enpd & rnd & rnpd)
dec

In [None]:
# Relations in the "remained same" set of all four experiments.
sam = list(ens & enps & rns & rnps)
sam

In [None]:
# Number of relations in both rni and rnpi.
len(rni & rnpi)

## Robustness Analysis

### Eliminating Noisy Negative Instances

In [None]:
# Noisy test ids from five pickles (the unprefixed one plus the P23, P78, P61
# and P12 variants). Only the 'test' entry of each file is kept. Every file is
# read inside a `with` block so its handle is closed straight away.
with open('./tacred/Pallneg_noisy_test_ids.pkl', 'rb') as pkl_file:
    pnoisy = pickle.load(pkl_file)['test']
print("Number of noisy instances  ::  {}".format(len(pnoisy)))

with open('./tacred/P23allneg_noisy_test_ids.pkl', 'rb') as pkl_file:
    p23noisy = pickle.load(pkl_file)['test']
print("Number of noisy instances  ::  {}".format(len(p23noisy)))

with open('./tacred/P78allneg_noisy_test_ids.pkl', 'rb') as pkl_file:
    p78noisy = pickle.load(pkl_file)['test']
print("Number of noisy instances  ::  {}".format(len(p78noisy)))

with open('./tacred/P61allneg_noisy_test_ids.pkl', 'rb') as pkl_file:
    p61noisy = pickle.load(pkl_file)['test']
print("Number of noisy instances  ::  {}".format(len(p61noisy)))

with open('./tacred/P12allneg_noisy_test_ids.pkl', 'rb') as pkl_file:
    p12noisy = pickle.load(pkl_file)['test']
print("Number of noisy instances  ::  {}".format(len(p12noisy)))

In [None]:
# Number of ids shared by pnoisy and p23noisy.
len(set(pnoisy) & set(p23noisy))

In [None]:
# Number of ids shared by pnoisy and p78noisy.
len(set(pnoisy) & set(p78noisy))

In [None]:
# Number of ids shared by pnoisy and p61noisy.
len(set(pnoisy) & set(p61noisy))

In [None]:
# Number of ids shared by pnoisy and p12noisy.
len(set(pnoisy) & set(p12noisy))

In [None]:
# Number of ids present in all five noisy test lists.
len(set.intersection(*(set(ids) for ids in (pnoisy, p23noisy, p78noisy, p61noisy, p12noisy))))

### Reannotating Noisy Negative Instances

In [None]:
# Relabel test entries from five pickles; only the 'test' entry of each file
# is kept. Every file is read inside a `with` block so its handle is closed
# straight away.
# NOTE(review): the first file is Pallneg_relabel_ids.pkl while the other four
# use the *_relabel_test_ids.pkl pattern - confirm this is intended.
with open('./tacred/Pallneg_relabel_ids.pkl', 'rb') as pkl_file:
    pnoisy = pickle.load(pkl_file)['test']
print("Number of noisy instances  ::  {}".format(len(pnoisy)))

with open('./tacred/P23allneg_relabel_test_ids.pkl', 'rb') as pkl_file:
    p23noisy = pickle.load(pkl_file)['test']
print("Number of noisy instances  ::  {}".format(len(p23noisy)))

with open('./tacred/P78allneg_relabel_test_ids.pkl', 'rb') as pkl_file:
    p78noisy = pickle.load(pkl_file)['test']
print("Number of noisy instances  ::  {}".format(len(p78noisy)))

with open('./tacred/P61allneg_relabel_test_ids.pkl', 'rb') as pkl_file:
    p61noisy = pickle.load(pkl_file)['test']
print("Number of noisy instances  ::  {}".format(len(p61noisy)))

with open('./tacred/P12allneg_relabel_test_ids.pkl', 'rb') as pkl_file:
    p12noisy = pickle.load(pkl_file)['test']
print("Number of noisy instances  ::  {}".format(len(p12noisy)))

In [None]:
# Number of keys shared by pnoisy and p23noisy.
len(set(pnoisy.keys()) & set(p23noisy.keys()))

In [None]:
# Number of keys shared by pnoisy and p78noisy.
len(set(pnoisy.keys()) & set(p78noisy.keys()))

In [None]:
# Number of keys shared by pnoisy and p61noisy.
len(set(pnoisy.keys()) & set(p61noisy.keys()))

In [None]:
# Number of keys shared by pnoisy and p12noisy.
len(set(pnoisy.keys()) & set(p12noisy.keys()))

In [None]:
# Number of keys present in all five relabel test mappings.
len(set.intersection(*(set(m.keys()) for m in (pnoisy, p78noisy, p61noisy, p23noisy, p12noisy))))

## Finding common instances with hierarchy-based filtering

### Elimination Instances

In [32]:
# Show which splits the hierarchy-filtered id mapping covers.
hierarchy_filtered.keys()

dict_keys(['train', 'dev', 'test'])

In [37]:
# Noisy ids from Pnegpos_noisy_ids.pkl, with the 'test' entry taken from the
# separate Pnegpos_noisy_test_ids.pkl file. Note this rebinds `parnn`, which
# the prediction-stats cells use for the DataFrame read from parnn.csv.
# `with` blocks close both file handles after reading.
with open('./tacred/Pnegpos_noisy_ids.pkl', 'rb') as pkl_file:
    parnn = pickle.load(pkl_file)
with open('./tacred/Pnegpos_noisy_test_ids.pkl', 'rb') as pkl_file:
    parnn_test = pickle.load(pkl_file)
parnn['test'] = parnn_test['test']
parnn.keys()

dict_keys(['train', 'dev', 'test'])

In [41]:
# Noisy ids from negpos_noisy_ids.pkl, with the 'test' entry taken from the
# separate negpos_noisy_test_ids.pkl file. Note this rebinds `cgcn`, which
# the prediction-stats cells use for the DataFrame read from cgcn.csv.
# `with` blocks close both file handles after reading.
with open('./cgcn/negpos_noisy_ids.pkl', 'rb') as pkl_file:
    cgcn = pickle.load(pkl_file)
with open('./cgcn/negpos_noisy_test_ids.pkl', 'rb') as pkl_file:
    cgcn_test = pickle.load(pkl_file)
cgcn['test'] = cgcn_test['test']
cgcn.keys()

dict_keys(['train', 'dev', 'test'])

In [46]:
# For every split in hierarchy_filtered, the ids listed by both parnn and cgcn.
noisy = {}
for split in hierarchy_filtered:
    noisy[split] = set(parnn[split]) & set(cgcn[split])
noisy.keys()

dict_keys(['train', 'dev', 'test'])

In [47]:
# Per split: hierarchy-filtered ids that are also in the noisy set, followed
# by the sizes of the filtered list and of the noisy set.
for key in hierarchy_filtered:
    val = hierarchy_filtered[key]
    noisy_ids = noisy[key]
    common = [item for item in val if item in noisy_ids]
    print(key, len(common), len(val), len(noisy_ids))

train 0 117 797
dev 8 35 892
test 2 12 520


### Reannotation Instances

In [48]:
# Relabel ids from Pnegpos_relabel_ids.pkl (rebinds `parnn` again). The
# `with` block closes the file handle after reading.
with open('./tacred/Pnegpos_relabel_ids.pkl', 'rb') as pkl_file:
    parnn = pickle.load(pkl_file)
parnn.keys()

dict_keys(['train', 'dev', 'test'])

In [49]:
# Relabel ids from negpos_relabel_ids.pkl (rebinds `cgcn` again). The
# `with` block closes the file handle after reading.
with open('./cgcn/negpos_relabel_ids.pkl', 'rb') as pkl_file:
    cgcn = pickle.load(pkl_file)
cgcn.keys()

dict_keys(['train', 'dev', 'test'])

In [50]:
# For every split in hierarchy_filtered, the ids listed by both parnn and cgcn.
noisy = {}
for split in hierarchy_filtered:
    noisy[split] = set(parnn[split]) & set(cgcn[split])
noisy.keys()

dict_keys(['train', 'dev', 'test'])

In [51]:
# Per split: hierarchy-filtered ids that are also in the noisy set, followed
# by the sizes of the filtered list and of the noisy set.
for key in hierarchy_filtered:
    val = hierarchy_filtered[key]
    noisy_ids = noisy[key]
    common = [item for item in val if item in noisy_ids]
    print(key, len(common), len(val), len(noisy_ids))

train 7 117 1166
dev 8 35 703
test 2 12 408
