In [2]:
import os, json
from sklearn.metrics import confusion_matrix
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics import confusion_matrix
from seqeval.metrics import classification_report, accuracy_score, f1_score, recall_score, precision_score
from seqeval.scheme import IOB2

In [3]:
with open("topicRank_KW.json", "r") as infile:
    topicPartiteRank_kws = json.loads(infile.read()) 
    
with open("multiPartiteRank_KW.json", "r") as infile:
    multiPartiteRank_kws = json.loads(infile.read())
    
with open("positionRank_KW.json", "r") as infile:
    positionRank_kws = json.loads(infile.read())

with open("scispacy_KW.json", "r") as infile:
    scispacy_kws = json.loads(infile.read())
    
with open("processedTexts.json", "r") as infile:
    processed_texts = json.loads(infile.read())

In [4]:
files = os.listdir("/home/rgoli/MetaMap-src/data/gold_standard/AnnotationResults_Abstracts/done")
manual_pmids = [file.split('.')[0][4:] for file in files if file!='.ipynb_checkpoints']
pmid_manual_kw={}
for file in files:
    if file=='.ipynb_checkpoints': continue
    with open("/home/rgoli/MetaMap-src/data/gold_standard/AnnotationResults_Abstracts/done/"+file,'r') as fp:
        temp = fp.read().splitlines()
        pmid_manual_kw[temp[0].strip()]=temp[1:]

In [7]:
def get_metrics(text_dict, kws_dict, gs_dict):
    ## For confusion matrix
    test_mappers = []
    gs_mappers = []
    
    ## Iterate over PMID texts and get mappingf of keywords
    for pmid,text in text_dict.items():
        text_len = len(text)
        
        test_kws = [x.split() for x in kws_dict[pmid]]
        gs_kws = [x.split() for x in gs_dict[pmid]]
        
        result = []
        test_mapper = [0]*text_len
        gs_mapper = [0]*text_len
        
        for kw in test_kws:
            kw_len = len(kw)
            i=0
            while i<text_len-kw_len:
                if text[i:i+kw_len]==kw and test_mapper[i:i+kw_len]==[0]*kw_len:
                    test_mapper[i:i+kw_len]=[1]*kw_len
                i+=1
                
        for kw in gs_kws:
            kw_len=len(kw)
            i=0
            while i<text_len-kw_len:
                if text[i:i+kw_len]==kw and gs_mapper[i:i+kw_len]==[0]*kw_len:
                    gs_mapper[i:i+kw_len]=[1]*kw_len
                i+=1 
        test_mappers.append(test_mapper)
        gs_mappers.append(gs_mapper)
        
    ## Calculate Metrics from Text Mappers
    TP, TN, FN, FP = 0, 0, 0, 0
    N = len(text_dict.keys())
    i=0
    nmi_arr=[]
    while i<N:
        matrix = confusion_matrix(gs_mappers[i],test_mappers[i], labels=[1,0])
        nmi_arr.append(normalized_mutual_info_score(gs_mappers[i],test_mappers[i]))
        TP += matrix[0][0]
        FN += matrix[0][1]
        FP += matrix[1][0]
        TN += matrix[1][1]

        i+=1

    nmi = sum(nmi_arr)/len(nmi_arr)
        
    accuracy = (TP+TN)/(TP+FN+FP+TN)
    misclassification = (FP+FN)/(TP+TN+FP+FN)
    precision = TP/(TP+FP)
    sensitivity = TP/(TP+FN)
    specificity = TN/(TN+FP)
    f1score=(2 * precision * sensitivity) / (precision + sensitivity)
    print("""
    Accuracy:\t{}
    Misclassification:\t{}
    Precision:\t{}
    Sensitivity/Recall:\t{}
    Specificity:\t{}
    F1 Score:\t{}
    NMI:\t{}""".format(accuracy,misclassification,precision, sensitivity,specificity,f1score,nmi))
    
    # print(classification_report(gs_mappers, test_mappers, mode='strict', scheme=IOB2))
    # print("Precision given by SeqEval: {:.2f}%".format(precision_score(gs_mappers, test_mappers)*100))
    # print("Recall given by SeqEval: {:.2f}%".format(recall_score(gs_mappers, test_mappers)*100))
    # print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(gs_mappers, test_mappers)*100))
    # print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(gs_mappers, test_mappers)*100))    

In [8]:
print("Metrics for sciSpacy")
get_metrics(processed_texts,scispacy_kws,pmid_manual_kw)

Metrics for sciSpacy

    Accuracy:	0.6882071144633575
    Misclassification:	0.31179288553664253
    Precision:	0.3625648279113626
    Sensitivity/Recall:	0.8124669836238775
    Specificity:	0.6584996211164436
    F1 Score:	0.5013854930725347
    NMI:	0.1395377814386023


In [22]:
print("Metrics for Position Rank")
get_metrics(processed_texts,positionRank_kws,pmid_manual_kw)

Metrics for Position Rank

    Accuracy:	0.7676077871776578
    Misclassification:	0.23239221282234226
    Precision:	0.39135317237507017
    Sensitivity/Recall:	0.3681986265187533
    Specificity:	0.8630967416014145
    F1 Score:	0.3794229722373435
    NMI:	0.06926592253284956


In [23]:
print("Metrics for Multi Partite Rank")
get_metrics(processed_texts,multiPartiteRank_kws,pmid_manual_kw)

Metrics for Multi Partite Rank

    Accuracy:	0.7606767913566405
    Misclassification:	0.2393232086433595
    Precision:	0.3747936158503027
    Sensitivity/Recall:	0.35974643423137875
    Specificity:	0.8565294266228846
    F1 Score:	0.3671159029649596
    NMI:	0.05519556739959275


In [24]:
print("Metrics for Topic Rank")
get_metrics(processed_texts,topicPartiteRank_kws,pmid_manual_kw)

Metrics for Topic Rank

    Accuracy:	0.7690347569055143
    Misclassification:	0.2309652430944858
    Precision:	0.391758560650029
    Sensitivity/Recall:	0.35657686212361334
    Specificity:	0.8676433442788583
    F1 Score:	0.37334070796460184
    NMI:	0.057050633351478384
