In [1]:
import sys
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '../modules/')

In [2]:
import json

def read_json_array(path):
    '''Read JSON given array of dictionaries'''
    with open(path, "r", encoding="utf-8") as fp:
        datas = json.loads(fp.read())
    return datas

trainData = read_json_array("../data/pubmed_data/json_data_synthetic_labels/v2_train.json")
testData = read_json_array("../data/pubmed_data/json_data_synthetic_labels/v2_test.json")
gs40Data = read_json_array("../data/pubmed_data/json_data_synthetic_labels/testgs.json")
gs91Data = read_json_array("../data/pubmed_data/json_data_synthetic_labels/v2_testgs91.json")

In [7]:
import unidecode
import contractions
from nltk import sent_tokenize, word_tokenize
import nltk
import logging
import re
import string

logger = logging.getLogger(__name__)
stop_words = nltk.corpus.stopwords.words('english')
#deselect 'no' and 'not' from stop words
stop_words.remove('no')
stop_words.remove('not')

## Do not remove '.' ',' ';' ':'
english_punctuations = ['``','?', '（','）','(', ')',
                '[', ']', '&', '!', '*', '@', '#', '$', '%','\\','\"','}','{','=','/','>','<','|','+','_','~']
english_punctuations_arr= ''.join(english_punctuations)

def sent2words(sent):
    '''Pre-process sentence text to words'''
    
    ## Expand Words
    sent = contractions.fix(sent)
    
    ## Convert sentence to lowercase and strip whitespaces
    sent = sent.lower().strip()
    
    ## Convert unicode characters in sentence to ASCII
    sent = unidecode.unidecode(sent)
    sent = sent.encode("ascii", "ignore").decode()
    
    ## Remove URL's from Sentence
    sent = re.sub('http[s]?://\S+', '', sent)
    
    ## Remove words like 2.0-2
    sent = re.sub(' \d*\.*\d*\-+\d*\.*\d* ', ' ', sent)
    
    # Remove punctuation characters
    sent = sent.translate(str.maketrans(english_punctuations_arr,' '*len(english_punctuations_arr)))
    
    pos, length = {}, {}
    words = word_tokenize(sent)
    for i, (w, p) in enumerate(nltk.pos_tag(words)):
        # Computational features
        pos[w] = p
        length[w] = len(w)
    
    # Remove Stop Words
    # words = [word for word in word_tokenize(sent) if not word in stop_words]
    
    # Remove punctuation only strings from sentence
    words = [word for word in words if not all(c in string.punctuation for c in word) and len(word)>1]
    
    # Remove Numericals from sentence
    words = [x for x in words if not all(c.isdigit() for c in x)] 
    
    ## Return words, POS tags, lengths of words, processed sentence
    return words, pos, length, ' '.join(words)

# Mark Keyphrases with BIO Format Labels
def mark_keyword_all_sentences(keywords, sentences):

    sentence_lengths = [len(sent) for sent in sentences]
    logger.debug("Sentence length: %s"%sentence_lengths)

    complete_text = []
    for sent in sentences:
        complete_text.extend(sent)
    logger.debug("Complete Text: %s"%complete_text)

    complete_text_len = len(complete_text)
    mapper = ['O']*complete_text_len
    kws = [sent2words(x)[0] for x in keywords]

    for kw in kws:
        kw_len=len(kw)
        if kw_len == 0:
            continue
        i=0
        while i<complete_text_len-kw_len:
            if complete_text[i:i+kw_len]==kw and mapper[i:i+kw_len]==['O']*kw_len:
                mapper[i:i+kw_len]=['I-KP']*kw_len
                mapper[i]='B-KP'
            i+=1

    final_mapper = []
    final_tokens = []
    start=0

    for slen in sentence_lengths:
        final_mapper.append(mapper[start:start+slen])
        final_tokens.append(complete_text[start:start+slen])
        start+=slen
    
    return complete_text, final_mapper, final_tokens

In [13]:
def createBERTtSV(data,filename):
    # import csv
    # trainCSV = "../data/trainBERT.tsv"
    f = open(filename,'w')
    f.close()

    for article in data:
        textData = []
        for sent in sent_tokenize(article['title']+' '+article['abstract']):
            words, _, _, _ = sent2words(sent)
            # words.append('.')
            textData.append(words)
        # textData = [word_tokenize(sent) for sent in sent_tokenize(article['title']+' '+article['abstract'])]
        #print(textData)
        _, mapper, tokens = mark_keyword_all_sentences(article['keywords'],textData)
        with open(filename,'a') as f:
        # with open(trainCSV, 'w', encoding='utf8', newline='') as tsv_file:
        #     tsv_writer = csv.writer(tsv_file, delimiter='\t', lineterminator='\n')
        #     tsv_writer.writerow(["Word", "Count"])
            print("-DOCSTART- -X- O O", file=f)
            for sentM, sentT in zip(mapper, tokens):
                for wordM, wordT in zip(sentM, sentT):
                    print("{}\t{}".format(wordT,wordM), file=f)
        # print(mapper, tokens)

In [14]:
totalData = trainData+testData
datalen = len(totalData)
print("Data: {}, Train: {} Val: {}".format(datalen, datalen*0.8, datalen*0.2))
createBERTtSV(totalData[:int(0.8*datalen)],'../data/trainBERT.tsv')
createBERTtSV(totalData[int(0.8*datalen):],'../data/valBERT.tsv')
createBERTtSV(gs40Data,'../data/finetuneBERT.tsv')
gs_datalen = len(gs40Data)
print("GS Data: {}, GS Train: {} GS Val: {}".format(gs_datalen, gs_datalen*0.8, gs_datalen*0.2))
createBERTtSV(gs40Data[:int(0.8*gs_datalen)],'../data/finetuneBERT_T.tsv')
createBERTtSV(gs40Data[int(0.8*gs_datalen):],'../data/finetuneBERT_V.tsv')
createBERTtSV(gs91Data,'../data/testBERT.tsv')

Data: 3110, Train: 2488.0 Val: 622.0
GS Data: 42, GS Train: 33.6 GS Val: 8.4


In [32]:
# python -m spacy convert trainBERT.tsv ./ -t json -n 1 -c iob
# python -m spacy convert valBERT.tsv ./ -t json -n 1 -c iob
# python -m spacy convert finetuneBERT.tsv ./ -t json -n 1 -c iob
# python -m spacy convert finetuneBERT_T.tsv ./ -t json -n 1 -c iob
# python -m spacy convert finetuneBERT_V.tsv ./ -t json -n 1 -c iob
# python -m spacy convert testBERT.tsv ./ -t json -n 1 -c iob

In [33]:
# python -m spacy convert trainBERT.json ./ -t spacy
# python -m spacy convert valBERT.json ./ -t spacy
# python -m spacy convert finetuneBERT.json ./ -t spacy
# python -m spacy convert finetuneBERT_T.json ./ -t spacy
# python -m spacy convert finetuneBERT_V.json ./ -t spacy
# python -m spacy convert testBERT.json ./ -t spacy

In [11]:
# (pytorch) [rgoli@node0092 MetaMap-src]$ python -m spacy init fill-config base_config.cfg config.cfg
# (pytorch) [rgoli@node0092 MetaMap-src]$ python -m spacy train -g 0 keywordExtraction/config.cfg --output ./cdssBERToutput --paths.train ./data/trainBERT.spacy --paths.dev ./data/valBERT.spacy 
# ℹ Saving to output directory: cdssBERToutput
# ℹ Using GPU: 0

# =========================== Initializing pipeline ===========================
# [2022-08-21 16:30:21,565] [INFO] Set up nlp object from config
# [2022-08-21 16:30:21,573] [INFO] Pipeline: ['transformer', 'ner']
# [2022-08-21 16:30:21,576] [INFO] Created vocabulary
# [2022-08-21 16:30:21,577] [INFO] Finished initializing nlp object
# Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias']
# - This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
# - This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# [2022-08-21 16:30:40,873] [INFO] Initialized pipeline components: ['transformer', 'ner']
# ✔ Initialized pipeline

# ============================= Training pipeline =============================
# ℹ Pipeline: ['transformer', 'ner']
# ℹ Initial learn rate: 0.0
# E    #       LOSS TRANS...  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
# ---  ------  -------------  --------  ------  ------  ------  ------
#   0       0       11835.34    813.72    0.94    2.13    0.60    0.01

#   0     200      368174.14  86589.40   66.50   69.84   63.46    0.66
#   0     400       10601.42  16174.71   81.82   81.70   81.95    0.82
#   1     600        3710.65   4980.88   75.95   64.41   92.55    0.76
#   1     800        4241.40   5441.69   87.19   83.35   91.39    0.87
#   2    1000        2000.63   2331.30   89.06   91.56   86.69    0.89
#   2    1200        2140.98   2292.71   89.96   90.97   88.98    0.90
#   3    1400        2050.20   2001.31   89.92   92.31   87.64    0.90
#   3    1600        1542.22   1511.29   91.90   91.72   92.07    0.92
#   4    1800        1634.71   1524.12   91.61   90.44   92.81    0.92
#   4    2000        1514.30   1392.00   91.19   89.42   93.04    0.91
#   5    2200        1266.26   1175.33   90.58   86.93   94.55    0.91
#   5    2400        1230.94   1113.63   92.44   92.49   92.38    0.92
#   6    2600        1191.09   1025.46   92.35   90.89   93.87    0.92
#   6    2800        1018.01    887.95   92.21   89.29   95.32    0.92
#   7    3000         741.42    681.03   92.45   91.14   93.81    0.92
#   7    3200        1176.01    904.78   92.35   90.27   94.53    0.92
#   8    3400        1003.15    802.40   92.36   90.25   94.56    0.92
#   8    3600         746.13    616.04   92.11   90.42   93.87    0.92
#   9    3800         885.09    706.52   92.16   90.00   94.43    0.92
#   9    4000         518.79    446.13   91.90   88.98   95.01    0.92
#   9    4200         731.12    590.08   91.22   88.17   94.48    0.91
#  10    4400         514.87    433.87   92.89   91.64   94.18    0.93
#  10    4600         469.26    420.22   91.79   89.71   93.96    0.92
#  11    4800         376.35    356.56   91.44   89.40   93.59    0.91
#  11    5000         519.54    433.74   92.48   90.62   94.42    0.92
#  12    5200         488.42    404.42   90.82   87.42   94.48    0.91
#  12    5400         244.71    248.49   91.50   87.72   95.63    0.92
#  13    5600         222.53    252.38   92.75   91.73   93.79    0.93
#  13    5800         302.91    291.42   92.11   89.51   94.87    0.92
#  14    6000         600.91    489.17   92.24   90.16   94.40    0.92
# ✔ Saved pipeline to output directory
# cdssBERToutput/model-last
# (pytorch) [rgoli@node0092 MetaMap-src]$ 

In [12]:
!pwd

/home/rgoli/MetaMap-src/keywordExtraction


In [4]:
import spacy
nlp = spacy.load('../cdssBERToutput/model-best')

In [8]:
def getTargetPreds(data):
    comb_arr = []
    for article in data:
        comb_arr.append(article['title']+' '+article['abstract'])

    y_preds, y_targets = [], []
    for idx, doc in enumerate(nlp.pipe(comb_arr)):

        kws = [str(x) for x in doc.ents]
        textData = []
        for sent in sent_tokenize(article['title']+' '+article['abstract']):
            words, _, _, _ = sent2words(sent)
            textData.append(words)
        # textData = [word_tokenize(sent) for sent in sent_tokenize(data[idx]['title']+' '+data[idx]['abstract'])]

        _, target, _ = mark_keyword_all_sentences(data[idx]['keywords'],textData)
        _, predict, _ = mark_keyword_all_sentences(kws,textData)
        #print(gs40Data[idx]['id'],gs40Data[idx]['keywords'],kws,sep='\n')

        #print(target,predict,sep='\n')
        y_preds.extend(predict)
        y_targets.extend(target)
    
    return y_preds, y_targets

y_preds, y_targets = getTargetPreds(gs40Data)

In [37]:
from sklearn.metrics import confusion_matrix
from seqeval.metrics import classification_report, accuracy_score, f1_score, recall_score, precision_score
from seqeval.scheme import IOB2
print('GS42 SeqEval Metrics on roberta-base:')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))

GS42 SeqEval Metrics:
              precision    recall  f1-score   support

          KP       0.41      0.76      0.53       197

   micro avg       0.41      0.76      0.53       197
   macro avg       0.41      0.76      0.53       197
weighted avg       0.41      0.76      0.53       197

Precision given by SeqEval: 40.98%
Recall given by SeqEval: 76.14%
F1-Score given by SeqEval: 53.29%
Accuracy given by SeqEval: 96.69%


In [38]:
y_preds, y_targets = getTargetPreds(gs91Data)
print('GS91 SeqEval Metrics on roberta-base:')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))

GS91 SeqEval Metrics:
              precision    recall  f1-score   support

          KP       0.61      0.86      0.71       347

   micro avg       0.61      0.86      0.71       347
   macro avg       0.61      0.86      0.71       347
weighted avg       0.61      0.86      0.71       347

Precision given by SeqEval: 61.19%
Recall given by SeqEval: 85.88%
F1-Score given by SeqEval: 71.46%
Accuracy given by SeqEval: 98.93%


In [7]:
from sklearn.metrics import confusion_matrix
from seqeval.metrics import classification_report, accuracy_score, f1_score, recall_score, precision_score
from seqeval.scheme import IOB2
print('GS42 SeqEval Metrics on allenai/scibert_scivocab_cased:')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))

GS42 SeqEval Metrics on allenai/scibert_scivocab_cased:
              precision    recall  f1-score   support

          KP       0.43      0.73      0.54       197

   micro avg       0.43      0.73      0.54       197
   macro avg       0.43      0.73      0.54       197
weighted avg       0.43      0.73      0.54       197

Precision given by SeqEval: 43.33%
Recall given by SeqEval: 72.59%
F1-Score given by SeqEval: 54.27%
Accuracy given by SeqEval: 97.09%


In [8]:
y_preds, y_targets = getTargetPreds(gs91Data)
print('GS91 SeqEval Metrics on allenai/scibert_scivocab_cased:')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))

GS91 SeqEval Metrics on allenai/scibert_scivocab_cased:
              precision    recall  f1-score   support

          KP       0.76      0.87      0.81       347

   micro avg       0.76      0.87      0.81       347
   macro avg       0.76      0.87      0.81       347
weighted avg       0.76      0.87      0.81       347

Precision given by SeqEval: 76.01%
Recall given by SeqEval: 86.74%
F1-Score given by SeqEval: 81.02%
Accuracy given by SeqEval: 99.37%


In [3]:
import spacy
from sklearn.metrics import confusion_matrix
from seqeval.metrics import classification_report, accuracy_score, f1_score, recall_score, precision_score
from seqeval.scheme import IOB2

In [12]:
nlp = spacy.load('../cdssBER42GS/model-best')
y_preds, y_targets = getTargetPreds(gs91Data)
print('GS91 SeqEval Metrics on allenai/scibert_scivocab_cased + 42GS Train + 91GS Test:\n')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))

GS91 SeqEval Metrics on allenai/scibert_scivocab_cased + 42GS Train + 91GS Test:

              precision    recall  f1-score   support

          KP       0.70      0.58      0.64       347

   micro avg       0.70      0.58      0.64       347
   macro avg       0.70      0.58      0.64       347
weighted avg       0.70      0.58      0.64       347

Precision given by SeqEval: 69.90%
Recall given by SeqEval: 58.21%
F1-Score given by SeqEval: 63.52%
Accuracy given by SeqEval: 98.93%


In [13]:
nlp = spacy.load('../cdssBER91GS/model-best')
y_preds, y_targets = getTargetPreds(gs40Data)
print('GS91 SeqEval Metrics on allenai/scibert_scivocab_cased + 91GS Train + 42GS Test:\n')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))

GS91 SeqEval Metrics on allenai/scibert_scivocab_cased + 91GS Train + 42GS Test:

              precision    recall  f1-score   support

          KP       0.47      0.71      0.57       197

   micro avg       0.47      0.71      0.57       197
   macro avg       0.47      0.71      0.57       197
weighted avg       0.47      0.71      0.57       197

Precision given by SeqEval: 47.46%
Recall given by SeqEval: 71.07%
F1-Score given by SeqEval: 56.91%
Accuracy given by SeqEval: 97.26%


In [14]:
nlp = spacy.load('en_core_sci_lg')
y_preds, y_targets = getTargetPreds(gs91Data)
print('\nGS91 SeqEval Metrics on en_core_sci_lg + 91GS Test:\n')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))

y_preds, y_targets = getTargetPreds(gs40Data)
print('\nGS91 SeqEval Metrics on en_core_sci_lg + 42GS Test:\n')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))




GS91 SeqEval Metrics on en_core_sci_lg + 91GS Test:

              precision    recall  f1-score   support

          KP       0.23      0.59      0.33       347

   micro avg       0.23      0.59      0.33       347
   macro avg       0.23      0.59      0.33       347
weighted avg       0.23      0.59      0.33       347

Precision given by SeqEval: 22.67%
Recall given by SeqEval: 58.79%
F1-Score given by SeqEval: 32.72%
Accuracy given by SeqEval: 96.49%

GS91 SeqEval Metrics on en_core_sci_lg + 42GS Test:

              precision    recall  f1-score   support

          KP       0.18      0.62      0.28       197

   micro avg       0.18      0.62      0.28       197
   macro avg       0.18      0.62      0.28       197
weighted avg       0.18      0.62      0.28       197

Precision given by SeqEval: 18.07%
Recall given by SeqEval: 61.93%
F1-Score given by SeqEval: 27.98%
Accuracy given by SeqEval: 93.17%


In [11]:
nlp = spacy.load('../cdssSpacyBERToutput/model-best')
y_preds, y_targets = getTargetPreds(gs91Data)
print('\nSeqEval Metrics on en_core_sci_lg + Train/Val 42GS + 91GS Test:\n')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))

y_preds, y_targets = getTargetPreds(gs40Data)
print('\nGS91 SeqEval Metrics on en_core_sci_lg + Train/Val 42GS + 42GS Test:\n')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))


SeqEval Metrics on en_core_sci_lg + Train/Val 42GS + 91GS Test:

              precision    recall  f1-score   support

          KP       0.52      0.52      0.52       347

   micro avg       0.52      0.52      0.52       347
   macro avg       0.52      0.52      0.52       347
weighted avg       0.52      0.52      0.52       347

Precision given by SeqEval: 52.34%
Recall given by SeqEval: 51.59%
F1-Score given by SeqEval: 51.96%
Accuracy given by SeqEval: 98.53%

GS91 SeqEval Metrics on en_core_sci_lg + Train/Val 42GS + 42GS Test:

              precision    recall  f1-score   support

          KP       0.71      0.92      0.81       197

   micro avg       0.71      0.92      0.81       197
   macro avg       0.71      0.92      0.81       197
weighted avg       0.71      0.92      0.81       197

Precision given by SeqEval: 71.37%
Recall given by SeqEval: 92.39%
F1-Score given by SeqEval: 80.53%
Accuracy given by SeqEval: 99.00%


In [15]:
nlp = spacy.load('../cdssSpacyBERToutput2/model-best')
y_preds, y_targets = getTargetPreds(gs91Data)
print('\nSeqEval Metrics on en_core_sci_lg + Train:42GS Val:91GS + 91GS Test:\n')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))

y_preds, y_targets = getTargetPreds(gs40Data)
print('\nGS91 SeqEval Metrics on en_core_sci_lg + Train:42GS Val:91GS + 42GS Test:\n')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))


SeqEval Metrics on en_core_sci_lg + Train:42GS Val:91GS + 91GS Test:

              precision    recall  f1-score   support

          KP       0.60      0.51      0.55       347

   micro avg       0.60      0.51      0.55       347
   macro avg       0.60      0.51      0.55       347
weighted avg       0.60      0.51      0.55       347

Precision given by SeqEval: 59.80%
Recall given by SeqEval: 51.01%
F1-Score given by SeqEval: 55.05%
Accuracy given by SeqEval: 98.67%

GS91 SeqEval Metrics on en_core_sci_lg + Train:42GS Val:91GS + 42GS Test:

              precision    recall  f1-score   support

          KP       0.63      0.83      0.72       197

   micro avg       0.63      0.83      0.72       197
   macro avg       0.63      0.83      0.72       197
weighted avg       0.63      0.83      0.72       197

Precision given by SeqEval: 63.18%
Recall given by SeqEval: 82.74%
F1-Score given by SeqEval: 71.65%
Accuracy given by SeqEval: 98.38%


In [16]:
nlp = spacy.load('../cdssSpacyBERToutput3/model-best')
y_preds, y_targets = getTargetPreds(gs91Data)
print('\nSeqEval Metrics on en_core_sci_lg + Train:33GS Val:9GS + 91GS Test:\n')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))

y_preds, y_targets = getTargetPreds(gs40Data)
print('\nGS91 SeqEval Metrics on en_core_sci_lg + Train:33GS Val:9GS + 42GS Test:\n')
print(classification_report(y_targets, y_preds, mode='strict', scheme=IOB2))
print("Precision given by SeqEval: {:.2f}%".format(precision_score(y_targets, y_preds)*100))
print("Recall given by SeqEval: {:.2f}%".format(recall_score(y_targets, y_preds)*100))
print("F1-Score given by SeqEval: {:.2f}%".format(f1_score(y_targets, y_preds)*100))
print("Accuracy given by SeqEval: {:.2f}%".format(accuracy_score(y_targets, y_preds)*100))


SeqEval Metrics on en_core_sci_lg + Train:33GS Val:9GS + 91GS Test:

              precision    recall  f1-score   support

          KP       0.68      0.51      0.58       347

   micro avg       0.68      0.51      0.58       347
   macro avg       0.68      0.51      0.58       347
weighted avg       0.68      0.51      0.58       347

Precision given by SeqEval: 67.95%
Recall given by SeqEval: 50.72%
F1-Score given by SeqEval: 58.09%
Accuracy given by SeqEval: 98.81%

GS91 SeqEval Metrics on en_core_sci_lg + Train:33GS Val:9GS + 42GS Test:

              precision    recall  f1-score   support

          KP       0.56      0.67      0.61       197

   micro avg       0.56      0.67      0.61       197
   macro avg       0.56      0.67      0.61       197
weighted avg       0.56      0.67      0.61       197

Precision given by SeqEval: 55.93%
Recall given by SeqEval: 67.01%
F1-Score given by SeqEval: 60.97%
Accuracy given by SeqEval: 97.83%
