# Evaluation MetaCAT - BiLSTM

In [1]:
from tokenizers import ByteLevelBPETokenizer
import os

from medcat.cat import CAT
from medcat.vocab import Vocab
from medcat.cdb import CDB
from medcat.config import Config
from medcat.meta_cat import MetaCAT

  from tqdm.autonotebook import tqdm


In [2]:
# Input locations: the concept database and the vocabulary both live in ../data
data_dir = os.path.join('..', 'data')
cdb_file, vocab_file = (
    os.path.join(data_dir, file_name) for file_name in ('cdb.dat', 'vocab.dat')
)

# Directory named 'output'; the MetaCAT model is loaded from here further down
output_dir = 'output'

# Tokenizer identifier. The name should contain 'bbpe' for ByteLevelBPETokenizer
# or 'bert' for BertTokenizerFast
tokenizer_name = 'bbpe_dutch-wikipedia'

## Load MetaCAT model

In [3]:
# Load the saved negation MetaCAT model from the output directory
mc_negation = MetaCAT().load(save_dir=output_dir)

## Example usage

In [4]:
# MedCAT configuration, created without any arguments
config = Config()

# Read the vocabulary and the concept database from disk
vocab = Vocab.load(vocab_file)
cdb = CDB.load(cdb_file)

# Assemble the MedCAT pipeline with the negation model as its only meta-annotator
cat = CAT(
    cdb=cdb,
    vocab=vocab,
    config=config,
    meta_cats=[mc_negation],
)

In [5]:
# Sentence DL1114 from the DCC, containing a negation ('geen')
text = 'Echo- en rontgenonderzoek van de heup toont geen evidente heupdysplasie.'
doc = cat(text)

# Show every detected entity together with its meta annotations
for entity in doc.ents:
    print(f"Entity: {entity.text}")
    print(f"Meta Annotations: {entity._.meta_anns}")
    print("\n")

Entity: heup
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.9889043, 'name': 'Negation'}}


Entity: heupdysplasie
Meta Annotations: {'Negation': {'value': 'negated', 'confidence': 0.9601904, 'name': 'Negation'}}




In [6]:
# Same DL1114 sentence from the DCC, with the negation word 'geen' removed
text = 'Echo- en rontgenonderzoek van de heup toont evidente heupdysplasie.'
doc = cat(text)

# Show every detected entity together with its meta annotations
for entity in doc.ents:
    print(f"Entity: {entity.text}")
    print(f"Meta Annotations: {entity._.meta_anns}")
    print("\n")

Entity: heup
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.9992647, 'name': 'Negation'}}


Entity: heupdysplasie
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.99737585, 'name': 'Negation'}}




## Additional testing

In [7]:
# Sentence DL1112 from the DCC
text = 'Patient kan zich geen trauma herinneren.'
doc = cat(text)

# Show every detected entity together with its meta annotations
for entity in doc.ents:
    print(f"Entity: {entity.text}")
    print(f"Meta Annotations: {entity._.meta_anns}")
    print("\n")

# Observation: 'trauma' is not identified as a medical concept, so nothing is printed.

In [8]:
# Sentence taken from an NTvG article:
# https://www.ntvg.nl/artikelen/acute-buik-op-basis-van-een-wandelende-milt
text = '1 maand na de operatie had patiënte geen buikpijn meer en was zij goed hersteld.'
doc = cat(text)

# Show every detected entity together with its meta annotations
for entity in doc.ents:
    print(f"Entity: {entity.text}")
    print(f"Meta Annotations: {entity._.meta_anns}")
    print("\n")

# NOTE(review): an earlier note here said the negation was missed, but the
# recorded output labels 'buikpijn' as 'negated' (confidence ~0.91).

Entity: operatie
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.99938786, 'name': 'Negation'}}


Entity: buikpijn
Meta Annotations: {'Negation': {'value': 'negated', 'confidence': 0.90848744, 'name': 'Negation'}}




In [9]:
# Sentence taken from an NTvG article:
# https://www.ntvg.nl/artikelen/een-bezoar-bij-een-vrouw-met-clomipramine-intoxicatie
text = 'Er waren geen tekenen van darmobstructie te zien.'
doc = cat(text)

# Show every detected entity together with its meta annotations
for entity in doc.ents:
    print(f"Entity: {entity.text}")
    print(f"Meta Annotations: {entity._.meta_anns}")
    print("\n")

# Correct identification of the negation on 'darmobstructie', although the
# recorded confidence is modest (~0.70); 'zien' is also picked up as an entity
# and labelled 'negated'.

Entity: darmobstructie
Meta Annotations: {'Negation': {'value': 'negated', 'confidence': 0.6965348, 'name': 'Negation'}}


Entity: zien
Meta Annotations: {'Negation': {'value': 'negated', 'confidence': 0.62835985, 'name': 'Negation'}}




In [10]:
# Sentence taken from an NTvG article:
# https://www.ntvg.nl/artikelen/nieuws/vaker-ziek-na-acute-fase-covid-19
text = 'Alle patiënten werden gematcht met een controlegroep bij wie geen SARS-CoV-2-infectie was geregistreerd.'
doc = cat(text)

# Show every detected entity together with its meta annotations
for entity in doc.ents:
    print(f"Entity: {entity.text}")
    print(f"Meta Annotations: {entity._.meta_anns}")
    print("\n")

# NOTE(review): an earlier note here said the negation on SARS-CoV was missed.
# The recorded output labels 'SARS-CoV' as 'negated', but the separately
# detected 'infectie' as 'not negated', so the negation is only partly captured.

Entity: patiënten
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.99108285, 'name': 'Negation'}}


Entity: controlegroep
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.9961759, 'name': 'Negation'}}


Entity: SARS-CoV
Meta Annotations: {'Negation': {'value': 'negated', 'confidence': 0.9215692, 'name': 'Negation'}}


Entity: infectie
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.9547335, 'name': 'Negation'}}




## Evaluate MetaCAT on subsets of the data
The ContextD paper reports precision, recall and F1-score on subsets of the data. In this section we calculate the same scores with the model loaded above. Note that these subsets were included in the data used during the training phase, so the scores below are likely to be optimistic. For proper score calculations, we will do cross-validation at a later stage.

In [11]:
# One annotation file per subset of the corpus, all stored in the data directory
json_file_DL, json_file_GP, json_file_RD, json_file_SP = (
    os.path.join(data_dir, f'emc-dcc_ann_{subset}.json')
    for subset in ('DL', 'GP', 'RD', 'SP')
)

In [12]:
# Evaluate the negation model on the RD subset file; the call prints a
# classification report and the cell displays the returned metrics dict.
# (RD presumably stands for radiology reports in the EMC DCC — confirm.)
mc_negation.eval(json_file_RD)

**************************************************  Test
              precision    recall  f1-score   support

           0       0.96      0.90      0.93       595
           1       0.98      0.99      0.99      3088

    accuracy                           0.98      3683
   macro avg       0.97      0.95      0.96      3683
weighted avg       0.98      0.98      0.98      3683

Test Loss:  0.07618074667816227








{'0': {'precision': 0.9621621621621622,
  'recall': 0.8974789915966387,
  'f1-score': 0.9286956521739131,
  'support': 595},
 '1': {'precision': 0.9804987212276215,
  'recall': 0.993199481865285,
  'f1-score': 0.9868082368082367,
  'support': 3088},
 'accuracy': 0.9777355416779799,
 'macro avg': {'precision': 0.9713304416948918,
  'recall': 0.9453392367309619,
  'f1-score': 0.9577519444910749,
  'support': 3683},
 'weighted avg': {'precision': 0.9775363936023301,
  'recall': 0.9777355416779799,
  'f1-score': 0.9774199696734492,
  'support': 3683}}

In [13]:
# Evaluate the negation model on the SP subset file; the call prints a
# classification report and the cell displays the returned metrics dict.
# (SP presumably stands for specialist letters in the EMC DCC — confirm.)
mc_negation.eval(json_file_SP)

**************************************************  Test
              precision    recall  f1-score   support

           0       0.95      0.71      0.81       416
           1       0.95      0.99      0.97      2309

    accuracy                           0.95      2725
   macro avg       0.95      0.85      0.89      2725
weighted avg       0.95      0.95      0.95      2725

Test Loss:  0.2053001417246248








{'0': {'precision': 0.9488817891373802,
  'recall': 0.7139423076923077,
  'f1-score': 0.8148148148148148,
  'support': 416},
 '1': {'precision': 0.9506633499170812,
  'recall': 0.9930705933304461,
  'f1-score': 0.9714043634823131,
  'support': 2309},
 'accuracy': 0.9504587155963303,
 'macro avg': {'precision': 0.9497725695272308,
  'recall': 0.853506450511377,
  'f1-score': 0.8931095891485639,
  'support': 2725},
 'weighted avg': {'precision': 0.9503913758677763,
  'recall': 0.9504587155963303,
  'f1-score': 0.947499316786651,
  'support': 2725}}

In [14]:
# Evaluate the negation model on the DL subset file; the call prints a
# classification report and the cell displays the returned metrics dict.
# (DL presumably stands for discharge letters in the EMC DCC — confirm.)
mc_negation.eval(json_file_DL)

**************************************************  Test
              precision    recall  f1-score   support

           0       0.96      0.87      0.91       379
           1       0.98      0.99      0.99      2417

    accuracy                           0.98      2796
   macro avg       0.97      0.93      0.95      2796
weighted avg       0.98      0.98      0.98      2796

Test Loss:  0.07943459508741009








{'0': {'precision': 0.9620991253644315,
  'recall': 0.8707124010554089,
  'f1-score': 0.9141274238227147,
  'support': 379},
 '1': {'precision': 0.9800244598450877,
  'recall': 0.994621431526686,
  'f1-score': 0.9872689938398358,
  'support': 2417},
 'accuracy': 0.9778254649499285,
 'macro avg': {'precision': 0.9710617926047596,
  'recall': 0.9326669162910475,
  'f1-score': 0.9506982088312752,
  'support': 2796},
 'weighted avg': {'precision': 0.9775946666518943,
  'recall': 0.9778254649499285,
  'f1-score': 0.9773545964734235,
  'support': 2796}}

In [15]:
# Evaluate the negation model on the GP subset file; the call prints a
# classification report and the cell displays the returned metrics dict.
# (GP presumably stands for general practitioner entries in the EMC DCC — confirm.)
mc_negation.eval(json_file_GP)

**************************************************  Test
              precision    recall  f1-score   support

           0       0.93      0.73      0.82       383
           1       0.97      0.99      0.98      3024

    accuracy                           0.96      3407
   macro avg       0.95      0.86      0.90      3407
weighted avg       0.96      0.96      0.96      3407

Test Loss:  0.11154603286247168








{'0': {'precision': 0.9297658862876255,
  'recall': 0.7258485639686684,
  'f1-score': 0.81524926686217,
  'support': 383},
 '1': {'precision': 0.9662162162162162,
  'recall': 0.9930555555555556,
  'f1-score': 0.9794520547945206,
  'support': 3024},
 'accuracy': 0.9630173172879366,
 'macro avg': {'precision': 0.9479910512519208,
  'recall': 0.859452059762112,
  'f1-score': 0.8973506608283452,
  'support': 3407},
 'weighted avg': {'precision': 0.9621186299636039,
  'recall': 0.9630173172879366,
  'f1-score': 0.9609930974190904,
  'support': 3407}}