# MetaCAT - Error analysis

In [1]:
from pathlib import Path

from medcat.cat import CAT
from medcat.vocab import Vocab
from medcat.cdb import CDB
from medcat.config import Config
from medcat.meta_cat import MetaCAT

# Must start at column 0: an indented top-level import raises IndentationError.
from tqdm.autonotebook import tqdm


In [2]:
# Input files: concept database and vocabulary, both inside the 'data'
# directory one level above the current working directory.
data_dir = Path.cwd().parents[0].joinpath('data')
cdb_file = data_dir.joinpath('cdb.dat')
vocab_file = data_dir.joinpath('vocab.dat')

# Model directory ('models/bilstm'), also one level above the working directory.
model_dir = Path.cwd().parents[0].joinpath('models', 'bilstm')

# Name should contain 'bbpe' for ByteLevelBPETokenizer or 'bert' for BertTokenizerFast
tokenizer_name = 'bbpe_dutch-wikipedia'

## Load MetaCAT model and tokenizer

In [3]:
# Load the negation MetaCAT model stored in model_dir (nothing is created or
# trained in this cell).
# NOTE(review): add_prefix_space is presumably forwarded to the tokenizer --
# confirm against the installed MedCAT version.
mc_negation = MetaCAT().load(save_dir=model_dir, add_prefix_space=True)

## Example usage

In [4]:
# MedCAT configuration object, created without arguments
config = Config()

# Read the vocabulary and the concept database from disk
vocab = Vocab.load(vocab_file)
cdb = CDB.load(cdb_file)

# Assemble the MedCAT pipeline with the negation MetaCAT attached
cat = CAT(
    cdb=cdb,
    vocab=vocab,
    config=config,
    meta_cats=[mc_negation],
)

In [5]:
# DCC document DL1114: sentence containing a negation ("geen")
text = 'Echo- en rontgenonderzoek van de heup toont geen evidente heupdysplasie.'
doc = cat(text)
for ent in doc.ents:
    # Entity text, its meta annotations, then blank lines as a separator.
    print(
        f"Entity: {ent.text}",
        f"Meta Annotations: {ent._.meta_anns}",
        "\n",
        sep="\n",
    )

Entity: heup
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.9633982, 'name': 'Negation'}}


Entity: heupdysplasie
Meta Annotations: {'Negation': {'value': 'negated', 'confidence': 0.99851835, 'name': 'Negation'}}




In [6]:
# DCC document DL1114: same sentence with the negation ("geen") removed
text = 'Echo- en rontgenonderzoek van de heup toont evidente heupdysplasie.'
doc = cat(text)
for ent in doc.ents:
    # Entity text, its meta annotations, then blank lines as a separator.
    print(
        f"Entity: {ent.text}",
        f"Meta Annotations: {ent._.meta_anns}",
        "\n",
        sep="\n",
    )

Entity: heup
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.9971554, 'name': 'Negation'}}


Entity: heupdysplasie
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.9817072, 'name': 'Negation'}}




## Additional testing

In [7]:
# DCC document DL1112
text = 'Patient kan zich geen trauma herinneren.'
doc = cat(text)
for ent in doc.ents:
    # Entity text, its meta annotations, then blank lines as a separator.
    print(
        f"Entity: {ent.text}",
        f"Meta Annotations: {ent._.meta_anns}",
        "\n",
        sep="\n",
    )

# Nothing is printed: "trauma" is not identified as a medical concept.

In [8]:
# Test on NTvG article
# https://www.ntvg.nl/artikelen/acute-buik-op-basis-van-een-wandelende-milt
text = '1 maand na de operatie had patiënte geen buikpijn meer en was zij goed hersteld.'
doc = cat(text)
for ent in doc.ents:
    print(f"Entity: {ent.text}")
    print(f"Meta Annotations: {ent._.meta_anns}")
    print("\n")

# In the recorded output, "buikpijn" is labelled 'not negated' (confidence ~0.79),
# so the negation ("geen buikpijn") was NOT identified.

Entity: operatie
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.999545, 'name': 'Negation'}}


Entity: buikpijn
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.79412663, 'name': 'Negation'}}




In [9]:
# Sentence taken from an NTvG article:
# https://www.ntvg.nl/artikelen/een-bezoar-bij-een-vrouw-met-clomipramine-intoxicatie
text = 'Er waren geen tekenen van darmobstructie te zien.'
doc = cat(text)
for ent in doc.ents:
    # Entity text, its meta annotations, then blank lines as a separator.
    print(
        f"Entity: {ent.text}",
        f"Meta Annotations: {ent._.meta_anns}",
        "\n",
        sep="\n",
    )

# The negation is identified correctly, but "zien" is wrongly linked as an entity.

Entity: darmobstructie
Meta Annotations: {'Negation': {'value': 'negated', 'confidence': 0.7368672, 'name': 'Negation'}}


Entity: zien
Meta Annotations: {'Negation': {'value': 'negated', 'confidence': 0.79253453, 'name': 'Negation'}}




In [10]:
# Test on NTvG article
# https://www.ntvg.nl/artikelen/nieuws/vaker-ziek-na-acute-fase-covid-19
text = 'Alle patiënten werden gematcht met een controlegroep bij wie geen SARS-CoV-2-infectie was geregistreerd.'
doc = cat(text)
for ent in doc.ents:
    print(f"Entity: {ent.text}")
    print(f"Meta Annotations: {ent._.meta_anns}")
    print("\n")

# In the recorded output, "SARS-CoV-2-infectie" is labelled 'not negated'
# (confidence ~0.94), so the negation ("geen SARS-CoV-2-infectie") was NOT identified.

Entity: patiënten
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.98904103, 'name': 'Negation'}}


Entity: controlegroep
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.9999113, 'name': 'Negation'}}


Entity: SARS-CoV-2-infectie
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.94040424, 'name': 'Negation'}}




In [11]:
text = 'Er zijn geen bijwerkingen gemeld van de scan'
doc = cat(text)
for ent in doc.ents:
    # Entity text, its meta annotations, then blank lines as a separator.
    print(
        f"Entity: {ent.text}",
        f"Meta Annotations: {ent._.meta_anns}",
        "\n",
        sep="\n",
    )
# The negation was not identified correctly.
# The entity "bijwerkingen" was not linked at all.

Entity: scan
Meta Annotations: {'Negation': {'value': 'not negated', 'confidence': 0.68284506, 'name': 'Negation'}}




### Check the effect of add_prefix_space (input with and without a leading space)

In [12]:
# Variant WITH a leading space in the input text.
text = " Zwelling treedt niet op."
doc = cat(text)
for ent in doc.ents:
    print(f"Entity: {ent.text}")
    print(f"Meta Annotations: {ent._.meta_anns}")
    print("\n")

# In the recorded output, "Zwelling" is labelled 'negated', but only with low
# confidence (~0.68). Difficult case; compare with the following cells.

Entity: Zwelling
Meta Annotations: {'Negation': {'value': 'negated', 'confidence': 0.6843536, 'name': 'Negation'}}




In [13]:
# Variant WITHOUT a leading space in the input text.
text = "Zwelling treedt niet op."
doc = cat(text)
for ent in doc.ents:
    print(f"Entity: {ent.text}")
    print(f"Meta Annotations: {ent._.meta_anns}")
    print("\n")

# In the recorded output, "Zwelling" is labelled 'negated', but only with low
# confidence (~0.68). Difficult case; compare with the following cells.

Entity: Zwelling
Meta Annotations: {'Negation': {'value': 'negated', 'confidence': 0.6843536, 'name': 'Negation'}}




In [14]:
text = "Geen zwelling treedt op."
doc = cat(text)
for ent in doc.ents:
    # Entity text, its meta annotations, then blank lines as a separator.
    print(
        f"Entity: {ent.text}",
        f"Meta Annotations: {ent._.meta_anns}",
        "\n",
        sep="\n",
    )

# The negation was identified.

Entity: zwelling
Meta Annotations: {'Negation': {'value': 'negated', 'confidence': 0.99862003, 'name': 'Negation'}}




In [15]:
text = "Geen zwelling treedt niet op."
doc = cat(text)
for ent in doc.ents:
    # Entity text, its meta annotations, then blank lines as a separator.
    print(
        f"Entity: {ent.text}",
        f"Meta Annotations: {ent._.meta_anns}",
        "\n",
        sep="\n",
    )

# The negation was identified (this sentence contains both "Geen" and "niet").

Entity: zwelling
Meta Annotations: {'Negation': {'value': 'negated', 'confidence': 0.9986272, 'name': 'Negation'}}




## Evaluate MetaCAT on subsets of the data
The ContextD paper reports precision, recall and F1-score on subsets of the data. In this section we calculate the same scores with the model loaded above. Note that these scores are computed on data that was also used during the training phase. For proper score calculations, we will do cross-validation at a later stage.

In [None]:
# Annotation files inside data_dir, one per subset (DL, GP, RD, SP)
json_file_DL = data_dir.joinpath('emc-dcc_ann_DL.json')
json_file_GP = data_dir.joinpath('emc-dcc_ann_GP.json')
json_file_RD = data_dir.joinpath('emc-dcc_ann_RD.json')
json_file_SP = data_dir.joinpath('emc-dcc_ann_SP.json')

In [None]:
# Evaluate the negation MetaCAT on the RD subset (emc-dcc_ann_RD.json).
mc_negation.eval(json_file_RD)

In [None]:
# Evaluate the negation MetaCAT on the SP subset (emc-dcc_ann_SP.json).
mc_negation.eval(json_file_SP)

In [None]:
# Evaluate the negation MetaCAT on the DL subset (emc-dcc_ann_DL.json).
mc_negation.eval(json_file_DL)

In [None]:
# Evaluate the negation MetaCAT on the GP subset (emc-dcc_ann_GP.json).
mc_negation.eval(json_file_GP)