# Negation detection with ConText

## Setup

In [1]:
import pathlib
import pickle
import pandas as pd
import spacy
from spacy import displacy
from spacy.tokens import Span, Doc
import medspacy
from medspacy.visualization import visualize_dep
from medspacy.context import ConTextRule, ConTextComponent
from sklearn.metrics import classification_report
# Print only the first occurrence of each matching warning.
# Workaround until the bug is fixed in v6 of the IPython kernel
# (presumably the repeated `should_run_async` warning that still shows up
# in some cell outputs below -- TODO confirm and drop once upgraded).
import warnings
warnings.simplefilter('once')

### Recreate the spacy pipeline:

In [2]:
# Load the spaCy pipeline stored under ../../data/pipeline.
nlp = spacy.load(pathlib.Path('..','..','data','pipeline'))
# Register the custom attributes set during labeling.
# force=True keeps this cell idempotent: without it spaCy raises a ValueError
# ("extension already exists") as soon as the cell is re-run in the same kernel.
Doc.set_extension("data_type", default=None, force=True)
Doc.set_extension("doc_id", default=None, force=True)
Span.set_extension("entity_id", default=None, force=True)
Span.set_extension("negation", default="NotNegated", force=True)
Span.set_extension("experiencer", default="Patient", force=True)
Span.set_extension("temporality", default="Recent", force=True)

  and should_run_async(code)


### Load texts, rules:

In [3]:
# Location of the pickled documents (used below as spaCy Doc objects).
docs_file = pathlib.Path('..') / '..' / 'data' / 'DCC_docs.pickle'
# NOTE: unpickling can execute arbitrary code -- only load pickles from a
# trusted source.
with docs_file.open('rb') as pickle_handle:
    preproc_docs = pickle.load(pickle_handle)

In [4]:
# Trigger file: all negation triggers in the test set from ContextD.
context_file = pathlib.Path('..') / 'configs' / 'context' / 'contextD_triggers.json'

In [5]:
# Build the ConText component for this pipeline; the trigger file path is
# handed over as a plain string through `rule_list`.
context = ConTextComponent(
    nlp,
    rules="other",
    rule_list=str(context_file),
)

In [6]:
# Gzipped CSV that will receive the rule-based predictions.
result_file = pathlib.Path('..') / '..' / 'results' / 'rule-based_predictions.csv.gz'

## Example

In [7]:
# Select the document with id 'DL1112'. Unpacking into a one-element target
# raises a ValueError unless exactly one document matches.
(example_doc,) = (
    candidate for candidate in preproc_docs if candidate._.doc_id == 'DL1112'
)

In [8]:
# Visualize the entities (style='ent') of the example document with displaCy.
displacy.render(example_doc, style='ent')

Add ConText:

In [9]:
# Apply only the ConText component to the example document, then
# visualize what it produced.
example_with_context = context(example_doc)
visualize_dep(example_with_context)

## Run ContextD on all docs

In [10]:
%%time
docs = []
for doc in preproc_docs:
    docs.append(context(doc))

CPU times: user 589 ms, sys: 70 µs, total: 589 ms
Wall time: 587 ms


## Score performance

### Negation

Table of all predictions:

In [11]:
# One row per entity: its id, the gold annotation and the rule-based prediction.
res = []
for d in docs:
    for e in d.ents:
        res.append({'entity_id': e._.entity_id,
                    'annotation': 'negated' if e._.negation=='Negated' else 'not negated',
                    'rule_based': 'negated' if e._.is_negated else 'not negated'})
# Create the output directory if needed: to_csv does not create missing
# parent directories and would fail on a fresh checkout otherwise.
result_file.parent.mkdir(parents=True, exist_ok=True)
pd.DataFrame(res).to_csv(result_file, index=False, compression='gzip')

#### Total

In [12]:
# Flatten the entities of all documents once, then derive the gold labels
# (annotation equals 'Negated') and the rule-based predictions from them.
all_ents = [ent for d in docs for ent in d.ents]
trues = [ent._.negation == 'Negated' for ent in all_ents]
preds = [ent._.is_negated for ent in all_ents]
# Display names for the two classes, in the order False, True.
target_names = ['not negated', 'negated']

In [13]:
# Negation performance over all entities of all documents.
overall_report = classification_report(trues, preds, target_names=target_names)
print(overall_report)

              precision    recall  f1-score   support

 not negated       0.98      0.97      0.98     10791
     negated       0.82      0.89      0.86      1760

    accuracy                           0.96     12551
   macro avg       0.90      0.93      0.92     12551
weighted avg       0.96      0.96      0.96     12551



#### Per document type

In [14]:
# Score negation separately for each document type.
data_types = ['GP', 'SP', 'RD', 'DL']
for dt in data_types:
    # Loop-local names: reusing `trues`/`preds` here would overwrite the
    # overall lists, so re-running the overall report cell afterwards would
    # silently print the numbers of the last data type instead.
    dt_ents = [e for d in docs if d._.data_type == dt for e in d.ents]
    dt_trues = [e._.negation == 'Negated' for e in dt_ents]
    dt_preds = [e._.is_negated for e in dt_ents]
    print(f"Data type: {dt}")
    print(classification_report(dt_trues, dt_preds, target_names=target_names))

Data type: GP
              precision    recall  f1-score   support

 not negated       0.97      0.95      0.96      3013
     negated       0.67      0.80      0.73       383

    accuracy                           0.93      3396
   macro avg       0.82      0.88      0.85      3396
weighted avg       0.94      0.93      0.94      3396

Data type: SP
              precision    recall  f1-score   support

 not negated       0.97      0.96      0.97      2273
     negated       0.81      0.84      0.82       403

    accuracy                           0.95      2676
   macro avg       0.89      0.90      0.89      2676
weighted avg       0.95      0.95      0.95      2676



  and should_run_async(code)


Data type: RD
              precision    recall  f1-score   support

 not negated       0.99      0.98      0.99      3088
     negated       0.90      0.97      0.93       595

    accuracy                           0.98      3683
   macro avg       0.95      0.97      0.96      3683
weighted avg       0.98      0.98      0.98      3683

Data type: DL
              precision    recall  f1-score   support

 not negated       0.99      0.98      0.99      2417
     negated       0.89      0.92      0.91       379

    accuracy                           0.97      2796
   macro avg       0.94      0.95      0.95      2796
weighted avg       0.97      0.97      0.97      2796



Compare to paper: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-014-0373-3/tables/5

- **GP**: 
    - Precision and recall much worse than in paper. Probably because we haven't added the GP-specific tweaks for the rules yet?
    - Also, recall is higher than precision here, whereas it is the other way around in the paper (for the baseline)
- **SP**: Performance pretty similar (couple points worse)
- **RD**: Performance almost exactly the same as in paper (for final)
- **DL**: Performance pretty similar (bit worse)