In [14]:
import spacy
from negspacy.negation import Negex

!pip install spacy
!pip install negspacy
!pip install <Model URL> 'en_ner_bc5cdr_md'

#<Model URL>: https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.3.0/en_ner_bc5cdr_md-0.3.0.tar.gz

Load the spaCy language model and add the negspacy pipeline component. Filtering on entity types is optional.

In [20]:
# Name of the BC5CDR NER model; also passed to get_negation_entities later on.
bc5_model = "en_ner_bc5cdr_md"
# Load through the constant so the model name is written in one place.
sp_bc5_model = spacy.load(bc5_model)
# Sample clinical notes.
# Adjacent string literals are concatenated by Python. Every fragment that ends a
# sentence carries its own trailing space, so sentences never run together
# (the previous backslash continuations dropped that space in two places).
clinical_note = (
    "Patient is a 60 year old having difficuly in breathing. "
    "Not diabetic. "
    "He feels that he has been in good health until this current episode. "
    "Appetite - good. No chest pain. "
    "No weight loss or episodes of stomach pain. "
    "Hypertension absent."
)
clinical_note_3 = (
    "Patient resting in bed. Patient given azithromycin without any difficulty. Patient has audible wheezing, "
    "states chest tightness. No evidence of hypertension. "
    "Patient denies nausea at this time. zofran declined. Patient is also having intermittent sweating associated with pneumonia. "
    "Patient refused pain but tylenol still given. Neither substance abuse nor alcohol use however cocaine once used in the last year. Alcoholism unlikely. "
    "Patient has headache and fever. Patient is not diabetic. "
    "No signs of diarrhea. Lab reports confirm lymphocytopenia. Cardaic rhythm is Sinus bradycardia. "
    "Patient also has a history of cardiac injury. No kidney injury reported. No abnormal rashes or ulcers. "
    "Patient might not have liver disease. Confirmed absence of hemoptysis. Although patient has severe pneumonia and fever"
    ", test reports are negative for COVID-19 infection. COVID-19 viral infection absent."
)

# Names of the additional spaCy models loaded below.
base_model = "en_core_web_sm"
sci_model = "en_core_sci_sm"
# Load through the constants so each model name is written exactly once.
sp_base_model = spacy.load(base_model)
sp_sci_model = spacy.load(sci_model)
# sp_bc5_model is already loaded at the top of this cell; loading
# "en_ner_bc5cdr_md" a second time here only repeated that work.
# Longer sample notes, written as adjacent literals (joined by Python) to keep lines readable.
clinical_note_1 = (
    "The patient is a 77-year-old female who is unable to give any information. "
    "She has been sedated with Ativan and came into the emergency room obtunded "
    "and unable to give any history. "
    "On a chest x-ray for what appeared to be shortness of breath she was found "
    "to have what was thought to be free air under the right diaphragm."
)
clinical_note_2 = (
    "She patient is a 71-year-old female patient of Dr. X. "
    "The patient presented to the emergency room last evening with approximately "
    "7- to 8-day history of abdominal pain which has been persistent. "
    "She was seen 3 to 4 days ago at ABC ER and underwent evaluation and discharged "
    "and had a CT scan at that time and she was told it was normal. "
    "She was given oral antibiotics of Cipro and Flagyl. "
    "She has had no nausea and vomiting, but has had persistent associated anorexia. "
    "She is passing flatus, but had some obstipation symptoms with the last bowel "
    "movement two days ago. "
    "She denies any bright red blood per rectum and no history of recent melena. "
    "Her last colonoscopy was approximately 5 years ago with Dr. Y. "
    "She has had no definite fevers or chills and no history of jaundice. "
    "The patient denies any significant recent weight loss."
)

#Lemmatizer - capture all forms of negation(e.g., deny: denies, denying)

def lemmatize(note, nlp):
    """Return `note` with every token replaced by its lemma, joined by single spaces.

    note: text to normalise.
    nlp:  callable that turns text into an iterable of tokens exposing `lemma_`.
    """
    return " ".join(token.lemma_ for token in nlp(note))
# Lemmatise the second clinical note and show the result.
lem_clinical_note = lemmatize(note=clinical_note_2, nlp=sp_bc5_model)
print(lem_clinical_note)

# Parse the lemmatised text with the BC5CDR pipeline.
doc = sp_bc5_model(lem_clinical_note)

Named Entity Extraction

#function to modify options for displacy NER visualization
def get_entity_options():
    """Build the displaCy `options` dict: the labels to show and a colour for each."""
    # Insertion order of this mapping is also the order of the "ents" list.
    label_colors = {
        'DISEASE': 'linear-gradient(180deg, #66ffcc, #abf763)',
        'CHEMICAL': 'linear-gradient(90deg, #aa9cfc, #fc9ce7)',
        "NEG_ENTITY": 'linear-gradient(90deg, #ffff66, #ff6600)',
    }
    return {"ents": list(label_colors), "colors": label_colors}

# displaCy options used by the entity visualisations.
options = get_entity_options()

# Render the named entities found in the parsed clinical note.
displacy.render(doc, options=options, style='ent')


# Negation demo on the lemmatised note.
# The original referenced an undefined `nlp_model` and never added the Negex
# component to a pipeline, so no negation flags were computed.
# A separate pipeline is loaded here so the shared `sp_bc5_model` is not modified
# and re-running the cell does not add the component twice.
negex_nlp = spacy.load(bc5_model)
negex = Negex(negex_nlp, language='en_clinical_sensitive', chunk_prefix=["no"])
negex_nlp.add_pipe(negex, last=True)

# Print each entity together with its negation flag.
for e in negex_nlp(lem_clinical_note).ents:
    print(e.text, e._.negex)

Negative Feature Extraction

In [21]:
#Adding a new pipeline component to identify negation
def negation_model(nlp_model):
    """Load `nlp_model` and return the pipeline with a Negex component appended.

    The component is built with "absent" added to the following-negation terms.
    """
    nlp = spacy.load(nlp_model)

    # A first Negex instance is created only to read its following patterns,
    # to which the "absent" pattern is appended.
    default_negex = Negex(nlp)
    default_negex.following_patterns += [nlp('absent')]
    following_terms = [pattern.text for pattern in default_negex.following_patterns]

    # The component that is actually installed is built from the extended term list.
    nlp.add_pipe(Negex(nlp, following_negations=following_terms))
    return nlp

#Identifying negation entities
def get_negation_entities(nlp_model, text, negation_model):
    """Return the text of every entity whose negation flag is set.

    Args:
        nlp_model: value handed to `negation_model` to build the pipeline.
        text: note to analyse; it is split on "." and each piece is processed
            separately.
        negation_model: callable taking `nlp_model` and returning a pipeline
            whose documents expose `.ents`, each entity carrying `._.negex`.

    Returns:
        List of entity strings with a truthy `._.negex`, in document order.
    """
    # Set up negex in the pipeline (built once, reused for every sentence).
    nlp = negation_model(nlp_model)

    results = []
    for sentence in text.split("."):
        # split(".") leaves empty/whitespace-only pieces (e.g. after the final
        # period); they cannot contain entities, so skip the pipeline call.
        if not sentence.strip():
            continue
        doc = nlp(sentence)
        # Test the flag directly instead of comparing its string form to "True".
        results.extend(e.text for e in doc.ents if e._.negex)
    return results

# Collect the negated entities found in `clinical_note`.
final_results = get_negation_entities(
    nlp_model=bc5_model,
    text=clinical_note,
    negation_model=negation_model,
)

# Print the list of negated entities.
print(final_results)

['diabetic', 'chest pain', 'weight loss', 'stomach pain', 'Hypertension']


#function to identify span objects of matched negative phrases from clinical note
def match(nlp, terms, label):
    """Return a PhraseMatcher on `nlp.vocab` holding one pattern per entry of `terms`.

    nlp:   pipeline used to tokenise each term (`make_doc`) and to supply the vocab.
    terms: iterable of phrase strings.
    label: key under which all patterns are registered.
    """
    term_docs = [nlp.make_doc(term) for term in terms]
    phrase_matcher = PhraseMatcher(nlp.vocab)
    # Second argument is the on-match callback slot; none is used.
    phrase_matcher.add(label, None, *term_docs)
    return phrase_matcher
#replacing the labels for identified negative entities
def overwrite_ent_lbl(matcher, doc):
    """Relabel the phrases found by `matcher` as entities on `doc`.

    Each accepted match becomes a new Span labelled with the match id. Existing
    entities that overlap an accepted match are dropped; all others are kept.

    Args:
        matcher: callable returning `(match_id, start, end)` triples for `doc`.
        doc: parsed document; its `ents` are replaced in place.

    Returns:
        The same `doc` object, with updated `ents`.
    """
    claimed_tokens = set()
    new_entities = []
    entities = doc.ents
    for match_id, start, end in matcher(doc):
        # Reject a match that shares ANY token with one already accepted.
        # Checking only the first and last token let through a longer match that
        # strictly encloses an earlier one, which produced overlapping spans.
        if not claimed_tokens.isdisjoint(range(start, end)):
            continue
        new_entities.append(Span(doc, start, end, label=match_id))
        # Drop existing entities that overlap the newly accepted span.
        entities = [
            e for e in entities if not (e.start < end and e.end > start)
        ]
        claimed_tokens.update(range(start, end))
    doc.ents = tuple(entities) + tuple(new_entities)
    return doc
# Build the matcher from names this notebook defines: the original call used
# `nlp1_model` and `results0`, neither of which exists anywhere in the notebook.
matcher = match(sp_bc5_model, final_results, "NEG_ENTITY")
# doc0: `doc` with matched phrases relabelled "NEG_ENTITY" (overwrite_ent_lbl
# updates `doc` in place and returns it).
# NOTE(review): `final_results` was extracted from `clinical_note`, while `doc`
# was parsed from the lemmatised `clinical_note_2` - confirm this pairing is intended.
doc0 = overwrite_ent_lbl(matcher, doc)
# Visualise the named entities, including the relabelled ones.
displacy.render(doc0, style='ent', options=options)

In [35]:
#function to identify span objects of matched negative phrases from clinical note
def match(nlp, terms, label):
    """Debug variant of `match`: same result, but prints the patterns and the matcher.

    This definition replaces the earlier `match` of the same name once the cell runs.
    """
    term_docs = [nlp.make_doc(term) for term in terms]
    print(term_docs)
    phrase_matcher = PhraseMatcher(nlp.vocab)
    # Second argument is the on-match callback slot; none is used.
    phrase_matcher.add(label, None, *term_docs)
    print(phrase_matcher)
    return phrase_matcher

# Phrase matcher for the negated entities, built on the BC5CDR pipeline.
matcher = match(nlp=sp_bc5_model, terms=final_results, label="NEG_ENTITY")

# Relabelling step, currently disabled:
#doc0 = overwrite_ent_lbl(matcher,doc)

[diabetic, appetite, chest pain, weight loss, stomach pain, back ache]


TypeError: print() argument after * must be an iterable, not spacy.matcher.phrasematcher.PhraseMatcher

In [50]:
# Sanity check: run the phrase matcher over a short phrase that contains none of the terms.
# `sp_base_model` is reused from the earlier cell instead of reloading "en_core_web_sm".
# NOTE(review): `matcher` was built on sp_bc5_model's vocab while this doc comes
# from sp_base_model - confirm that mixing the two is intended.
doc = sp_base_model("yellow fabric")
print(doc)
matches = matcher(doc)
print(matches)
for match_id, start, end in matches:
    print(start, end)
    # Resolve the match id to its label string via the vocab the matcher was
    # built on; the original looked it up on an undefined name `nlp`.
    rule_id = sp_bc5_model.vocab.strings[match_id]  # e.g. 'NEG_ENTITY'
    span = doc[start:end]  # the matched slice of the doc
    print(rule_id, span.text)

yellow fabric
[]


In [58]:
#function to modify options for displacy NER visualization
def get_entity_options():
    """Return displaCy options: the entity labels to display and their colours."""
    palette = {}
    palette['DISEASE'] = 'linear-gradient(180deg, #66ffcc, #abf763)'
    palette['CHEMICAL'] = 'linear-gradient(90deg, #aa9cfc, #fc9ce7)'
    palette["NEG_ENTITY"] = 'linear-gradient(90deg, #ffff66, #ff6600)'
    # Labels are listed in the same order as they were added to the palette.
    return {"ents": [label for label in palette], "colors": palette}
# displaCy options for the visualisation below.
options = get_entity_options()

# Parse the third clinical note with the BC5CDR pipeline.
doc = sp_bc5_model(clinical_note_3)

# Render the named entities found in the note.
displacy.render(doc, options=options, style='ent')