In [2]:
# Install required packages (only run once)
!pip install transformers torch -q

from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import re

# Load model and tokenizer from the same checkpoint so that the label ids and
# the vocabulary match.
model_name = "Helios9/BioMed_NER"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# NER pipeline.
# aggregation_strategy="none" replaces the deprecated grouped_entities=False:
# the pipeline returns one prediction per token and the grouping is done by
# the post-processing code further down.
ner_pipeline = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="none",
)

# Input clinical text: a passage about anti-diarrheal treatments. It still
# contains bracketed citation markers (e.g. "[91]") and "\n\n" paragraph
# breaks; the string is passed to the pipeline as-is, without any cleaning.
text="""While bismuth compounds (Pepto-Bismol) decreased the number of bowel movements in those with travelers' diarrhea, they do not decrease the length of illness.[91] Anti-motility agents like loperamide are also effective at reducing the number of stools but not the duration of disease.[8] These agents should be used only if bloody diarrhea is not present.[92]\n\nDiosmectite, a natural aluminomagnesium silicate clay, is effective in alleviating symptoms of acute diarrhea in children,[93] and also has some effects in chronic functional diarrhea, radiation-induced diarrhea, and chemotherapy-induced diarrhea.[45] Another absorbent agent used for the treatment of mild diarrhea is kaopectate.\n\nRacecadotril an antisecretory medication may be used to treat diarrhea in children and adults.[86] It has better tolerability than loperamide, as it causes less constipation and flatulence.[94]"""
# Run NER pipeline. The post-processing below reads the "word", "entity",
# "start" and "end" fields of every returned item; "start"/"end" are later
# used to slice `text`, so `text` must not be modified after this call.
ner_results = ner_pipeline(text)

# Post-processing of the token-level predictions using character positions.
#
# Each step lives in its own small function so it can be read and exercised
# in isolation; the driver at the bottom runs the steps in order and leaves
# the same result variables behind (entities, final_entities,
# merged_entities, unique_entities).


def group_token_predictions(ner_results):
    """Collapse token-level predictions into contiguous entity spans.

    Two consecutive tokens are joined when they carry the same entity type
    and the second one starts at the exact character offset where the first
    one ended. An "O" label closes the span that is currently open.

    Args:
        ner_results: Iterable of dicts with the keys "word", "entity",
            "start" and "end".

    Returns:
        A list of ``(surface, entity_type, start, end)`` tuples in input
        order. ``surface`` is rebuilt from the token strings with a leading
        "##" marker removed; ``start``/``end`` delimit the whole span.
    """
    spans = []
    # [surface, entity_type, start, end] of the span under construction.
    # None (rather than an empty surface string) marks "no open span", so a
    # token whose surface happens to be empty cannot make a span disappear.
    open_span = None

    for item in ner_results:
        word = item["word"]
        label = item["entity"]

        # Drop the tagging prefix ("B-", "I-", ...). Only the first hyphen is
        # used as the separator, so an entity type that itself contains a
        # hyphen is kept whole instead of breaking tuple unpacking.
        _, hyphen, remainder = label.partition("-")
        entity_type = remainder if hyphen else label

        if entity_type == "O":
            if open_span is not None:
                spans.append(tuple(open_span))
                open_span = None
            continue

        is_continuation = word.startswith("##")
        piece = word[2:] if is_continuation else word

        extends_open_span = (
            open_span is not None
            and item["start"] == open_span[3]
            and entity_type == open_span[1]
        )
        if extends_open_span:
            open_span[0] += piece if is_continuation else " " + piece
            open_span[3] = item["end"]
        else:
            if open_span is not None:
                spans.append(tuple(open_span))
            open_span = [piece, entity_type, item["start"], item["end"]]

    if open_span is not None:
        spans.append(tuple(open_span))
    return spans


def clean_span_texts(entities, text):
    """Re-read every span from *text* and normalise it.

    Args:
        entities: ``(surface, entity_type, start, end)`` tuples.
        text: The string the offsets refer to.

    Returns:
        A list of ``(cleaned_text, entity_type)`` pairs, where
        ``cleaned_text`` is ``text[start:end]`` stripped and lower-cased.
    """
    return [
        (text[start:end].strip().lower(), entity_type)
        for _, entity_type, start, end in entities
    ]


def merge_description_with_symptom(entities, text):
    """Fuse a Detailed_description with the Sign_symptom right after it.

    The pair is merged only when nothing but whitespace separates the two
    spans in *text*; a description and a symptom that are merely neighbours
    in the list but lie apart in the text are kept as separate entries.

    Args:
        entities: ``(surface, entity_type, start, end)`` tuples in text order.
        text: The string the offsets refer to.

    Returns:
        A list of ``(cleaned_text, entity_type)`` pairs; a merged pair is
        reported as ``("<description> <symptom>", "Sign_symptom")``.
    """
    cleaned = clean_span_texts(entities, text)
    merged = []
    index = 0
    while index < len(entities):
        surface, entity_type = cleaned[index]
        if entity_type == "Detailed_description" and index + 1 < len(entities):
            next_surface, next_type = cleaned[index + 1]
            # Characters between the end of this span and the start of the next.
            gap = text[entities[index][3]:entities[index + 1][2]]
            if next_type == "Sign_symptom" and not gap.strip():
                merged.append((f"{surface} {next_surface}", "Sign_symptom"))
                index += 2
                continue
        merged.append((surface, entity_type))
        index += 1
    return merged


def drop_duplicate_texts(entities):
    """Keep the first occurrence of every entity text and drop "O" entries.

    Uniqueness is decided on the text alone, so when the same text appears
    with different labels only the first label is kept.

    Args:
        entities: ``(cleaned_text, entity_type)`` pairs.

    Returns:
        The filtered list of pairs, in their original order.
    """
    seen = set()
    unique = []
    for surface, entity_type in entities:
        if entity_type == "O" or surface in seen:
            continue
        seen.add(surface)
        unique.append((surface, entity_type))
    return unique


# The guard keeps the functions above importable without running the model.
# When the cell or script is executed directly, __name__ is "__main__" and
# the steps run exactly once, in order.
if __name__ == "__main__":
    entities = group_token_predictions(ner_results)
    final_entities = clean_span_texts(entities, text)
    merged_entities = merge_description_with_symptom(entities, text)
    unique_entities = drop_duplicate_texts(merged_entities)

    # Display results
    print("Named Entities Found:\n")
    for word, label in unique_entities:
        print(f"{word}: {label}")



[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip
Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Named Entities Found:

bismuth compounds: Medication
pepto-bismol: Detailed_description
bowel movements: Diagnostic_procedure
travelers: Detailed_description
' diarrhea: Disease_disorder
anti-motility agents: Medication
loperamide: Medication
bloody: Detailed_description
diarrhea: Disease_disorder
dios: Medication
mec: Detailed_description
tite: Medication
natural aluminomagnesium silicate clay: Detailed_description
acute: Detailed_description
chronic: Detailed_description
radiation: Detailed_description
chemotherapy: Detailed_description
kaopectate: Medication
racecadotril: Medication
antisecretory medication: Medication
constipation: Sign_symptom
flatulence: Sign_symptom
