In [None]:
# Install required packages (only run once)
!pip install transformers torch -q

from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import re

# Load model and tokenizer
# The tokenizer and the token-classification weights are both fetched by the
# same checkpoint name.
model_name = "Helios9/BioMed_NER"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# NER pipeline
# aggregation_strategy="none" requests raw per-token predictions (no grouping
# by the library); the span stitching is done by hand further down. This is
# the current spelling of the deprecated `grouped_entities=False` flag.
ner_pipeline = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="none",
)

# Input clinical text
# The passage to tag. NOTE: the triple-quoted literal spans two physical
# lines, so the string contains one embedded newline character; the character
# offsets used later index into this exact string.
text="""Not a Normal Part of Aging Nearly 1 in 3 American adults have high blood pressure. Many people get high blood pressure as they get older. However, getting high blood pressure is not a normal part of aging. There are things you can do to help keep your blood pressure normal, such as eating a healthy diet and getting more exercise. Risk Factors Anyone can develop high blood pressure. However, these factors can increase your risk for developing high blood pressure. -  age  - race or ethnicity  - being overweight  - gender  - lifestyle habits  - a family history of high blood pressure.  age race or ethnicity being overweight gender lifestyle habits a family history of high blood pressure. Age Blood pressure tends to rise with age. In fact, about 65 percent of Americans age 60 or older have high blood pressure. Race/Ethnicity High blood pressure is more common in African American adults than in Caucasian or Hispanic American adults. Compared with these ethnic groups, African Americans -  tend to get high blood pressure earlier in life  - often have higher blood pressure numbers  - are less likely to achieve target blood pressure goals with treatment. tend to get high blood pressure earlier in life often have higher blood pressure numbers are less likely to achieve target blood pressure goals with treatment. Overweight You are more likely to develop prehypertension or high blood pressure if youre overweight or obese. The terms overweight and obese refer to body weight thats greater than what is considered healthy for a certain height. Gender Before age 55, men are more likely than women to develop high blood pressure. After age 55, women are more likely than men to develop high blood pressure. Lifestyle Habits Unhealthy lifestyle habits can raise your risk for high blood pressure, and they include -  eating too much sodium or too little potassium   - lack of physical activity  - drinking too much alcohol  - smoking  - stress. 
eating too much sodium or too little potassium lack of physical activity drinking too much alcohol smoking stress. Family History A family history of high blood pressure raises the risk of developing prehypertension or high blood pressure. Some people have a high sensitivity to sodium and salt, which may increase their risk for high blood pressure and may run in families. Genetic causes of this condition are why family history is a risk factor for this condition."""
# Run NER pipeline
# The result is iterated below as a sequence of per-token items from which
# the "word", "entity", "start" and "end" keys are read.
ner_results = ner_pipeline(text)

# Improved post-processing using character positions
# Walk the per-token predictions in order and stitch tokens into spans.
# A token extends the span in progress only when it starts exactly where the
# span currently ends AND carries the same entity type; anything else closes
# the span and opens a new one. Each finished span is stored in `entities` as
# a 4-tuple: (accumulated token text, entity type, start offset, end offset).
entities = []
current_entity = ""
current_label = ""
current_start = None
current_end = None

for item in ner_results:
    word = item["word"]
    label = item["entity"]
    start = item["start"]
    end = item["end"]

    # Drop the tagging-scheme prefix (the part before the hyphen). Only the
    # FIRST hyphen is treated as the separator, so an entity type that itself
    # contains a hyphen no longer raises on tuple unpacking.
    _, separator, remainder = label.partition("-")
    entity_type = remainder if separator else label

    if entity_type == "O":
        # An outside-of-entity token terminates whatever span is open.
        if current_entity:
            entities.append((current_entity, current_label, current_start, current_end))
            current_entity = ""
            current_label = ""
        continue

    is_subword = word.startswith("##")

    extends_current = (
        bool(current_entity)
        and start == current_end
        and entity_type == current_label
    )
    if extends_current:
        # "##" marks a word-piece continuation: glue it on without a space.
        if is_subword:
            current_entity += word[2:]
        else:
            current_entity += " " + word
        current_end = end
        continue

    # Not a continuation: close the open span (if any) and start a new one.
    if current_entity:
        entities.append((current_entity, current_label, current_start, current_end))
    current_entity = word[2:] if is_subword else word
    current_label = entity_type
    current_start = start
    current_end = end

# Flush the span still open after the last token.
if current_entity:
    entities.append((current_entity, current_label, current_start, current_end))

# Recover each span's surface form from the original input and normalise it.
# The accumulated token text is ignored here: the string is re-sliced from
# `text` by character offsets, then stripped and lower-cased. The result is a
# list of (normalised text, entity type) pairs; offsets are not carried over.
final_entities = [
    (text[span_start:span_end].strip().lower(), span_label)
    for _, span_label, span_start, span_end in entities
]

# Merge Detailed_description followed by Sign_symptom
# A Detailed_description immediately followed (in list order) by a
# Sign_symptom is fused into one Sign_symptom whose text is the two texts
# joined by a space; both inputs are consumed. Everything else is copied
# through unchanged. Adjacency is judged by list position only, since the
# pairs carry no character offsets.
merged_entities = []
position = 0
total = len(final_entities)
while position < total:
    head = final_entities[position]
    follower = final_entities[position + 1] if position + 1 < total else None
    is_mergeable = (
        follower is not None
        and head[1] == "Detailed_description"
        and follower[1] == "Sign_symptom"
    )
    if is_mergeable:
        merged_entities.append((f"{head[0]} {follower[0]}", "Sign_symptom"))
        position += 2
    else:
        merged_entities.append(head)
        position += 1

# Remove duplicates
# Keep the first occurrence of each entity text, preserving order. The key is
# the text alone, so a later entry with the same text but a different label
# is dropped. Entries labelled "O" are never kept.
seen_texts = set()
unique_entities = []
for candidate in merged_entities:
    surface, tag = candidate[0], candidate[1]
    if tag == "O" or surface in seen_texts:
        continue
    seen_texts.add(surface)
    unique_entities.append(candidate)

# Display results
# Header followed by one "text: label" line per unique entity.
print("Named Entities Found:\n")
for entity_text, entity_label in unique_entities:
    print(entity_text, entity_label, sep=": ")
