In [3]:
import json 
import spacy

nlp = spacy.load("en_core_web_sm")  # General-purpose model

def extract_medical_entities(text):
    """Extract medical terms using n-grams and keyword matching."""
    doc = nlp(text.lower())

    # Define lowercase keyword sets
    symptom_keywords = {"dizziness", "palpitations", "fatigue", "nausea", "headache", "insomnia", "chronic pain", "muscle weakness", 
    "joint pain", "fever", "weight loss", "swelling", "cough", "shortness of breath", "chronic fatigue", "chest pain", 
    "difficulty breathing", "back pain", "blurred vision", "tingling", "sweating", "frequent urination", 
    "burning sensations while urinating", "light sensitivity", "lightheadedness", "loss of appetite", "rash", 
    "itching", "heartburn", "coughing up blood", "wheezing", "numbness", "throat pain", "muscle stiffness", 
    "night sweats", "nausea", "vomiting", "abdominal pain", "frequent sneezing", "hearing loss", "constipation", 
    "urinary incontinence", "dry mouth", "difficulty swallowing", "coughing blood", "hoarseness", "confusion", 
    "drowsiness", "cold hands or feet", "dysphagia", "difficulty walking", "tremors", "chills", "bloody stool", 
    "leg cramps", "weight gain", "hair loss", "abnormal vaginal bleeding", "difficulty concentrating", 
    "sore throat", "wheezing", "fatigue", "muscle cramps", "cough with sputum", "coughing up phlegm", 
    "diarrhea", "vomiting", "hypertension", "dehydration", "swollen ankles", "blurry vision", "insomnia", 
    "leg swelling", "poor circulation", "tiredness", "leg swelling", "anxiety", "depression"}

    condition_keywords = {
        "chronic obstructive pulmonary disease", "heart attack", "irritable bowel syndrome", "rheumatoid arthritis", 
    "urinary tract infection", "chronic fatigue syndrome", "psoriasis", "asthma", "diabetes", "hypertension", 
    "cancer", "liver disease", "kidney failure", "stroke", "dementia", "arthritis", "pneumonia", "sepsis", 
    "epilepsy", "gastroesophageal reflux disease", "multiple sclerosis", "alzheimer's disease", "parkinson's disease", 
    "systemic lupus erythematosus", "diabetic neuropathy", "tuberculosis", "obesity", "cystic fibrosis", "hepatitis", 
    "meningitis", "sickle cell anemia", "hiv/aids", "celiac disease", "ulcerative colitis", "crohn's disease", 
    "chronic kidney disease", "fibromyalgia", "autoimmune disease", "anemia", "leukemia", "pneumothorax", 
    "lupus", "tetanus", "scleroderma", "rheumatic fever", "prostate cancer", "ovarian cancer", "breast cancer", 
    "gastric cancer", "non-hodgkin lymphoma", "hemophilia", "vitiligo", "severe malaria", "bronchitis", "gout", 
    "scabies", "hemorrhoids", "varicose veins", "hemophilia", "eczema", "chronic pain", "melanoma", "hearing loss", 
    "menstrual disorders", "anxiety", "depression", "bipolar disorder", "schizophrenia", "ptsd", "dyslexia", 
    "insomnia", "phobia", "hysteria", "attention deficit disorder", "migraines", "chronic back pain", "obstructive sleep apnea", 
    "epistaxis", "otitis media", "sinusitis", "bronchial asthma", "copd", "hypothyroidism", "hyperthyroidism", 
    "gout", "rickets", "hyperlipidemia", "cystitis", "spondylitis", "vascular dementia", "strokes", "fibroids"
    }

    medication_keywords = {
        "paracetamol", "ibuprofen", "aspirin", "metformin", "insulin", "atorvastatin", "omeprazole", "amoxicillin", 
    "losartan", "levothyroxine", "prednisone", "albuterol", "gabapentin", "sertraline", "amlodipine", "hydrochlorothiazide", 
    "clopidogrel", "lisinopril", "metoprolol", "simvastatin", "citalopram", "furosemide", "fluoxetine", "warfarin", 
    "trazodone", "cephalexin", "doxycycline", "rosuvastatin", "duloxetine", "pantoprazole", "hydrocodone", "tramadol", 
    "ciprofloxacin", "meloxicam", "escitalopram", "bupropion", "azithromycin", "ranitidine", "venlafaxine", "naproxen", 
    "ondansetron", "methotrexate", "mirtazapine", "spironolactone", "diazepam", "cyclobenzaprine", "diltiazem", 
    "metronidazole", "lorazepam", "morphine", "prednisolone", "famotidine", "baclofen", "clindamycin", "carvedilol", 
    "propranolol", "montelukast", "topiramate", "levofloxacin", "rivaroxaban", "apixaban", "cetirizine", 
    "diphenhydramine", "fentanyl", "hydroxyzine", "ivermectin", "ketorolac", "loratadine", "mefenamic acid", "methocarbamol", 
    "metformin xr", "metoclopramide", "nifedipine", "olmesartan", "phenytoin", "quetiapine", "risperidone", "sitagliptin", 
    "sulfasalazine", "tamsulosin", "terbinafine", "valacyclovir", "valsartan", "verapamil", "zolpidem", "tizanidine", 
    "clonazepam", "mometasone", "betamethasone", "fluticasone", "dexamethasone", "alprazolam", "acetaminophen", "esomeprazole", 
    "budesonide", "tiotropium", "cefuroxime", "erythromycin", "linezolid", "chlorpheniramine"
    } 

    # Generate n-grams (1-3 words)
    words = [token.text for token in doc]
    n_grams = []
    for n in range(1, 4):
        n_grams += [' '.join(words[i:i+n]) for i in range(len(words)-n+1)]

    symptoms = [term for term in n_grams if term in symptom_keywords]
    conditions = [term for term in n_grams if term in condition_keywords]
    medications = [term for term in n_grams if term in medication_keywords]

    return list(set(symptoms)), list(set(conditions)), list(set(medications))

file_path = r"C:/Users/MD.ZAID SHAIKH/Documents/transcription.txt"

# Read the text content from the file
with open(file_path, 'r', encoding='utf-8') as file:
    text_content = file.read()

# Extract medical entities
output_data = extract_medical_entities(text_content)

# Convert the tuple to a dictionary with meaningful keys
data_dict = {
    "symptoms": output_data[0],
    "conditions": output_data[1],
    "medications": output_data[2]
}

json_file_path = "medical_entities.json"

# Save to JSON file with correct format
with open(json_file_path, "w", encoding="utf-8") as json_file:
    json.dump(data_dict, json_file, indent=4, ensure_ascii=False)

print(f"JSON saved successfully at {json_file_path}")




JSON saved successfully at medical_entities.json


In [1]:
import google.generativeai as genai
import json

def load_entities_from_json(json_path):
    """Load medical entities from JSON file"""
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return (data['symptoms'], data['conditions'], data['medications'])
    except Exception as e:
        print(f"Error loading entities: {e}")
        return ([], [], [])

def generate_soap_with_gemini(transcription_path, entities_json_path):
    """Generate SOAP note using transcription file and entities JSON"""
    try:
        # Read transcription text
        with open(transcription_path, 'r', encoding='utf-8') as f:
            original_text = f.read()
        
        # Load entities from JSON
        symptoms, conditions, medications = load_entities_from_json(entities_json_path)
        
        # Configure Google AI
        genai.configure(api_key="AIzaSyDooyEJKTTh6Dwj7ntEDpBzlf50rzdEk-M")
        model = genai.GenerativeModel('gemini-2.0-flash')
        
        # Generate prompt
        prompt = f"""
        Generate structured SOAP note from:
        Patient Statement: "{original_text}"
        Identified Symptoms: {symptoms}
        Medical Conditions: {conditions}
        Current Medications: {medications}
        
        Format:
        [SOAP Note]
        Subjective (S): <patient-reported info>
        Objective (O): <clinical findings>
        Assessment (A): <diagnosis analysis>
        Plan (P): <treatment plan>
        """
        
        response = model.generate_content(prompt)
        return response.text
        
    except Exception as e:
        return f"Error generating SOAP note: {e}"

# File paths (use raw strings for Windows paths)
transcription_file = r"C:/Users/MD.ZAID SHAIKH/Documents/transcription.txt"
entities_json = r"C:/Users/MD.ZAID SHAIKH/Documents/AI_Medical_Assistant/backend/models/services/medical_entities.json"

# Generate and print SOAP note
print(generate_soap_with_gemini(transcription_file, entities_json))

  from .autonotebook import tqdm as notebook_tqdm


```
[SOAP Note]

**Subjective (S):**

*   Patient reports a headache for two days.
*   Patient states they were advised to take Paracetamol.
*   Patient is compliant and indicates they will remember the advice given ("I will keep in mind the rest").

**Objective (O):**

*   No objective clinical findings were reported. Further examination needed for detailed assessment.

**Assessment (A):**

*   Headache, etiology undetermined. Without further information, a specific diagnosis cannot be made. Differential diagnoses include tension headache, migraine, or headache secondary to other causes.

**Plan (P):**

*   Since patient was advised to take paracetamol, continue monitoring.
*   Recommend further evaluation if the headache persists, worsens, or is accompanied by other symptoms (fever, stiff neck, vision changes, weakness, numbness, etc.).
*   Encourage adequate hydration and rest.
*   Further questioning about symptom characteristics needed to provide a more precise diagnosis and plan.

In [2]:
from Bio import Entrez
import google.generativeai as genai
import json

# Configure Google Gemini
genai.configure(api_key="AIzaSyDooyEJKTTh6Dwj7ntEDpBzlf50rzdEk-M")

def load_entities(json_path):
    """Load medical entities from JSON file"""
    try:
        with open(json_path, 'r') as f:
            data = json.load(f)
        return {
            'symptoms': data.get('symptoms', []),
            'conditions': data.get('conditions', []),
            'medications': data.get('medications', [])
        }
    except Exception as e:
        print(f"Error loading entities: {e}")
        return {'symptoms': [], 'conditions': [], 'medications': []}

def fetch_pubmed_evidence(json_path, max_results=3):
    """Fetch research based on JSON entities"""
    entities = load_entities(json_path)
    
    # Build PubMed query
    search_terms = []
    search_terms += entities['symptoms']
    search_terms += entities['conditions']
    search_terms += [f"{med} therapy" for med in entities['medications']]
    query = " AND ".join(search_terms)

    # PubMed API call
    Entrez.email = "zaidshaikh98848@gmail.com"
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    results = Entrez.read(handle)
    
    # Fetch articles
    articles = []
    for pubmed_id in results["IdList"]:
        with Entrez.efetch(db="pubmed", id=pubmed_id, retmode="xml") as handle:
            article = handle.read()
            articles.append(parse_article(article))
    
    return articles

def parse_article(article_xml):
    """Extract article details from bytes XML"""
    xml_str = article_xml.decode('utf-8')
    title = ""
    abstract = ""
    
    if "<ArticleTitle>" in xml_str and "</ArticleTitle>" in xml_str:
        title = xml_str.split("<ArticleTitle>")[1].split("</ArticleTitle>")[0]
    
    if "<AbstractText>" in xml_str and "</AbstractText>" in xml_str:
        abstract = xml_str.split("<AbstractText>")[1].split("</AbstractText>")[0]
    
    return {
        'title': title,
        'abstract': abstract
    }

def summarize_with_gemini(content):
    """Summarize using Google Gemini"""
    model = genai.GenerativeModel('gemini-2.0-flash')
    response = model.generate_content(
        f"Summarize this medical research in 2 sentences: {content}"
    )
    return response.text

def generate_evidence_report(json_path):
    """Generate evidence report"""
    articles = fetch_pubmed_evidence(json_path)
    
    report = "✅ Evidence Synthesis 📚\n"
    for idx, article in enumerate(articles, 1):
        summary = summarize_with_gemini(f"{article['title']}. {article['abstract']}")
        report += f"\n{idx}. Title: {article['title']}\n   Summary: {summary}\n"
    
    return report

# Example Usage
entities_json = r"C:/Users/MD.ZAID SHAIKH/Documents/AI_Medical_Assistant/backend/models/services/medical_entities.json"
print(generate_evidence_report(entities_json))

RuntimeError: Empty term and query_key - nothing todo