In [2]:
import json 
import spacy

# Load spaCy's small English pipeline once, at cell scope, instead of on every call.
# NOTE(review): the "en_core_web_sm" package has to be installed in the kernel's
# environment for this line to succeed — confirm before running on a fresh machine.
nlp = spacy.load("en_core_web_sm")  # General-purpose model

def extract_medical_entities(text):
    """Extract known symptom, condition and medication terms from free text.

    Matching is case-insensitive and done directly on the whitespace-normalised
    text with word-boundary checks, rather than on space-joined token n-grams.
    Joining tokens with single spaces could never reproduce keywords that
    contain punctuation ("alzheimer's disease", "hiv/aids",
    "non-hodgkin lymphoma"), and a 1-3 token window could not reach
    four-word keywords ("chronic obstructive pulmonary disease").

    Args:
        text: Raw transcript text. ``None`` or an empty string yields three
            empty lists.

    Returns:
        A tuple ``(symptoms, conditions, medications)``. Each element is a
        list of the unique matching keywords (lowercase), sorted
        alphabetically so the result is deterministic between runs.
    """
    import re  # local import: the cell's import block does not provide re

    # Define lowercase keyword sets
    symptom_keywords = {
        "dizziness", "palpitations", "fatigue", "nausea", "headache", "insomnia", "chronic pain", "muscle weakness",
        "joint pain", "fever", "weight loss", "swelling", "cough", "shortness of breath", "chronic fatigue", "chest pain",
        "difficulty breathing", "back pain", "blurred vision", "tingling", "sweating", "frequent urination",
        "burning sensations while urinating", "light sensitivity", "lightheadedness", "loss of appetite", "rash",
        "itching", "heartburn", "coughing up blood", "wheezing", "numbness", "throat pain", "muscle stiffness",
        "night sweats", "vomiting", "abdominal pain", "frequent sneezing", "hearing loss", "constipation",
        "urinary incontinence", "dry mouth", "difficulty swallowing", "coughing blood", "hoarseness", "confusion",
        "drowsiness", "cold hands or feet", "dysphagia", "difficulty walking", "tremors", "chills", "bloody stool",
        "leg cramps", "weight gain", "hair loss", "abnormal vaginal bleeding", "difficulty concentrating",
        "sore throat", "muscle cramps", "cough with sputum", "coughing up phlegm",
        "diarrhea", "hypertension", "dehydration", "swollen ankles", "blurry vision",
        "leg swelling", "poor circulation", "tiredness", "anxiety", "depression",
    }

    condition_keywords = {
        "chronic obstructive pulmonary disease", "heart attack", "irritable bowel syndrome", "rheumatoid arthritis", "stomach pain",
        "urinary tract infection", "chronic fatigue syndrome", "psoriasis", "asthma", "diabetes", "hypertension",
        "cancer", "liver disease", "kidney failure", "stroke", "dementia", "arthritis", "pneumonia", "sepsis",
        "epilepsy", "gastroesophageal reflux disease", "multiple sclerosis", "alzheimer's disease", "parkinson's disease",
        "systemic lupus erythematosus", "diabetic neuropathy", "tuberculosis", "obesity", "cystic fibrosis", "hepatitis",
        "meningitis", "sickle cell anemia", "hiv/aids", "celiac disease", "ulcerative colitis", "crohn's disease",
        "chronic kidney disease", "fibromyalgia", "autoimmune disease", "anemia", "leukemia", "pneumothorax",
        "lupus", "tetanus", "scleroderma", "rheumatic fever", "prostate cancer", "ovarian cancer", "breast cancer",
        "gastric cancer", "non-hodgkin lymphoma", "hemophilia", "vitiligo", "severe malaria", "bronchitis", "gout",
        "scabies", "hemorrhoids", "varicose veins", "eczema", "chronic pain", "melanoma", "hearing loss",
        "menstrual disorders", "anxiety", "depression", "bipolar disorder", "schizophrenia", "ptsd", "dyslexia",
        "insomnia", "phobia", "hysteria", "attention deficit disorder", "migraines", "chronic back pain", "obstructive sleep apnea",
        "epistaxis", "otitis media", "sinusitis", "bronchial asthma", "copd", "hypothyroidism", "hyperthyroidism",
        "rickets", "hyperlipidemia", "cystitis", "spondylitis", "vascular dementia", "strokes", "fibroids",
    }

    medication_keywords = {
        "paracetamol", "ibuprofen", "aspirin", "metformin", "insulin", "atorvastatin", "omeprazole", "amoxicillin",
        "losartan", "levothyroxine", "prednisone", "albuterol", "gabapentin", "sertraline", "amlodipine", "hydrochlorothiazide",
        "clopidogrel", "lisinopril", "metoprolol", "simvastatin", "citalopram", "furosemide", "fluoxetine", "warfarin",
        "trazodone", "cephalexin", "doxycycline", "rosuvastatin", "duloxetine", "pantoprazole", "hydrocodone", "tramadol",
        "ciprofloxacin", "meloxicam", "escitalopram", "bupropion", "azithromycin", "ranitidine", "venlafaxine", "naproxen",
        "ondansetron", "methotrexate", "mirtazapine", "spironolactone", "diazepam", "cyclobenzaprine", "diltiazem",
        "metronidazole", "lorazepam", "morphine", "prednisolone", "famotidine", "baclofen", "clindamycin", "carvedilol",
        "propranolol", "montelukast", "topiramate", "levofloxacin", "rivaroxaban", "apixaban", "cetirizine",
        "diphenhydramine", "fentanyl", "hydroxyzine", "ivermectin", "ketorolac", "loratadine", "mefenamic acid", "methocarbamol",
        "metformin xr", "metoclopramide", "nifedipine", "olmesartan", "phenytoin", "quetiapine", "risperidone", "sitagliptin",
        "sulfasalazine", "tamsulosin", "terbinafine", "valacyclovir", "valsartan", "verapamil", "zolpidem", "tizanidine",
        "clonazepam", "mometasone", "betamethasone", "fluticasone", "dexamethasone", "alprazolam", "acetaminophen", "esomeprazole",
        "budesonide", "tiotropium", "cefuroxime", "erythromycin", "linezolid", "chlorpheniramine",
    }

    if not text:
        return [], [], []

    # Lowercase, map typographic apostrophes to ASCII so "alzheimer’s" matches,
    # and collapse every whitespace run (including newlines) to one space so
    # multi-word keywords match across line breaks.
    normalized = " ".join(text.lower().replace("\u2019", "'").split())

    def find_terms(keywords):
        """Return the sorted keywords that occur in the text as whole terms."""
        hits = []
        for keyword in keywords:
            # Lookarounds instead of \b: they also work when the keyword itself
            # starts or ends next to punctuation, and they stop "fever" from
            # matching inside "feverish".
            pattern = r"(?<!\w)" + re.escape(keyword) + r"(?!\w)"
            if re.search(pattern, normalized):
                hits.append(keyword)
        return sorted(hits)

    return (
        find_terms(symptom_keywords),
        find_terms(condition_keywords),
        find_terms(medication_keywords),
    )

# Where the transcript lives and where the extracted entities are written.
file_path = r"C:/Users/MD.ZAID SHAIKH/Documents/transcription.txt"
json_file_path = "medical_entities.json"

# Pull the whole transcript into memory.
with open(file_path, 'r', encoding='utf-8') as file:
    text_content = file.read()

# Run the keyword extraction; the result is a (symptoms, conditions, medications) tuple.
output_data = extract_medical_entities(text_content)

# Pair each position of the tuple with its JSON key.
data_dict = dict(zip(("symptoms", "conditions", "medications"), output_data))

# Persist as pretty-printed UTF-8 JSON, keeping non-ASCII characters readable.
with open(json_file_path, "w", encoding="utf-8") as json_file:
    json.dump(data_dict, json_file, indent=4, ensure_ascii=False)

print(f"JSON saved successfully at {json_file_path}")




JSON saved successfully at medical_entities.json


In [3]:
import google.generativeai as genai
import json

def load_entities_from_json(json_path):
    """Load medical entities from a JSON file.

    Args:
        json_path: Path to a UTF-8 JSON file holding an object with optional
            "symptoms", "conditions" and "medications" keys.

    Returns:
        A tuple ``(symptoms, conditions, medications)``. A key that is missing
        or null yields an empty list for that position only, so one absent
        category no longer discards the others. If the file cannot be read or
        parsed, or does not contain a JSON object, an error is printed and
        ``([], [], [])`` is returned.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError, TypeError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # TypeError covers a json_path that is not path-like (e.g. None).
        print(f"Error loading entities: {e}")
        return ([], [], [])

    if not isinstance(data, dict):
        print(f"Error loading entities: expected a JSON object, got {type(data).__name__}")
        return ([], [], [])

    # "or []" also normalises an explicit null to an empty list.
    return tuple(data.get(key) or [] for key in ('symptoms', 'conditions', 'medications'))

def generate_soap_with_gemini(transcription_path, entities_json_path):
    """Generate a SOAP note from a transcription file and an entities JSON file.

    The Gemini API key is read from the ``GOOGLE_API_KEY`` environment
    variable instead of being embedded in the source, so the secret is not
    stored or shared with the notebook.

    Args:
        transcription_path: Path to the UTF-8 text transcript.
        entities_json_path: Path to the JSON file read by
            ``load_entities_from_json``.

    Returns:
        The text of the model response, or a string starting with
        ``"Error generating SOAP note: "`` if anything fails (missing API
        key, unreadable file, API error).
    """
    import os  # local import: this cell's import block does not provide os

    try:
        # Check the credential first so a misconfigured environment fails
        # before any file or network work is done.
        api_key = os.environ.get("GOOGLE_API_KEY")
        if not api_key:
            raise RuntimeError("GOOGLE_API_KEY environment variable is not set")

        # Read transcription text
        with open(transcription_path, 'r', encoding='utf-8') as f:
            original_text = f.read()

        # Load entities from JSON
        symptoms, conditions, medications = load_entities_from_json(entities_json_path)

        # Configure Google AI
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-2.0-flash')

        # Generate prompt
        prompt = f"""
        Generate structured SOAP note from:
        Patient Statement: "{original_text}"
        Identified Symptoms: {symptoms}
        Medical Conditions: {conditions}
        Current Medications: {medications}
        
        Format:
        [SOAP Note]
        Subjective (S): <patient-reported info>
        Objective (O): <clinical findings>
        Assessment (A): <diagnosis analysis>
        Plan (P): <treatment plan>
        """

        response = model.generate_content(prompt)
        return response.text

    except Exception as e:
        # Errors are reported as a string so the calling cell can print them.
        return f"Error generating SOAP note: {e}"

# Input locations for the SOAP-note run.
# The r"" prefix is harmless here: both paths use forward slashes, so there
# are no backslashes that would need escaping.
# NOTE(review): the extraction cell writes "medical_entities.json" relative to
# the working directory — confirm that this absolute path is that same file.
transcription_file = r"C:/Users/MD.ZAID SHAIKH/Documents/transcription.txt"
entities_json = r"C:/Users/MD.ZAID SHAIKH/Documents/AI_Medical_Assistant/backend/models/services/medical_entities.json"

# Generate and print SOAP note (on failure the function returns an error string, which is printed instead)
print(generate_soap_with_gemini(transcription_file, entities_json))

```
[SOAP Note]
Subjective (S): Patient reports extreme fatigue and constant joint pain. Even small movements are painful.

Objective (O): (Note: No objective data provided in the prompt. This section would typically include physical exam findings and lab results.) Further evaluation and testing pending.

Assessment (A): Possible rheumatoid arthritis. Differential diagnosis includes other forms of arthritis and inflammatory conditions.

Plan (P): Prescribed methotrexate and prednisone to manage inflammation. Further diagnostic testing (e.g., rheumatoid factor, anti-CCP antibody) will be conducted to confirm diagnosis. Patient to follow up for lab results and medication management.
```



In [4]:
from Bio import Entrez
import google.generativeai as genai
import json

import os

# Configure Google Gemini.
# The API key is taken from the GOOGLE_API_KEY environment variable so the
# secret is never stored in the notebook; a missing variable raises KeyError
# here, before any request is attempted.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

def load_entities(json_path):
    """Load medical entities from a JSON file.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default encoding.

    Args:
        json_path: Path to a JSON file holding an object with optional
            "symptoms", "conditions" and "medications" keys.

    Returns:
        A dict with exactly the keys 'symptoms', 'conditions' and
        'medications'. A key that is missing or null maps to an empty list.
        If loading fails for any reason, the error is printed and all three
        lists are empty.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return {
            # "or []" turns an explicit null into a list so callers can
            # safely concatenate the values.
            'symptoms': data.get('symptoms') or [],
            'conditions': data.get('conditions') or [],
            'medications': data.get('medications') or []
        }
    except Exception as e:
        # Best-effort loader: report the problem and fall back to no entities.
        print(f"Error loading entities: {e}")
        return {'symptoms': [], 'conditions': [], 'medications': []}

def fetch_pubmed_evidence(json_path, max_results=3):
    """Fetch PubMed articles related to the entities stored in a JSON file.

    Args:
        json_path: Path passed to ``load_entities``.
        max_results: Maximum number of PubMed IDs requested from the search.

    Returns:
        A list with one ``parse_article`` result per PubMed ID found. The
        list is empty when the JSON file yields no entities at all, in which
        case no request is sent.
    """
    entities = load_entities(json_path)

    # Build PubMed query: every symptom, condition and "<medication> therapy"
    # term must match (terms are combined with AND).
    search_terms = []
    search_terms += entities['symptoms']
    search_terms += entities['conditions']
    search_terms += [f"{med} therapy" for med in entities['medications']]

    # Nothing to search for: do not send an empty term to the API.
    if not search_terms:
        return []

    query = " AND ".join(search_terms)

    # PubMed API call
    Entrez.email = "zaidshaikh98848@gmail.com"
    search_handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    try:
        results = Entrez.read(search_handle)
    finally:
        # Release the connection even if parsing the search result fails.
        search_handle.close()

    # Fetch articles one ID at a time; parse_article reads a single record.
    articles = []
    for pubmed_id in results["IdList"]:
        with Entrez.efetch(db="pubmed", id=pubmed_id, retmode="xml") as fetch_handle:
            articles.append(parse_article(fetch_handle.read()))

    return articles

def parse_article(article_xml):
    """Extract the title and abstract from a PubMed article XML document.

    The document is parsed as XML rather than split on literal tag strings,
    so abstract sections written with attributes
    (``<AbstractText Label="...">``) are found, all abstract sections are
    kept, inline markup such as ``<i>`` is dropped from the text, and XML
    entities such as ``&amp;`` are decoded.

    Args:
        article_xml: The XML document as bytes (a str is also accepted and is
            encoded as UTF-8 first).

    Returns:
        A dict ``{'title': str, 'abstract': str}``. A field that is absent is
        an empty string; if the input is not well-formed XML both fields are
        empty. Multiple abstract sections are joined with single spaces.
    """
    import xml.etree.ElementTree as ET  # local import: stdlib, not in the cell's import block

    def _flatten(element):
        """Return all text inside the element with whitespace runs collapsed."""
        return " ".join("".join(element.itertext()).split())

    if isinstance(article_xml, str):
        article_xml = article_xml.encode('utf-8')

    try:
        root = ET.fromstring(article_xml)
    except ET.ParseError:
        return {'title': "", 'abstract': ""}

    title_element = root.find(".//ArticleTitle")
    title = _flatten(title_element) if title_element is not None else ""

    # Only sections directly under <Abstract>, so other abstract containers
    # in the record are not mixed into the main abstract.
    sections = [_flatten(section) for section in root.findall(".//Abstract/AbstractText")]
    abstract = " ".join(section for section in sections if section)

    return {
        'title': title,
        'abstract': abstract
    }

def summarize_with_gemini(content):
    """Ask the Gemini model for a two-sentence summary of the given text.

    Args:
        content: Text to summarise; it is interpolated into the prompt as-is.

    Returns:
        The ``text`` attribute of the model's response.
    """
    prompt = f"Summarize this medical research in 2 sentences: {content}"
    gemini = genai.GenerativeModel('gemini-2.0-flash')
    return gemini.generate_content(prompt).text

def generate_evidence_report(json_path):
    """Build a text report listing each fetched article with a model-written summary.

    Args:
        json_path: Path forwarded to ``fetch_pubmed_evidence``.

    Returns:
        A single string: a header line followed by one numbered
        "Title / Summary" entry per article.
    """
    fetched = fetch_pubmed_evidence(json_path)

    # Collect the pieces and join once at the end.
    pieces = ["✅ Evidence Synthesis 📚\n"]
    for position, entry in enumerate(fetched, start=1):
        heading = entry['title']
        digest = summarize_with_gemini(f"{heading}. {entry['abstract']}")
        pieces.append(f"\n{position}. Title: {heading}\n   Summary: {digest}\n")

    return "".join(pieces)

# Example Usage
# Builds the evidence report from the saved entities file and prints it.
# NOTE(review): this run performs network calls (PubMed search/fetch plus one
# model request per article) — confirm that is acceptable on every re-run.
entities_json = r"C:/Users/MD.ZAID SHAIKH/Documents/AI_Medical_Assistant/backend/models/services/medical_entities.json"
print(generate_evidence_report(entities_json))

✅ Evidence Synthesis 📚

1. Title: A case report of a 53-year-old female with rheumatoid arthritis and osteoporosis: focus on lab testing and CAM therapies.
   Summary: This case report details a 53-year-old woman with rheumatoid arthritis and osteoporosis who, after discontinuing conventional RA medications due to side effects, experienced significant improvement in multiple health conditions, including bone density, through lab-directed CAM therapies involving dietary changes, nutritional support, and stress management. The patient was able to discontinue several medications and normalize antinuclear antibodies, suggesting the potential benefit of integrative approaches in managing complex chronic conditions.


2. Title: Women, men, and rheumatoid arthritis: analyses of disease activity, disease characteristics, and treatments in the QUEST-RA study.
   Summary: This research investigated how rheumatoid arthritis (RA) manifests and is treated differently in women and men using data fro

In [6]:
import os
import json
from datetime import datetime
from textwrap import fill
import google.generativeai as genai

# Configure Google Gemini.
# The API key is taken from the GOOGLE_API_KEY environment variable so the
# secret is never stored in the notebook; a missing variable raises KeyError
# here, before any request is attempted.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

def generate_patient_report(transcription_path, entities_json_path):
    """
    Generate a comprehensive patient report including:
    - Patient summary
    - Identified medical entities
    - SOAP note
    - Evidence-based recommendations
    - Patient-friendly summary

    The report is also written to "patient_report.txt" in the same directory
    as the transcription file.

    Args:
        transcription_path: Path to the UTF-8 text transcript.
        entities_json_path: Path to a UTF-8 JSON object with optional
            "symptoms", "conditions" and "medications" lists.

    Returns:
        ``(full_report, report_path)`` on success, or
        ``("Error generating report: <details>", None)`` if any step fails.
    """
    try:
        # Load transcription
        with open(transcription_path, 'r', encoding='utf-8') as f:
            original_text = f.read()

        # Load medical entities
        with open(entities_json_path, 'r', encoding='utf-8') as f:
            entities = json.load(f)

        # A missing or null category is treated as "nothing identified"
        # instead of aborting the whole report.
        symptoms = entities.get('symptoms') or []
        conditions = entities.get('conditions') or []
        medications = entities.get('medications') or []

        # Initialize Gemini model
        model = genai.GenerativeModel('gemini-2.0-flash')

        # Generate report sections
        report_sections = []

        # 1. Header
        report_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        report_sections.append(f"PATIENT MEDICAL REPORT\nGenerated on: {report_date}\n{'='*50}\n")

        # 2. Patient Summary
        summary_prompt = f"""
        Create a concise 3-4 sentence summary of the patient's main concerns and medical context 
        based on this transcript:
        
        {original_text}
        
        Focus on the key medical issues while maintaining patient privacy.
        """
        patient_summary = model.generate_content(summary_prompt).text
        report_sections.append(f"PATIENT SUMMARY:\n{fill(patient_summary, width=80)}\n\n")

        # 3. Identified Medical Entities
        entities_str = "\n".join([
            f"- Symptoms: {', '.join(symptoms) or 'None identified'}",
            f"- Conditions: {', '.join(conditions) or 'None identified'}",
            f"- Medications: {', '.join(medications) or 'None identified'}"
        ])
        report_sections.append(f"IDENTIFIED MEDICAL ENTITIES:\n{entities_str}\n\n")

        # 4. SOAP Note
        soap_prompt = f"""
        Generate a professional SOAP note from:
        Patient Statement: "{original_text}"
        Identified Symptoms: {symptoms}
        Medical Conditions: {conditions}
        Current Medications: {medications}
        
        Format professionally with these sections:
        [Subjective]
        - Patient's reported symptoms and concerns
        - Relevant history
        
        [Objective]
        - Observable findings (hypothetical if not provided)
        - Vital signs (if mentioned)
        
        [Assessment]
        - Differential diagnosis
        - Most likely diagnosis
        
        [Plan]
        - Diagnostic tests recommended
        - Treatment recommendations
        - Follow-up plan
        """
        soap_note = model.generate_content(soap_prompt).text
        report_sections.append(f"CLINICAL ASSESSMENT (SOAP FORMAT):\n{soap_note}\n\n")

        # 5. Evidence-Based Recommendations
        evidence_prompt = f"""
        Based on these medical entities:
        Symptoms: {symptoms}
        Conditions: {conditions}
        Medications: {medications}
        
        Generate 3-5 evidence-based clinical recommendations including:
        - Diagnostic considerations
        - Treatment options with rationale
        - Lifestyle modifications
        - Red flags to watch for
        
        Format as a bulleted list with brief explanations.
        """
        evidence = model.generate_content(evidence_prompt).text
        report_sections.append(f"EVIDENCE-BASED RECOMMENDATIONS:\n{evidence}\n\n")

        # 6. Patient-Friendly Summary
        # The findings and recommendations generated above are passed in
        # explicitly: each generate_content call is independent, so without
        # them the model has nothing to rewrite.
        patient_prompt = f"""
        Create a patient-friendly version of the key findings and recommendations:
        - Use simple language (8th grade reading level)
        - Avoid medical jargon
        - Focus on actionable items
        - Include important warnings
        
        Format with clear headings and bullet points.
        
        Key findings (clinical assessment):
        {soap_note}
        
        Recommendations:
        {evidence}
        """
        patient_version = model.generate_content(patient_prompt).text
        report_sections.append(f"PATIENT-FRIENDLY SUMMARY:\n{patient_version}\n\n")

        # Combine all sections
        full_report = "\n".join(report_sections)

        # Save report to file, next to the transcription
        report_path = os.path.join(os.path.dirname(transcription_path), "patient_report.txt")
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(full_report)

        return full_report, report_path

    except Exception as e:
        # Failure is signalled by a None path; the first element carries the message.
        return f"Error generating report: {str(e)}", None

# Example usage
if __name__ == "__main__":
    # Update these paths to match your system
    transcription_file = r"C:/Users/MD.ZAID SHAIKH/Documents/transcription.txt"
    entities_json = r"C:/Users/MD.ZAID SHAIKH/Documents/AI_Medical_Assistant/backend/models/services/medical_entities.json"

    report, path = generate_patient_report(transcription_file, entities_json)
    if path is None:
        # generate_patient_report returns (error message, None) on failure;
        # show the message instead of claiming success.
        print(report)
    else:
        print(f"Report generated successfully at: {path}")
        print("\n=== REPORT CONTENT ===\n")
        print(report)

Report generated successfully at: C:/Users/MD.ZAID SHAIKH/Documents\patient_report.txt

=== REPORT CONTENT ===

PATIENT MEDICAL REPORT
Generated on: 2025-03-27 16:16:56

PATIENT SUMMARY:
The patient reports experiencing persistent and debilitating fatigue coupled
with constant joint pain, significantly impacting their mobility. Based on these
symptoms, the doctor suspects rheumatoid arthritis. To address the inflammation
and discomfort, the doctor has prescribed methotrexate and prednisone pending
further diagnostic testing.


IDENTIFIED MEDICAL ENTITIES:
- Symptoms: fatigue
- Conditions: arthritis, rheumatoid arthritis
- Medications: prednisone, methotrexate


CLINICAL ASSESSMENT (SOAP FORMAT):
Okay, here's a SOAP note based on the provided information, formatted professionally and with some plausible additions to create a more complete and realistic note.

**SOAP Note**

**Patient:**  [Patient Name]
**Date:** October 26, 2023
**MRN:** [Medical Record Number]

**[Subjective]**

*   **