In [1]:
!pip install -q transformers torch accelerate sentencepiece

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
import warnings
warnings.filterwarnings('ignore')

print("Libraries imported successfully!")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

Libraries imported successfully!
PyTorch version: 2.9.0+cu126
CUDA available: True


In [3]:
# Load BioBERT model fine-tuned for QA
model_name = "dmis-lab/biobert-v1.1"  # Base BioBERT
# Alternative: "ktrapeznikov/biobert_v1.1_pubmed_squad_v2" for QA-specific

print("Loading BioBERT model...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained("ktrapeznikov/biobert_v1.1_pubmed_squad_v2")

# Create QA pipeline
qa_pipeline = pipeline(
    "question-answering",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1
)

print("BioBERT model loaded successfully!")

Loading BioBERT model...


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/462 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/433M [00:00<?, ?B/s]

Some weights of the model checkpoint at ktrapeznikov/biobert_v1.1_pubmed_squad_v2 were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

BioBERT model loaded successfully!


In [None]:
def load_medical_encyclopedia(file_path='medical_encyclopedia.txt'):
    
    encyclopedia = {}

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Split by separator
        sections = content.split('\n---\n')

        for section in sections:
            section = section.strip()
            if not section:
                continue

            # First line is the condition name
            lines = section.split('\n', 1)
            if len(lines) == 2:
                condition_name = lines[0].strip()
                description = lines[1].strip()
                encyclopedia[condition_name] = description

        print(f"Medical encyclopedia loaded from '{file_path}'")
        print(f"Found {len(encyclopedia)} conditions")
        print(f"Conditions: {', '.join(encyclopedia.keys())}")

        return encyclopedia

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found!")
        print("   Please make sure the medical_encyclopedia.txt file exists.")
        return {}
    except Exception as e:
        print(f"Error loading encyclopedia: {str(e)}")
        return {}

# Load the medical encyclopedia from file
MEDICAL_ENCYCLOPEDIA = load_medical_encyclopedia('medical_encyclopedia.txt')

if not MEDICAL_ENCYCLOPEDIA:
    print("\nWarning: Medical encyclopedia is empty. Please check the file.")

Medical encyclopedia loaded from 'medical_encyclopedia.txt'
Found 6 conditions
Conditions: Influenza (Flu), Common Cold, COVID-19, Migraine, Strep Throat, Asthma


In [5]:
def check_symptoms(question, top_k=3, min_score=0.01):
    """
    Check symptoms against medical encyclopedia using BioBERT QA.

    Args:
        question: User's question about symptoms
        top_k: Number of top conditions to check
        min_score: Minimum confidence score threshold

    Returns:
        List of answers with confidence scores
    """
    results = []

    print(f"\nSearching medical encyclopedia for: '{question}'\n")

    # Query each medical condition
    for condition, context in MEDICAL_ENCYCLOPEDIA.items():
        try:
            # Get answer from BioBERT
            answer = qa_pipeline(
                question=question,
                context=context,
                max_answer_len=200,
                handle_impossible_answer=True
            )

            if answer['score'] >= min_score:
                results.append({
                    'condition': condition,
                    'answer': answer['answer'],
                    'score': answer['score'],
                    'context': context
                })
        except Exception as e:
            print(f"Error processing {condition}: {str(e)}")

    # Sort by confidence score
    results.sort(key=lambda x: x['score'], reverse=True)

    # Display top results
    if results:
        print(f"\nTop {min(top_k, len(results))} Results:\n")
        for i, result in enumerate(results[:top_k], 1):
            print(f"\n{i}. {result['condition']}")
            print(f"   Confidence: {result['score']:.2%}")
            print(f"   Answer: {result['answer']}")
            print("   " + "-" * 65)
    else:
        print("\nNo relevant information found. Please rephrase your question.")

    return results[:top_k]

print("Symptom checker function ready!")

Symptom checker function ready!


In [6]:
# Example 1: Flu symptoms
check_symptoms("What are symptoms of flu?")


Searching medical encyclopedia for: 'What are symptoms of flu?'


Top 3 Results:


1. Migraine
   Confidence: 99.88%
   Answer: 
   -----------------------------------------------------------------

2. Strep Throat
   Confidence: 99.84%
   Answer: 
   -----------------------------------------------------------------

3. Asthma
   Confidence: 99.84%
   Answer: 
   -----------------------------------------------------------------


[{'condition': 'Migraine',
  'answer': '',
  'score': 0.9987974166870117,
  'context': 'A migraine is a neurological condition characterized by intense, debilitating headaches.\nMigraines are often accompanied by other symptoms and can significantly impact daily activities.\n\nSymptoms of migraine include:\n- Intense throbbing or pulsing pain, usually on one side of the head\n- Sensitivity to light (photophobia)\n- Sensitivity to sound (phonophobia)\n- Nausea and vomiting\n- Blurred vision\n- Visual disturbances (aura) such as seeing flashing lights or zigzag patterns\n- Lightheadedness or dizziness\n- Tingling or numbness in the face or extremities\n\nMigraines can last from 4 hours to 3 days. Treatment includes pain relievers, preventive\nmedications, lifestyle modifications, and avoiding known triggers.'},
 {'condition': 'Strep Throat',
  'answer': '',
  'score': 0.9984400272369385,
  'context': 'Strep throat is a bacterial infection caused by group A Streptococcus bacteria. It prim

In [7]:
# Example 2: COVID symptoms
check_symptoms("What are the symptoms of COVID-19?")

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset



Searching medical encyclopedia for: 'What are the symptoms of COVID-19?'


Top 3 Results:


1. Asthma
   Confidence: 99.91%
   Answer: 
   -----------------------------------------------------------------

2. Strep Throat
   Confidence: 99.80%
   Answer: 
   -----------------------------------------------------------------

3. Migraine
   Confidence: 99.73%
   Answer: 
   -----------------------------------------------------------------


[{'condition': 'Asthma',
  'answer': '',
  'score': 0.9991257786750793,
  'context': 'Asthma is a chronic respiratory condition where airways become inflamed and narrowed,\nmaking breathing difficult. It can range from mild to severe and life-threatening.\n\nSymptoms of asthma include:\n- Shortness of breath\n- Chest tightness or pain\n- Wheezing (a whistling sound when breathing)\n- Coughing, especially at night or early morning\n- Difficulty breathing or rapid breathing\n- Trouble sleeping due to breathing problems\n- Fatigue during physical activity\n\nAsthma symptoms can be triggered by allergens, exercise, cold air, stress, or respiratory\ninfections. Treatment includes avoiding triggers, using inhaled medications (bronchodilators\nand corticosteroids), and having an asthma action plan.'},
 {'condition': 'Strep Throat',
  'answer': '',
  'score': 0.9979684352874756,
  'context': 'Strep throat is a bacterial infection caused by group A Streptococcus bacteria. It primarily\naffects 

In [8]:
# Example 3: Headache-related query
check_symptoms("What causes severe headaches with sensitivity to light?")


Searching medical encyclopedia for: 'What causes severe headaches with sensitivity to light?'


Top 3 Results:


1. Strep Throat
   Confidence: 99.94%
   Answer: 
   -----------------------------------------------------------------

2. Asthma
   Confidence: 99.94%
   Answer: 
   -----------------------------------------------------------------

3. Influenza (Flu)
   Confidence: 99.89%
   Answer: 
   -----------------------------------------------------------------


[{'condition': 'Strep Throat',
  'answer': '',
  'score': 0.999437689781189,
  'context': 'Strep throat is a bacterial infection caused by group A Streptococcus bacteria. It primarily\naffects the throat and tonsils and is highly contagious.\n\nSymptoms of strep throat include:\n- Sudden, severe sore throat\n- Pain when swallowing\n- Red and swollen tonsils, sometimes with white patches or streaks of pus\n- Tiny red spots on the roof of the mouth\n- Swollen, tender lymph nodes in the neck\n- Fever (101°F or higher)\n- Headache\n- Rash (scarlet fever)\n- Nausea or vomiting (especially in children)\n\nUnlike viral sore throats, strep throat typically does not cause cough or runny nose.\nIt requires antibiotic treatment to prevent complications. A throat swab test confirms diagnosis.'},
 {'condition': 'Asthma',
  'answer': '',
  'score': 0.9993599653244019,
  'context': 'Asthma is a chronic respiratory condition where airways become inflamed and narrowed,\nmaking breathing difficult. It c

In [9]:
# Example 4: Respiratory symptoms
check_symptoms("What are the symptoms of breathing problems?")


Searching medical encyclopedia for: 'What are the symptoms of breathing problems?'


Top 3 Results:


1. Migraine
   Confidence: 99.95%
   Answer: 
   -----------------------------------------------------------------

2. Influenza (Flu)
   Confidence: 99.92%
   Answer: 
   -----------------------------------------------------------------

3. Common Cold
   Confidence: 99.88%
   Answer: 
   -----------------------------------------------------------------


[{'condition': 'Migraine',
  'answer': '',
  'score': 0.999517023563385,
  'context': 'A migraine is a neurological condition characterized by intense, debilitating headaches.\nMigraines are often accompanied by other symptoms and can significantly impact daily activities.\n\nSymptoms of migraine include:\n- Intense throbbing or pulsing pain, usually on one side of the head\n- Sensitivity to light (photophobia)\n- Sensitivity to sound (phonophobia)\n- Nausea and vomiting\n- Blurred vision\n- Visual disturbances (aura) such as seeing flashing lights or zigzag patterns\n- Lightheadedness or dizziness\n- Tingling or numbness in the face or extremities\n\nMigraines can last from 4 hours to 3 days. Treatment includes pain relievers, preventive\nmedications, lifestyle modifications, and avoiding known triggers.'},
 {'condition': 'Influenza (Flu)',
  'answer': '',
  'score': 0.9992469549179077,
  'context': 'Influenza, commonly known as the flu, is a contagious respiratory illness caused by i

In [10]:
# Example 5: Sore throat
check_symptoms("What causes severe sore throat with fever?")


Searching medical encyclopedia for: 'What causes severe sore throat with fever?'


Top 3 Results:


1. Migraine
   Confidence: 99.90%
   Answer: 
   -----------------------------------------------------------------

2. Influenza (Flu)
   Confidence: 99.76%
   Answer: 
   -----------------------------------------------------------------

3. Asthma
   Confidence: 99.67%
   Answer: 
   -----------------------------------------------------------------


[{'condition': 'Migraine',
  'answer': '',
  'score': 0.998975932598114,
  'context': 'A migraine is a neurological condition characterized by intense, debilitating headaches.\nMigraines are often accompanied by other symptoms and can significantly impact daily activities.\n\nSymptoms of migraine include:\n- Intense throbbing or pulsing pain, usually on one side of the head\n- Sensitivity to light (photophobia)\n- Sensitivity to sound (phonophobia)\n- Nausea and vomiting\n- Blurred vision\n- Visual disturbances (aura) such as seeing flashing lights or zigzag patterns\n- Lightheadedness or dizziness\n- Tingling or numbness in the face or extremities\n\nMigraines can last from 4 hours to 3 days. Treatment includes pain relievers, preventive\nmedications, lifestyle modifications, and avoiding known triggers.'},
 {'condition': 'Influenza (Flu)',
  'answer': '',
  'score': 0.9975820183753967,
  'context': 'Influenza, commonly known as the flu, is a contagious respiratory illness caused by i

In [None]:
def get_condition_details(condition_name):
    if condition_name in MEDICAL_ENCYCLOPEDIA:
        print("\n" + "="*70)
        print(f"DETAILED INFORMATION: {condition_name}")
        print("="*70 + "\n")
        print(MEDICAL_ENCYCLOPEDIA[condition_name])
        print("\n" + "="*70)

        # Ask specific questions about the condition
        questions = [
            "What are the main symptoms?",
            "How long does it last?",
            "What is the treatment?"
        ]

        print("\nAI-Extracted Information:\n")
        for q in questions:
            answer = qa_pipeline(
                question=q,
                context=MEDICAL_ENCYCLOPEDIA[condition_name]
            )
            print(f"Q: {q}")
            print(f"A: {answer['answer']} (Confidence: {answer['score']:.2%})\n")
    else:
        print(f"Condition '{condition_name}' not found in encyclopedia.")
        print(f"Available conditions: {', '.join(MEDICAL_ENCYCLOPEDIA.keys())}")

# Example: Get details about Influenza
get_condition_details("Influenza (Flu)")


DETAILED INFORMATION: Influenza (Flu)

Influenza, commonly known as the flu, is a contagious respiratory illness caused by influenza viruses.

Common symptoms of flu include:
- Fever or feeling feverish/chills (not everyone with flu will have a fever)
- Cough
- Sore throat
- Runny or stuffy nose
- Muscle or body aches
- Headaches
- Fatigue (tiredness)
- Some people may have vomiting and diarrhea, though this is more common in children than adults

The flu typically comes on suddenly and symptoms are usually more severe than the common cold.
Most people recover within a few days to less than two weeks. Treatment includes rest, fluids,
and over-the-counter medications. Antiviral drugs may be prescribed in some cases.


AI-Extracted Information:

Q: What are the main symptoms?
A: Fever or feeling feverish/chills (Confidence: 49.19%)

Q: How long does it last?
A: a few days to less than two weeks (Confidence: 14.51%)

Q: What is the treatment?
A: rest, fluids,
and over-the-counter medicat

In [12]:
def save_symptom_report(question, results, filename="symptom_report.txt"):
    """
    Save symptom check results to a text file.

    Args:
        question: The symptom question asked
        results: Results from check_symptoms()
        filename: Output filename
    """
    with open(filename, 'w') as f:
        f.write("="*70 + "\n")
        f.write("MEDICAL SYMPTOM CHECKER REPORT\n")
        f.write("="*70 + "\n\n")
        f.write("DISCLAIMER: This is for educational purposes only.\n")
        f.write("   Always consult healthcare professionals for medical advice.\n\n")
        f.write("-"*70 + "\n\n")
        f.write(f"Question: {question}\n\n")
        f.write("="*70 + "\n")
        f.write("RESULTS\n")
        f.write("="*70 + "\n\n")

        for i, result in enumerate(results, 1):
            f.write(f"{i}. {result['condition']}\n")
            f.write(f"   Confidence: {result['score']:.2%}\n")
            f.write(f"   Answer: {result['answer']}\n")
            f.write("   " + "-"*65 + "\n\n")

    print(f"Report saved to {filename}")

# Example: Save a report
results = check_symptoms("What are symptoms of flu?", top_k=3)
save_symptom_report("What are symptoms of flu?", results)

# Download the file
from google.colab import files
files.download('symptom_report.txt')


Searching medical encyclopedia for: 'What are symptoms of flu?'


Top 3 Results:


1. Migraine
   Confidence: 99.88%
   Answer: 
   -----------------------------------------------------------------

2. Strep Throat
   Confidence: 99.84%
   Answer: 
   -----------------------------------------------------------------

3. Asthma
   Confidence: 99.84%
   Answer: 
   -----------------------------------------------------------------
Report saved to symptom_report.txt


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>