> **Installing dependencies**

> **Installed torch audio soundfile to use audio datasets**

> **Installed jiwer, evaluate  to use wer metric**

In [1]:
!pip install datasets transformers torchaudio soundfile

Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0->torchaudio)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0->torchaudio)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.6.0->torchaudio)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.6.0->torchaudio)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch==2.6.0->torchaudio)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata

In [2]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.5-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.5


In [3]:
!pip install jiwer

Collecting jiwer
  Downloading jiwer-4.0.0-py3-none-any.whl.metadata (3.3 kB)
Collecting rapidfuzz>=3.9.7 (from jiwer)
  Downloading rapidfuzz-3.14.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Downloading jiwer-4.0.0-py3-none-any.whl (23 kB)
Downloading rapidfuzz-3.14.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: rapidfuzz, jiwer
Successfully installed jiwer-4.0.0 rapidfuzz-3.14.0


> **Built a pipeline using transformers.**

> **Used whisper for audio to speech.**

> **Used biomedical-ner-all for named entity recognition**

In [4]:
from datasets import load_dataset, Audio
from transformers import pipeline

# 1. Load dataset
dataset = load_dataset("jarvisx17/Medical-ASR-EN", split="train")

# Decode audio to waveform
dataset = dataset.cast_column("audio", Audio(sampling_rate=16000))

# 1. Load the ASR pipeline
asr = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

ner = pipeline("ner",
               model="d4data/biomedical-ner-all",
               aggregation_strategy="simple")


# 3. Take one sample (replace with your dataset loading logic)
sample = dataset[0]
audio = sample["audio"]["array"]
ground_truth = sample["transcription"]

# 4. Run ASR
prediction = asr(audio)
transcribed_text = prediction["text"]

print(f"🔊 Ground Truth: {ground_truth}")
print(f"📝 Transcribed Text: {transcribed_text}")

# 5. Run NER on transcribed text
entities = ner(transcribed_text)

# Print the raw entities for debugging
print("\n🏷️ Named Entities:")
for ent in entities:
    print(f" - {ent['word']} ({ent['entity_group']}, score={ent['score']:.2f})")

# 6. Build a medical fact sheet with CORRECT MAPPING
fact_sheet = {
    "Disease": [],
    "Symptom": [],
    "Medication": [],
    "Test": [],
    "Anatomy": []
}

# Define a mapping from the model's labels to our fact sheet categories
label_mapping = {
    'Disease_disorder': "Disease",
    'Sign_symptom': "Symptom",
    'Drug': "Medication",
    'Lab_value': "Test",
    'Biological_structure': "Anatomy", # This is the key fix
    'Body_structure': "Anatomy",       # Some models might use this
    'Body_part': "Anatomy",            # or this
    'Therapeutic_procedure': "Test",
    'Diagnostic_procedure': "Test",
}

for ent in entities:
    original_label = ent["entity_group"]
    value = ent["word"]

    # Find the best category for this label, default to None
    category = label_mapping.get(original_label)

    # Only add it if we found a valid category and it's not already in the list
    if category and value not in fact_sheet[category]:
        fact_sheet[category].append(value)

print("\n📌 Extracted Medical Information:")
print(fact_sheet)

2025-09-05 07:18:37.120864: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757056717.308957      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1757056717.363039      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


README.md:   0%|          | 0.00/262 [00:00<?, ?B/s]

dataset_infos.json:   0%|          | 0.00/757 [00:00<?, ?B/s]

data/train-00000-of-00013-fd3c831bb1117e(…):   0%|          | 0.00/339M [00:00<?, ?B/s]

data/train-00001-of-00013-e247e67b9a3043(…):   0%|          | 0.00/382M [00:00<?, ?B/s]

data/train-00002-of-00013-ed628df5482800(…):   0%|          | 0.00/365M [00:00<?, ?B/s]

data/train-00003-of-00013-fe9679f67f8f40(…):   0%|          | 0.00/317M [00:00<?, ?B/s]

data/train-00004-of-00013-67ec6d116014bd(…):   0%|          | 0.00/370M [00:00<?, ?B/s]

data/train-00005-of-00013-8e7af61c1e5237(…):   0%|          | 0.00/365M [00:00<?, ?B/s]

data/train-00006-of-00013-67e6ab45871fa9(…):   0%|          | 0.00/368M [00:00<?, ?B/s]

data/train-00007-of-00013-a4fe9e64ba389d(…):   0%|          | 0.00/369M [00:00<?, ?B/s]

data/train-00008-of-00013-d40ec3e9af31cb(…):   0%|          | 0.00/328M [00:00<?, ?B/s]

data/train-00009-of-00013-03efb43a58d1fd(…):   0%|          | 0.00/371M [00:00<?, ?B/s]

data/train-00010-of-00013-703e62af8725c6(…):   0%|          | 0.00/323M [00:00<?, ?B/s]

data/train-00011-of-00013-4aa9012e50a0b2(…):   0%|          | 0.00/351M [00:00<?, ?B/s]

data/train-00012-of-00013-18d1b054ff2942(…):   0%|          | 0.00/327M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/6661 [00:00<?, ? examples/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/290M [00:00<?, ?B/s]

generation_config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/805 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

normalizer.json: 0.00B [00:00, ?B/s]

added_tokens.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/266M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/373 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Device set to use cuda:0


🔊 Ground Truth: I have a painful cramp in my feet
📝 Transcribed Text:  I have a painful cramp in my feet.

🏷️ Named Entities:
 - painful (Detailed_description, score=1.00)
 - cramp (Sign_symptom, score=0.98)

📌 Extracted Medical Information:
{'Disease': [], 'Symptom': ['cramp'], 'Medication': [], 'Test': [], 'Anatomy': []}


**Evaluation of ASR and NER done below.**

In [5]:
import evaluate

# Load WER metric
wer_metric = evaluate.load("wer")

# Example: loop through dataset and compute WER
wer_scores = []
for i in range (5):   # change [:5] to process first 5 samples, or remove for all
    sample=dataset[i]
    audio = sample["audio"]["array"]
    ground_truth = sample["transcription"]

    # 1. Run ASR
    prediction = asr(audio)
    transcribed_text = prediction["text"]

    wer = wer_metric.compute(predictions=[transcribed_text], references=[ground_truth])
    wer_scores.append(wer)

    print(f"\n===== Sample {i+1} =====")
    print(f"🔊 Ground Truth: {ground_truth}")
    print(f"📝 Transcribed Text: {transcribed_text}")
    print(f"📉 WER: {wer:.3f}")
    # 2. Run NER
    entities = ner(transcribed_text)

    print("\n🏷️ Named Entities:")
    for ent in entities:
        print(f" - {ent['word']} ({ent['entity_group']}, score={ent['score']:.2f})")

    # 3. Initialize fact sheet
    fact_sheet = {
        "Disease": [],
        "Symptom": [],
        "Medication": [],
        "Test": [],
        "Anatomy": []
    }

    # 4. Define label mapping
    label_mapping = {
        'Disease_disorder': "Disease",
        'Sign_symptom': "Symptom",
        'Drug': "Medication",
        'Lab_value': "Test",
        'Biological_structure': "Anatomy",
        'Body_structure': "Anatomy",
        'Body_part': "Anatomy",
        'Therapeutic_procedure': "Test",
        'Diagnostic_procedure': "Test",
    }

    # 5. Fill fact sheet
    for ent in entities:
        original_label = ent["entity_group"]
        value = ent["word"]

        category = label_mapping.get(original_label)

        if category and value not in fact_sheet[category]:
            fact_sheet[category].append(value)

    # 6. Print structured info
    print("\n📌 Extracted Medical Information:")
    print(fact_sheet)


Downloading builder script: 0.00B [00:00, ?B/s]


===== Sample 1 =====
🔊 Ground Truth: I have a painful cramp in my feet
📝 Transcribed Text:  I have a painful cramp in my feet.
📉 WER: 0.125

🏷️ Named Entities:
 - painful (Detailed_description, score=1.00)
 - cramp (Sign_symptom, score=0.98)

📌 Extracted Medical Information:
{'Disease': [], 'Symptom': ['cramp'], 'Medication': [], 'Test': [], 'Anatomy': []}

===== Sample 2 =====
🔊 Ground Truth: The pain feels like it's right below the skin
📝 Transcribed Text:  The pain feels like it's right below the skin.
📉 WER: 0.111

🏷️ Named Entities:
 - pain (Coreference, score=0.95)
 - right (Biological_structure, score=0.53)
 - below the (Biological_structure, score=0.71)

📌 Extracted Medical Information:
{'Disease': [], 'Symptom': [], 'Medication': [], 'Test': [], 'Anatomy': ['right', 'below the']}

===== Sample 3 =====
🔊 Ground Truth: I feel suicidal.
📝 Transcribed Text:  I feel suicidal.
📉 WER: 0.000

🏷️ Named Entities:

📌 Extracted Medical Information:
{'Disease': [], 'Symptom': [], 'Medicat