In [None]:
# Install the third-party packages this notebook imports (langextract, python-dotenv).
# NOTE(review): versions are unpinned — consider pinning them so a re-run resolves the same releases.
%pip install -qq langextract python-dotenv

# Google LangExtract Demo: Lab Result Extraction

This notebook demonstrates how to use [Google LangExtract](https://github.com/google/langextract) to extract laboratory test results from unstructured text.

In [None]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is how the model API key reaches lx.extract —
# confirm which variable name the library expects.
load_dotenv()

In [None]:
import langextract as lx
import textwrap

# 1. Define the prompt and extraction rules
# Spelled out as adjacent literals, one per prompt line, so the exact text
# (including the trailing space that ends the first line) is visible here.
prompt = (
    "You are given a lab panel (e.g. CMP) in markdown format. Your goal is to extract individual lab tests and their results. \n"
    "You should also identify the normal ranges for each test when available.\n"
    "Provide all attributes for each test, including units, values, normal ranges (when available), and dates.\n"
)

# 2. Provide a high-quality example to guide the model
# A miniature two-date hematology table used as the few-shot input text.
_example_text = textwrap.dedent("""\
## Hematology Results (Anonymized)

| Component                    | June 11, 2021 | Dec 24, 2024  | 
|------------------------------|--------------|--------------|
| **WBC (K/uL)**               | 5.9          | 6.2          |         
| Normal Range: 4.0 - 11.0 K/uL|              |                   |              
| **Neutrophil %**             | 27.9%        | 32.4%        |       
                             """)

# Expected extractions for the table above, one row per (test, date) pair, in
# the order they are handed to the model:
# (test name, unit, value, range_low, range_high, date).
# "n/a" is used for the range bounds of the test that has no normal-range row.
_example_rows = [
    ("WBC", "K/uL", "5.9", "4.0", "11.0", "June 11, 2021"),
    ("Neutrophil %", "%", "27.9", "n/a", "n/a", "June 11, 2021"),
    ("WBC", "K/uL", "6.2", "4.0", "11.0", "Dec 24, 2024"),
    ("Neutrophil %", "%", "32.4", "n/a", "n/a", "Dec 24, 2024"),
]

examples = [
    lx.data.ExampleData(
        text=_example_text,
        extractions=[
            lx.data.Extraction(
                extraction_class="lab",
                extraction_text=test_name,
                attributes={
                    "unit": unit,
                    "value": value,
                    "range_low": range_low,
                    "range_high": range_high,
                    "date": date,
                },
            )
            for test_name, unit, value, range_low, range_high, date in _example_rows
        ],
    )
]

In [None]:
# The input text to be processed: markdown of an actual, anonymized lab report.
# Layout: one column per date; each bolded test row is normally followed by a
# "_Normal Range:_" row (the "Fasting" row has none). Some range rows list two
# ranges separated by "/", and some cells hold "-" or are left empty.
# The text contains a non-ASCII character ("²" in the eGFR unit).
input_text = textwrap.dedent("""\
## Test Results (Anonymized)

| Component                       | Jun 05, 2018 | Feb 12, 2019 | Dec 15, 2020 | Jul 01, 2021 | Nov 10, 2022 |
|---------------------------------|--------------|--------------|--------------|--------------|--------------|
| **Sodium** | 137 mmol/L   | 140 mmol/L   | 142 mmol/L   | 138 mmol/L   | 139 mmol/L   |
| _Normal Range:_ 135 - 145 mmol/L|              |              |              |              |              |
| **Urea Nitrogen, Ser/Plas** | -            | 10 mg/dL     | 11 mg/dL     | 13 mg/dL     | 14 mg/dL     |
| _Normal Range:_ 6 - 20 mg/dL / 8 - 23 mg/dL|    |              |              |              |              |
| **Calcium** | 9.2 mg/dL    | 9.5 mg/dL    | 9.4 mg/dL    | 9.7 mg/dL    | 9.3 mg/dL    |
| _Normal Range:_ 8.4 - 10.2 mg/dL / 8.4 - 10.5 mg/dL |     |              |              |              |              |
| **Protein, Total, Ser/Plas** | 6.9 g/dL     | 7.5 g/dL     | 7.8 g/dL     | 7.1 g/dL     | 7.4 g/dL     |
| _Normal Range:_ 6.0 - 8.3 g/dL  |              |              |              |              |              |
| **Albumin, Ser/Plas** | 4.5 g/dL     | 4.7 g/dL     | 5.0 g/dL     | 4.5 g/dL     | 4.8 g/dL     |
| _Normal Range:_ 3.5 - 5.2 g/dL  |              |              |              |              |              |
| **Bilirubin Total** | 0.3 mg/dL    | 0.3 mg/dL    | 0.4 mg/dL    | 0.2 mg/dL    | 0.3 mg/dL    |
| _Normal Range:_ Less than <1.2 mg/dL |           |              |              |              |              |
| **Alkaline Phosphatase (ALK P'TASE), Total, Ser/Plas** | 38 U/L | 40 U/L | 45 U/L | 41 U/L | 40 U/L |
| _Normal Range:_ 35 - 105 U/L    |              |              |              |              |              |
| **AST (Aspartate Aminotransferase)** | 21 U/L  | 18 U/L       | 25 U/L       | 22 U/L       | 23 U/L       |
| _Normal Range:_ 10 - 35 U/L     |              |              |              |              |              |
| **ALT (Alanine Aminotransferase)** | 16 U/L   | 17 U/L       | 20 U/L       | 21 U/L       | 18 U/L       |
| _Normal Range:_ 10 - 35 U/L     |              |              |              |              |              |
| **Globulin** | 2.4 g/dL     | 2.8 g/dL     | 2.8 g/dL     | 2.6 g/dL     | 2.6 g/dL     |
| _Normal Range:_ 2.0 - 5.0 g/dL  |              |              |              |              |              |
| **Potassium** | 4.1 mmol/L   | 4.6 mmol/L   | 4.0 mmol/L   | 4.5 mmol/L   | 4.2 mmol/L   |
| _Normal Range:_ 3.5 - 5.5 mmol/L|              |              |              |              |              |
| **Chloride, Ser/Plas** | 101 mmol/L   | 103 mmol/L   | 105 mmol/L   | 100 mmol/L   | 102 mmol/L   |
| _Normal Range:_ 98 - 107 mmol/L |              |              |              |              |              |
| **CO2, Ser/Plas** | 26 mmol/L    | 31 mmol/L    | 25 mmol/L    | 28 mmol/L    | 26 mmol/L    |
| _Normal Range:_ 22 - 29 mmol/L  |              |              |              |              |              |
| **Anion Gap** | 10 mmol/L    | 6 mmol/L     | 12 mmol/L    | 10 mmol/L    | 11 mmol/L    |
| _Normal Range:_ 5 - 15 mmol/L   |              |              |              |              |              |
| **Fasting** | Yes          | No           |              |              |              |
| **Glucose, SER/PLAS** | 104 mg/dL    | 88 mg/dL     | 97 mg/dL     | 105 mg/dL    | -            |
| _Normal Range:_ 70 - 100 mg/dL / 70 - 140 mg/dL |  |         |              |              |              |
| **Creatinine** | 0.75 mg/dL   | 0.71 mg/dL   | 0.76 mg/dL   | 0.81 mg/dL   | 0.85 mg/dL   |
| _Normal Range:_ 0.51 - 0.95 mg/dL|              |              |              |              |              |
| **eGFR** | 88 mL/min/1.73 m² | 95 mL/min/1.73 m² | 89 mL/min/1.73 m² | 83 mL/min/1.73 m² | 79 mL/min/1.73 m² |
| _Normal Range:_ Greater than >60 mL/min/1.73 m² |              |              |              |              |             |

> **Note:** Glucose entries are manually recorded; check EMR for more details.
""")

# Run the extraction
# Tunables are named here so they are easy to spot and adjust.
MODEL_ID = "gemini-2.5-flash"
EXTRACTION_PASSES = 2
MAX_CHAR_BUFFER = 2000
# NOTE(review): input_text is longer than MAX_CHAR_BUFFER characters, so it is
# presumably processed in several chunks; chunks after the first would not
# contain the table's date header row — confirm that extractions from the
# lower rows still carry the correct dates.

result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    model_id=MODEL_ID,
    extraction_passes=EXTRACTION_PASSES,
    max_char_buffer=MAX_CHAR_BUFFER,
)

In [None]:
# Persist the annotated document as JSONL, then render an HTML visualization
# of it next to the JSONL file.
output_dir = "./outputs"
jsonl_name = "extraction_results.jsonl"

# Save the results to a JSONL file
lx.io.save_annotated_documents([result], output_name=jsonl_name, output_dir=output_dir)

# Generate the visualization from the file
html_content = lx.visualize(f"{output_dir}/{jsonl_name}")

# In Jupyter/Colab the return value exposes the markup via `.data`;
# otherwise it is written out as-is.
html_text = html_content.data if hasattr(html_content, "data") else html_content

# Write explicitly as UTF-8: the source text contains non-ASCII characters
# (e.g. "m²"), and without an explicit encoding open() falls back to the
# platform's locale encoding, which can fail or garble the page.
with open(f"{output_dir}/visualization.html", "w", encoding="utf-8") as f:
    f.write(html_text)