**Install LangExtract**

In [None]:
!pip install langextract

Collecting langextract
  Downloading langextract-1.0.8-py3-none-any.whl.metadata (18 kB)
Collecting async_timeout>=4.0.0 (from langextract)
  Downloading async_timeout-5.0.1-py3-none-any.whl.metadata (5.1 kB)
Collecting exceptiongroup>=1.1.0 (from langextract)
  Downloading exceptiongroup-1.3.0-py3-none-any.whl.metadata (6.7 kB)
Collecting ml-collections>=0.1.0 (from langextract)
  Downloading ml_collections-1.1.0-py3-none-any.whl.metadata (22 kB)
Downloading langextract-1.0.8-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading async_timeout-5.0.1-py3-none-any.whl (6.2 kB)
Downloading exceptiongroup-1.3.0-py3-none-any.whl (16 kB)
Downloading ml_collections-1.1.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.7/76.7 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ml-collections, exceptiongroup, async_ti

**Import necessary packages**

In [None]:
import getpass
import os
import textwrap

import langextract as lx

**Extraction Steps**

In [None]:
# 1. Instructions for the model: what to extract and which attributes to attach.
# The lines are joined with newlines and carry no trailing newline.
prompt = "\n".join(
    (
        "Extract all phrases that express a sentiment.",
        "Use the exact text for the extraction. Do not paraphrase.",
        "For each sentiment, provide the following attributes:",
        "- category: The type of sentiment (e.g., 'positive', 'negative', 'neutral').",
        "- intensity: A score from 0.0 to 1.0 indicating the strength of the sentiment.",
    )
)

In [None]:
# 2. A single worked example that shows the model the expected output shape:
# one source sentence and the two sentiment phrases taken verbatim from it.
# NOTE(review): "intensity" is passed as a float here; confirm that the
# library accepts non-string attribute values.
examples = [
    lx.data.ExampleData(
        text="The setup process was incredibly intuitive and the performance is amazing.",
        extractions=[
            lx.data.Extraction(
                extraction_class="sentiment",
                extraction_text=phrase,
                attributes={"category": "positive", "intensity": strength},
            )
            for phrase, strength in (
                ("incredibly intuitive", 0.7),
                ("amazing", 0.9),
            )
        ],
    )
]

In [None]:
# 3. Prepare the input text for analysis and run the extraction
input_text = """
The city air was thick with the smell of rain and exhaust as Kaelen clutched the faded photograph.
A wave of profound sadness washed over him as he remembered happier times.
He had to find her. Across the street, a shadowy figure watched from an alley, a flicker of cold amusement in their eyes before they melted back into the darkness.
"""

# SECURITY: never embed the API key in the notebook. Read it from the
# LANGEXTRACT_API_KEY environment variable and, if it is not set, ask for it
# interactively (getpass does not echo the value into the cell output).
# The key that was previously hard-coded in this cell has been exposed and
# must be revoked and replaced.
api_key = os.environ.get("LANGEXTRACT_API_KEY") or getpass.getpass("Gemini API key: ")

# Run the extraction with the prompt and example defined in the cells above.
result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    model_id="gemini-2.5-flash",
    api_key=api_key,
)

DEBUG:absl:Registered GeminiLanguageModel with patterns ['^gemini'] at priority 10
DEBUG:absl:Registered OllamaLanguageModel with patterns ['^gemma', '^llama', '^mistral', '^mixtral', '^phi', '^qwen', '^deepseek', '^command-r', '^starcoder', '^codellama', '^codegemma', '^tinyllama', '^wizardcoder', '^gpt-oss', '^meta-llama/[Ll]lama', '^google/gemma', '^mistralai/[Mm]istral', '^mistralai/[Mm]ixtral', '^microsoft/phi', '^Qwen/', '^deepseek-ai/', '^bigcode/starcoder', '^codellama/', '^TinyLlama/', '^WizardLM/'] at priority 10
DEBUG:absl:Registered OpenAILanguageModel with patterns ['^gpt-4', '^gpt4\\.', '^gpt-5', '^gpt5\\.'] at priority 10
2025-08-21 19:03:50,680 - langextract.debug - DEBUG - [langextract.inference] CALL: BaseLanguageModel.__init__(self=<GeminiLanguageModel>, constraint=Constraint(co...NONE: 'none'>), kwargs={})
2025-08-21 19:03:50,681 - langextract.debug - DEBUG - [langextract.inference] RETURN: BaseLanguageModel.__init__ -> None (0.0 ms)
2025-08-21 19:03:50,683 - langex

[92m✓[0m Extraction processing complete



INFO:absl:Finalizing annotation for document ID doc_73bb4009.
INFO:absl:Document annotation completed.


[92m✓[0m Extracted [1m3[0m entities ([1m1[0m unique types)
  [96m•[0m Time: [1m4.52s[0m
  [96m•[0m Speed: [1m75[0m chars/sec
  [96m•[0m Chunks: [1m1[0m


In [None]:
# 4. Persist the annotated document as JSONL in the current working directory
lx.io.save_annotated_documents(
    [result],
    output_name="extraction_results.jsonl",
    output_dir=".",
)

[94m[1mLangExtract[0m: Saving to [92mextraction_results.jsonl[0m: 1 docs [00:00, 1568.55 docs/s]

[92m✓[0m Saved [1m1[0m documents to [92mextraction_results.jsonl[0m





In [None]:
# 5. Generate the visualization from the file
html_content = lx.visualize("extraction_results.jsonl")

# The return value may be an object exposing the markup via `.data`
# (e.g. in Colab) or a plain string; normalise it before touching the file.
html_text = html_content.data if hasattr(html_content, "data") else html_content

# Write with an explicit UTF-8 encoding so non-ASCII characters in the
# extracted text do not depend on the platform's default encoding.
with open("visualization.html", "w", encoding="utf-8") as f:
    f.write(html_text)

[94m[1mLangExtract[0m: Loading [92mextraction_results.jsonl[0m: 100%|██████████| 1.25k/1.25k [00:00<00:00, 2.99MB/s]

[92m✓[0m Loaded [1m1[0m documents from [92mextraction_results.jsonl[0m



