In [None]:
from dotenv import load_dotenv
from babble_foundry.openrouter import OpenRouter

# Load variables from the .env file, letting them override anything already
# set in the process environment, and show whether a file was found.
dotenv_loaded = load_dotenv(override=True)
display(dotenv_loaded)

# Client used by every chat call further down the notebook.
client = OpenRouter()

In [5]:
from pydantic import BaseModel, Field

class CalendarEvent(BaseModel):
    # Structured representation of one calendar event extracted from free text.
    # Documented with comments rather than a class docstring on purpose: pydantic
    # copies a model docstring into the generated JSON schema, which would change
    # the schema sent to the model.

    # Human-readable event title, 1-100 characters.
    name: str = Field(
        description="The name or title of the event",
        min_length=1,
        max_length=100,
    )
    # ISO-style date string; the pattern checks the shape only, not calendar validity.
    date: str = Field(
        description="The date of the event in YYYY-MM-DD format",
        pattern=r"^\d{4}-\d{2}-\d{2}$",
    )
    # At least one attendee is required.
    participants: list[str] = Field(
        description="List of people attending the event",
        min_length=1,
    )
    # Lower bound mirrors `participants` (min_length=1): an event with at least
    # one attendee cannot have a zero or negative head count.
    num_participants: int = Field(
        description="Number of people attending the event",
        ge=1,
    )

# Conversation: a short instruction plus the sentence to extract the event from.
event_messages = [
    {"role": "system", "content": "Extract the event information."},
    {"role": "user", "content": "Alice and Bob are going to a science fair on February 25th, 2025."},
]

# Ask for output constrained to the JSON schema derived from CalendarEvent.
event_response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "CalendarEvent",
        "schema": CalendarEvent.model_json_schema(),
    },
}

response = client.chat(
    model="tencent/hunyuan-a13b-instruct:free",
    messages=event_messages,
    response_format=event_response_format,
    verbose=True,
)

# The first choice's message content holds the JSON text; validate it against the model.
event_json = response["choices"][0]["message"]["content"]
display(CalendarEvent.model_validate_json(event_json))

{
    "name": "Alice and Bob",
    "date": "2025-02-25",
    "participants": ["Alice", "Bob"],
    "num_participants": 2
}


CalendarEvent(name='Alice and Bob', date='2025-02-25', participants=['Alice', 'Bob'], num_participants=2)

In [6]:
from typing import Literal, Optional, get_args

# Sample passage (a company history of Hugging Face) used as the input text for
# the named-entity-recognition prompt; surrounding whitespace is stripped.
history_of_huggingface = """
The company was founded in 2016 by French entrepreneurs Clément Delangue, Julien Chaumond, and Thomas Wolf in New York City, originally as a company that developed a chatbot app targeted at teenagers. The company was named after the U+1F917 🤗 HUGGING FACE emoji. After open sourcing the model behind the chatbot, the company pivoted to focus on being a platform for machine learning.
In March 2021, Hugging Face raised US$40 million in a Series B funding round.
On April 28, 2021, the company launched the BigScience Research Workshop in collaboration with several other research groups to release an open large language model. In 2022, the workshop concluded with the announcement of BLOOM, a multilingual large language model with 176 billion parameters.
In December 2022, the company acquired Gradio, an open source library built for developing machine learning applications in Python.
On May 5, 2022, the company announced its Series C funding round led by Coatue and Sequoia. The company received a $2 billion valuation.
On August 3, 2022, the company announced the Private Hub, an enterprise version of its public Hugging Face Hub that supports SaaS or on-premises deployment.
In February 2023, the company announced partnership with Amazon Web Services (AWS) which would allow Hugging Face's products to be available to AWS customers to use them as the building blocks for their custom applications. The company also said the next generation of BLOOM will be run on Trainium, a proprietary machine learning chip created by AWS.
In August 2023, the company announced that it raised $235 million in a Series D funding round, at a $4.5 billion valuation. The funding was led by Salesforce and notable participation came from Google, Amazon, Nvidia, AMD, Intel, IBM, and Qualcomm.
In June 2024, the company announced, along with Meta and Scaleway, their launch of a new AI accelerator program for European startups. This initiative aims to help startups integrate open foundation models into their products, accelerating the EU AI ecosystem. The program, based at STATION F in Paris, will run from September 2024 to February 2025. Selected startups will receive mentoring, access to AI models and tools, and Scaleway’s computing power.
On September 23, 2024, to further the International Decade of Indigenous Languages, Hugging Face teamed up with Meta and UNESCO to launch a new online language translator built on Meta's No Language Left Behind open-source AI model, enabling free text translation across 200 languages, including many low-resource languages.
On April 2025, Hugging Face announced that they acquired a humanoid robotics startup, Pollen Robotics. Pollen Robotics is a France based Robotics Startup founded by Matthieu Lapeyre and Pierre Rouanet in 2016. In an X tweet, Clément Delangue, CEO of Hugging Face, shared his vision to make Artificial Intelligence robotics Open Source.
""".strip()

# Closed set of labels a generic named entity may carry.
NamedEntityType = Literal["organization", "person", "location", "event", "product"]

class NamedEntity(BaseModel):
    # A generic entity mention: one of the NamedEntityType labels plus its text.
    type: NamedEntityType = Field(
        description="Type of the named entity",
    )
    value: str = Field(
        description="Name of the entity as seen in the text",
    )

class DateEntity(BaseModel):
    # A date mention. `type` is fixed to "date"; `value` keeps the raw text, and
    # the numeric parts are each optional and range-checked independently.
    type: Literal["date"]
    value: str = Field(description="The raw string of the date in the text")
    year: int | None = Field(default=None, ge=1000, le=9999)
    month: int | None = Field(default=None, ge=1, le=12)
    day: int | None = Field(default=None, ge=1, le=31)

class MonetaryValue(BaseModel):
    # A monetary mention. `type` is fixed to "monetary_value"; `value` keeps the
    # raw text and `amount` is the value expanded to a positive whole number.
    type: Literal["monetary_value"]
    value: str = Field(description="The raw string of the monetary value in the text")
    amount: int = Field(description="Monetary value to the nearest full amount", ge=1)
    # Default of None makes the field genuinely optional. Without a default,
    # pydantic v2 treats an Optional field as required-but-nullable, so a
    # response that simply omits "currency" would fail validation.
    currency: Optional[str] = Field(None, description="Currency, e.g. USD")

class RetrievedEntities(BaseModel):
    # Top-level response object: a list whose items may each be a named
    # entity, a date, or a monetary value.
    entities: list[NamedEntity | DateEntity | MonetaryValue] = Field(
        description="A list of entities in the text",
    )

# System prompt for the NER task. Literal braces are doubled because the
# template goes through str.format(); only {named_entities} is substituted,
# with the labels taken from NamedEntityType so the prompt and schema agree.
# The few-shot example uses the same field names as the pydantic models
# ("value", not "name"), and each raw "value" matches the example text.
system_message = """
# Task
You are performing Named Entity Recognition on the given <text>. Extract entities and classify them into the appropriate structured format.

- Named entities: {named_entities}
- Dates: years, dates, time periods (e.g., "2016" → {{"year": 2016}}, "March 2020" → {{"year": 2020, "month": 3}})
- Monetary values: funding, valuations, prices (e.g., "$40 million" → {{"amount": 40000000, "currency": "USD"}})

## Requirements
- Extract ALL entities of these types in the order they appear in the <text>.
- Output each entity in its own single line.

## Example
<example_text>
Apple Inc. was founded by Steve Jobs in Cupertino in 1976 and received US$1.3 million in funding for their first product launch event.
</example_text>

<example_output>
{{
    "entities": [
        {{"type": "organization", "value": "Apple Inc."}},
        {{"type": "person", "value": "Steve Jobs"}},
        {{"type": "location", "value": "Cupertino"}},
        {{"type": "date", "value": "1976", "year": 1976}},
        {{"type": "monetary_value", "value": "US$1.3 million", "amount": 1300000, "currency": "USD"}},
        {{"type": "event", "value": "product launch event"}}
    ]
}}
</example_output>

""".strip().format(named_entities=", ".join(get_args(NamedEntityType)))

# User turn: the passage wrapped in <text> tags, followed by the instruction
# to start extracting. Built line by line; the blank entry yields the empty
# line between the closing tag and the instruction.
user_message = "\n".join(
    [
        "## Input Text",
        "<text>",
        f"{history_of_huggingface}",
        "</text>",
        "",
        "Extract the entities now.",
    ]
).strip()

def _delta_text(chunk) -> str:
    """Return the text fragment carried by one streamed chunk.

    Yields "" instead of raising when the chunk has no usable content
    delta: an empty ``choices`` list, a delta without a ``content`` key,
    or a ``content`` of ``None``.
    """
    try:
        fragment = chunk["choices"][0]["delta"]["content"]
    except (KeyError, IndexError, TypeError):
        # Presumably role-only / finish / usage chunks; they carry no text.
        return ""
    return fragment or ""


stream = client.chat(
    # model="tencent/hunyuan-a13b-instruct:free",
    model="meta-llama/llama-3.1-405b-instruct:free",
    messages=[
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "NamedEntityRecognition",
            "schema": RetrievedEntities.model_json_schema()
        },
    },
    stream=True,
    reasoning={"enabled": False, "max_tokens": 0},
    verbose=True
)
# Drain the stream fully before assembling the response text.
chunks = list(stream)

# Concatenate the per-chunk fragments into the complete JSON document.
# NOTE(review): the recorded output shows "ClÃ©ment" for "Clément", which looks
# like UTF-8 bytes decoded as Latin-1 somewhere in the streaming path - confirm
# in the client; it cannot be fixed from this cell.
content = "".join(_delta_text(chunk) for chunk in chunks)
display(RetrievedEntities.model_validate_json(content).entities)

{
    "entities": [
        {"type": "date", "value": "2016", "year": 2016},
        {"type": "person", "value": "ClÃ©ment Delangue"},
        {"type": "person", "value": "Julien Chaumond"},
        {"type": "person", "value": "Thomas Wolf"},
        {"type": "location", "value": "New York City"},
        {"type": "organization", "value": "Hugging Face"},
        {"type": "date", "value": "March 2021", "year": 2021, "month": 3},
        {"type": "monetary_value", "value": "US$40 million", "amount": 40000000, "currency": "USD"},
        {"type": "event", "value": "BigScience Research Workshop"},
        {"type": "date", "value": "April 28, 2021", "year": 2021, "month": 4, "day": 28},
        {"type": "date", "value": "2022", "year": 2022},
        {"type": "product", "value": "BLOOM"},
        {"type": "organization", "value": "Gradio"},
        {"type": "date", "value": "December 2022", "year": 2022, "month": 12},
        {"type": "date", "value": "May 5, 2022", "year": 2022, "month": 

[DateEntity(type='date', value='2016', year=2016, month=None, day=None),
 NamedEntity(type='person', value='ClÃ©ment Delangue'),
 NamedEntity(type='person', value='Julien Chaumond'),
 NamedEntity(type='person', value='Thomas Wolf'),
 NamedEntity(type='location', value='New York City'),
 NamedEntity(type='organization', value='Hugging Face'),
 DateEntity(type='date', value='March 2021', year=2021, month=3, day=None),
 MonetaryValue(type='monetary_value', value='US$40 million', amount=40000000, currency='USD'),
 NamedEntity(type='event', value='BigScience Research Workshop'),
 DateEntity(type='date', value='April 28, 2021', year=2021, month=4, day=28),
 DateEntity(type='date', value='2022', year=2022, month=None, day=None),
 NamedEntity(type='product', value='BLOOM'),
 NamedEntity(type='organization', value='Gradio'),
 DateEntity(type='date', value='December 2022', year=2022, month=12, day=None),
 DateEntity(type='date', value='May 5, 2022', year=2022, month=5, day=5),
 NamedEntity(type=