In [44]:
from vespa.package import HNSW, ApplicationPackage, Component, Field, Parameter

# Embedder component served by Vespa's "hugging-face-embedder" type.
# The ONNX model and tokenizer are referenced by URL. The two scalar options
# ("normalize", "pooling-strategy") leave the second argument as an empty dict
# and pass their value as a plain string in the third argument.
snowflake_embedder = Component(
    id="snow",
    type="hugging-face-embedder",
    parameters=[
        # Quantized (int8) ONNX export of the embedding model.
        Parameter(
            "transformer-model",
            {
                "url": "https://huggingface.co/Snowflake/snowflake-arctic-embed-l/resolve/main/onnx/model_int8.onnx"
            },
        ),
        # Tokenizer definition from the same model repository.
        Parameter(
            "tokenizer-model",
            {
                "url": "https://huggingface.co/Snowflake/snowflake-arctic-embed-l/raw/main/tokenizer.json"
            },
        ),
        # Value is the string "true", not a Python bool.
        Parameter("normalize", {}, "true"),
        # Value is the string "cls".
        Parameter("pooling-strategy", {}, "cls"),
    ],
)

# Application package named "snowflake" with the embedder as its only component.
app_package = ApplicationPackage(
    name="snowflake",
    components=[snowflake_embedder],
)

In [45]:
# Schema fields, declared up front and registered with a single call.
schema_fields = [
    # Integer id, stored as an attribute and included in summaries.
    Field(name="id", type="int", indexing=["attribute", "summary"]),
    # Document text, indexed with BM25 enabled and included in summaries.
    Field(
        name="doc",
        type="string",
        indexing=["index", "summary"],
        index="enable-bm25",
    ),
    # Synthetic field (not part of the fed document): produced at indexing
    # time by running `doc` through the embedder, then stored with an HNSW index.
    # NOTE(review): x[384] should match (or deliberately truncate) the output
    # width of the configured embedding model -- confirm.
    # NOTE(review): the mapped c{} dimension looks like a per-chunk layout,
    # but the input `doc` is a single string -- confirm this is intended.
    Field(
        name="doc_embeddings",
        type="tensor<float>(c{},x[384])",
        indexing=["input doc", "embed", "index", "attribute"],
        ann=HNSW(distance_metric="prenormalized-angular"),
        is_document_field=False,
    ),
]

app_package.schema.add_fields(*schema_fields)

In [None]:
from vespa.package import RankProfile

# "semantic": scores by the cosine of the distance to `doc_embeddings` and
# reports closest(doc_embeddings) as a match feature. The query tensor q is
# declared with the same x[384] width as the document field.
semantic_profile = RankProfile(
    name="semantic",
    inherits="default",
    inputs=[("query(q)", "tensor<float>(x[384])")],
    first_phase="cos(distance(field,doc_embeddings))",
    match_features=["closest(doc_embeddings)"],
)

# "bm25": lexical baseline that scores on BM25 over the `doc` field.
bm25_profile = RankProfile(name="bm25", first_phase="bm25(doc)")

# Register both profiles, semantic first, then bm25.
for rank_profile in (semantic_profile, bm25_profile):
    app_package.schema.add_rank_profile(rank_profile)

In [46]:
# The search request used throughout the notebook.
query = "How does Vespa handle real-time indexing and search?"

# Toy corpus standing in for indexed content. Entries are grouped by how they
# are expected to relate to `query`; list order matters and is preserved.
documents = [
    # --- Good BM25 and semantic match ---
    "Vespa excels in real-time data indexing and its ability to search large datasets quickly.",
    "Instant data availability and searchability are key features of the Vespa search engine.",
    # (this one has no direct mention of "Vespa")
    "With our search solution, real-time updates are seamlessly integrated into the search index, enhancing responsiveness.",
    # --- Cheeky apologetic reference from a competitor ---
    "While not as robust as Vespa, our vector database strives to meet your search needs, despite certain, shall we say, 'flexible' features.",
    # --- Semantic match, not a direct keyword hit ---
    "Search engines like ours utilize complex algorithms to handle immediate data querying and indexing.",
    "Modern search platforms emphasize quick data retrieval from continuously updated indexes.",
    # --- Keyword match (Vespa), no semantic relation to the search engine ---
    "Discover the history and cultural impact of the classic Italian Vespa scooter brand.",
    "Tips for maintaining your Vespa to ensure optimal performance and longevity of your scooter.",
    # (also contains the query words 'handle' and 'search')
    "Review of different scooter brands including Vespa, highlighting how they handle features like speed, cost, and aesthetics, and how consumers search for the best options.",
    "Vespa scooter safety regulations and best practices for urban commuting.",
]

In [47]:
# Export the application package to disk so the generated files can be inspected.
# to_files() requires the export root to exist, so create it first; exist_ok
# keeps this cell safe to re-run and lets the notebook survive Restart & Run All
# on a fresh checkout.
from pathlib import Path

EXPORT_DIR = "snowflake"
Path(EXPORT_DIR).mkdir(parents=True, exist_ok=True)
app_package.to_files(EXPORT_DIR)

In [48]:
from vespa.deployment import VespaDocker

# Deploy the package through VespaDocker; `app` is the handle returned by
# deploy(). The call blocks while waiting for the config server and the
# application status endpoint, as the progress log below shows.
vespa_docker = VespaDocker()
app = vespa_docker.deploy(application_package=app_package)

Waiting for configuration server, 0/60 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 0/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 5/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 10/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 15/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 20/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 25/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting for application status, 30/300 seconds...
Using plain http against endpoint http://localhost:8080/ApplicationStatus
Waiting fo