In [None]:
#| default_exp query

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#|hide
from nbdev import *
from fastcore.test import *
from fastcore.utils import *

# query
> Reference API for `QueryModel` and related code

In [None]:
#|export
from typing import Callable, Dict, List, Optional
from fastcore.utils import patch

## Match Filters

In [None]:
#|export
class MatchFilter(object):
    def __init__(self) -> None:
        "Abstract base class for match filters; holds no state of its own."
        return None

In [None]:
#|export
#|hide
@patch
def create_match_filter(
    self: MatchFilter,
    query: str  # Query input.
) -> str:  # Part of the YQL expression related to the filter.
    "Abstract method: subclasses implement it to build the part of the YQL expression related to the filter."
    # The base class has no filter of its own to render.
    raise NotImplementedError()

In [None]:
#|export
#|hide
@patch
def get_query_properties(
    self: MatchFilter,
    query: Optional[str] = None  # Query input.
) -> Dict:  # Contains the relevant request properties associated with the filter.
    "Abstract method: subclasses implement it to return the request properties associated with the filter."
    # The base class has no properties of its own to report.
    raise NotImplementedError()

In [None]:
#|export
class AND(MatchFilter):
    def __init__(self) -> None:
        "Filter that matches documents containing all the query terms."
        super(AND, self).__init__()

Usage: The `AND` filter is usually used when specifying query models.

In [None]:
# The AND filter takes no constructor arguments.
and_filter = AND()

In [None]:
#|export
#|hide
@patch
def create_match_filter(
    self: AND, 
    query: str  # Query input.  
) -> str:  # Part of the YQL expression related to the AND filter.
    """
    Creates part of the YQL expression related to the AND filter.

    Backslashes and double quotes in `query` are backslash-escaped so the text
    cannot terminate the double-quoted YQL string literal passed to `userInput`.
    """
    # Escape backslashes first so the backslashes added for quotes are not doubled.
    escaped_query = query.replace("\\", "\\\\").replace('"', '\\"')
    return '(userInput("{}"))'.format(escaped_query)

In [None]:
#|export
#|hide
@patch
def get_query_properties(
    self: AND,
    query: Optional[str] = None  # Query input (not used by this filter).
) -> Dict:  # Request properties associated with the AND filter; always empty.
    "Return the request properties associated with the AND filter."
    # The AND filter contributes no extra request properties.
    return dict()


In [None]:
#|hide
# Check the YQL fragment and the (empty) request properties produced by the AND filter.
# The query contains a double space, which appears verbatim in the expected fragment.
query = "this is  a test"
test_eq(
    and_filter.create_match_filter(query),
    '(userInput("this is  a test"))',
)
test_eq(and_filter.get_query_properties(query), {})

In [None]:
#|export
class OR(MatchFilter):
    def __init__(self) -> None:
        "Filter that matches any document containing at least one query term."
        super(OR, self).__init__()

Usage: The `OR` filter is usually used when specifying query models.

In [None]:
# The OR filter takes no constructor arguments.
or_filter = OR()

In [None]:
#|export 
#|hide
@patch
def create_match_filter(
    self: OR, 
    query: str  # Query input.
) -> str:  # Part of the YQL expression related to the OR filter.
    """
    Creates part of the YQL expression related to the OR filter.

    Backslashes and double quotes in `query` are backslash-escaped so the text
    cannot terminate the double-quoted YQL string literal passed to `userInput`.
    """
    # Escape backslashes first so the backslashes added for quotes are not doubled.
    escaped_query = query.replace("\\", "\\\\").replace('"', '\\"')
    return '({{grammar: "any"}}userInput("{}"))'.format(escaped_query)

In [None]:
#|export
#|hide
@patch
def get_query_properties(
    self: OR,
    query: Optional[str] = None  # Query input (not used by this filter).
) -> Dict:  # Request properties associated with the OR filter; always empty.
    "Return the request properties associated with the OR filter."
    # The OR filter contributes no extra request properties.
    return dict()

In [None]:
#|hide
# Check the YQL fragment and the (empty) request properties produced by the OR filter.
query = "this is  a test"
test_eq(
    or_filter.create_match_filter(query=query),
    '({grammar: "any"}userInput("this is  a test"))',
)
test_eq(or_filter.get_query_properties(query=query), {})

In [None]:
#|export
class WeakAnd(MatchFilter):
    def __init__(
        self,
        hits: int,  # Lower bound on the number of hits to be retrieved.
        field: str = "default"  # Vespa field to search.
    ) -> None:
        """
        Match documents according to the weakAND algorithm.

        Reference: [https://docs.vespa.ai/en/using-wand-with-vespa.html](https://docs.vespa.ai/en/using-wand-with-vespa.html)
        """
        super(WeakAnd, self).__init__()
        # Stored as given; both values are read when the YQL fragment is rendered.
        self.hits, self.field = hits, field

Usage: The `WeakAnd` filter is usually used when specifying query models.

In [None]:
# `hits` is required; `field="default"` repeats the constructor's default value.
weakand_filter = WeakAnd(hits=10, field="default")

In [None]:
#|export
#|hide
@patch
def create_match_filter(
    self: WeakAnd, 
    query: str  # Query input.
) -> str:  # Part of the YQL expression related to the WeakAnd filter.
    """
    Creates part of the YQL expression related to the WeakAnd filter.

    The query is split on single spaces and each token becomes a
    `<field> contains "<token>"` term. Backslashes and double quotes inside a
    token are backslash-escaped so a token cannot terminate its YQL string literal.
    """
    # NOTE: splitting on a single space keeps the empty tokens produced by
    # consecutive spaces; they are rendered as `<field> contains ""`.
    terms = ", ".join(
        '{} contains "{}"'.format(
            # Escape backslashes first so the backslashes added for quotes are not doubled.
            self.field, token.replace("\\", "\\\\").replace('"', '\\"')
        )
        for token in query.split(" ")
    )
    return '({{targetHits: {}}}weakAnd({}))'.format(self.hits, terms)


In [None]:
#|export
#|hide
@patch
def get_query_properties(
    self: WeakAnd, 
    query: Optional[str] = None  # Query input.
) -> Dict:  # Get the relevant request properties associated with the WeakAnd filter.
    "Get the relevant request properties associated with the WeakAnd filter."        
    return {}

In [None]:
#|hide
# Check the YQL fragment and the (empty) request properties produced by the WeakAnd filter.
# The double space in the query yields an empty token, hence the
# `field_name contains ""` term in the expected output.
weakand_filter = WeakAnd(hits=10, field="field_name")
query = "this is  a test"
test_eq(
    weakand_filter.create_match_filter(query=query),
    '({targetHits: 10}weakAnd(field_name contains "this", field_name contains "is", field_name contains "", '
    'field_name contains "a", field_name contains "test"))',
)
test_eq(weakand_filter.get_query_properties(query=query), {})

In [None]:
#|export
class ANN(MatchFilter):
    def __init__(
        self,
        doc_vector: str,  # Document field used in the distance calculation.
        query_vector: str,  # Query field used in the distance calculation.
        hits: int,  # Lower bound on the number of hits to return.
        label: str,  # Label identifying this specific operator instance.
        approximate: bool = True,  # True for approximate nearest neighbor, False for brute force. Defaults to True.
    ) -> None:
        """
        Match documents according to the nearest neighbor operator.

        Reference: [https://docs.vespa.ai/en/reference/query-language-reference.html](https://docs.vespa.ai/en/reference/query-language-reference.html)
        """
        super(ANN, self).__init__()
        self.doc_vector, self.query_vector = doc_vector, query_vector
        self.hits, self.label = hits, label
        self.approximate = approximate
        # Lowercase text form of the flag; only the literal `True` maps to "true".
        if self.approximate is True:
            self._approximate = "true"
        else:
            self._approximate = "false"

Usage: The `ANN` filter is usually used when specifying query models.

By default, the `ANN` operator uses approximate nearest neighbor:

In [None]:
# `approximate` is left at its default (True).
match_filter = ANN(
    doc_vector="doc_vector",
    query_vector="query_vector",
    hits=10,
    label="label",
)

Brute-force can be used by specifying `approximate=False`:

In [None]:
# Same configuration as above, with approximation switched off.
ann_filter = ANN(
    doc_vector="doc_vector",
    query_vector="query_vector",
    hits=10,
    label="label",
    approximate=False,
)

In [None]:
#|export
#|hide
@patch
def create_match_filter(
    self: ANN, 
    query: str  # Query input is ignored in the ANN case.
) -> str:  # Part of the YQL expression related to the ANN filter.
    "Creates part of the YQL expression related to the ANN filter"    
    return '({{targetHits: {}, label: "{}", approximate: {}}}nearestNeighbor({}, {}))'.format(
        self.hits, self.label, self._approximate, self.doc_vector, self.query_vector
    )

In [None]:
#|export
#|hide
@patch
def get_query_properties(
    self: ANN, 
    query: Optional[str] = None  # Query input is ignored in the ANN case.
) -> Dict[str, str]:  # Get the relevant request properties associated with the ANN filter.
    "Get the relevant request properties associated with the ANN filter."            
    return {}

In [None]:
#|hide
# Check the ANN filter with the default `approximate` setting: the query text
# does not appear in the expected fragment and no request properties are produced.
query = "query string has no effect here"
match_filter = ANN(
    doc_vector="doc_vector",
    query_vector="query_vector",
    hits=10,
    label="label",
)
test_eq(
    match_filter.create_match_filter(query=query),
    '({targetHits: 10, label: "label", approximate: true}nearestNeighbor(doc_vector, query_vector))',
)
test_eq(
    match_filter.get_query_properties(query=query),
    {},
)

In [None]:
#|hide
# Check the ANN filter with `approximate=False`: the annotation reads `approximate: false`.
query = "query string has no effect here"
match_filter = ANN(
    doc_vector="doc_vector",
    query_vector="query_vector",
    hits=10,
    label="label",
    approximate=False,
)
test_eq(
    match_filter.create_match_filter(query=query),
    '({targetHits: 10, label: "label", approximate: false}nearestNeighbor(doc_vector, query_vector))',
)
test_eq(
    match_filter.get_query_properties(query=query),
    {},
)

In [None]:
#|export
class Union(MatchFilter):
    def __init__(
        self,
        *args: MatchFilter  # Match filters whose union is taken.
    ) -> None:
        "Match documents that belong to the union of several match filters."
        super(Union, self).__init__()
        # Kept as the tuple of positional arguments, in the order given.
        self.operators = args

Usage: The `Union` filter is usually used when specifying query models.

In [None]:
# Combine a WeakAnd filter and an ANN filter into a single Union filter.
union_filter = Union(
    WeakAnd(hits=10, field="field_name"),
    ANN(
        doc_vector="doc_vector",
        query_vector="query_vector",
        hits=10,
        label="label",
    ),
)

In [None]:
#|export
#|hide
@patch
def create_match_filter(
    self: Union, 
    query: str  # Query input.
) -> str:  # Part of the YQL expression related to the Union filter.
    "Creates part of the YQL expression related to the Union filter"    
    match_filters = []
    for operator in self.operators:
        match_filter = operator.create_match_filter(query=query)
        if match_filter is not None:
            match_filters.append(match_filter)
    return " or ".join(match_filters)

In [None]:
#|export
#|hide
@patch
def get_query_properties(
    self: Union,  # Query input. 
    query: Optional[str] = None  # Get the relevant request properties associated with the Union filter.
) -> Dict[str, str]:  # Get the relevant request properties associated with the Union filter.
    query_properties = {}
    for operator in self.operators:
        query_properties.update(operator.get_query_properties(query=query))
    return query_properties

In [None]:
#|hide
# Check that a Union joins the fragments of its filters with " or " and merges
# their (here empty) request properties.
query = "this is  a test"
match_filter = Union(
    WeakAnd(hits=10, field="field_name"),
    ANN(
        doc_vector="doc_vector",
        query_vector="query_vector",
        hits=10,
        label="label",
    ),
)
test_eq(
    match_filter.create_match_filter(query=query),
    '({targetHits: 10}weakAnd(field_name contains "this", field_name contains "is", '
    'field_name contains "", '
    'field_name contains "a", field_name contains "test")) or '
    '({targetHits: 10, label: "label", approximate: true}nearestNeighbor(doc_vector, query_vector))',
)
test_eq(
    match_filter.get_query_properties(query=query),
    {},
)

## Ranking

In [None]:
#|export
class Ranking(object):
    def __init__(
        self,
        name: str = "default",  # Name of the rank profile as defined in a Vespa search definition.
        list_features: bool = False  # Whether the ranking features should be returned.
    ) -> None:
        "Define the rank profile to be used during ranking."
        self.name = name
        # Stored as the lowercase string "true" or "false", not as a bool.
        self.list_features = "true" if list_features else "false"

Usage: `Ranking` is usually used when specifying query models.

In [None]:
# Use the rank profile named "bm25" and ask for the ranking features to be listed.
ranking = Ranking(name="bm25", list_features=True)

In [None]:
#|hide
# `list_features=True` is stored as the string "true".
ranking = Ranking(name="rank_profile", list_features=True)
test_eq(ranking.name, "rank_profile")
test_eq(ranking.list_features, "true")

## Query properties

In [None]:
#|export
class QueryProperty(object):
    def __init__(self) -> None:
        "Abstract base class for query properties; holds no state of its own."
        return None

In [None]:
#|export
#|hide
@patch
def get_query_properties(
    self: QueryProperty, 
    query: Optional[str] = None  # Query input.
) -> Dict:  # Contains the relevant request properties to be included in the query.
    "Extract query property syntax."
    raise NotImplementedError


In [None]:
#|export
class QueryRankingFeature(QueryProperty):
    def __init__(
        self,
        name: str,  # Name of the feature.
        mapping: Callable[[str], List[float]],  # Function mapping a string to a list of floats.
    ) -> None:
        "Include ranking.feature.query into a Vespa query."
        super(QueryRankingFeature, self).__init__()
        # Stored as given; `mapping` is not called at construction time.
        self.name, self.mapping = name, mapping

Usage: `QueryRankingFeature` is usually used when specifying query models.

In [None]:
# The mapping here ignores its input and always returns the same list.
query_property = QueryRankingFeature(
    name="query_vector", mapping=lambda x: [1, 2, 3]
)

In [None]:
#|export
#|hide
@patch
def get_query_properties(
    self: QueryRankingFeature, 
    query: Optional[str] = None  # Query input.
) -> Dict[str, str]:  # Contains the relevant request properties to be included in the query.
    value = self.mapping(query)
    return {"ranking.features.query({})".format(self.name): str(value)}

In [None]:
#|hide
# The mapped list is rendered with `str`, so the property value is the text "[1, 2, 3]".
query = "this is  a test"
query_property = QueryRankingFeature(
    name="query_vector", mapping=lambda x: [1, 2, 3]
)
test_eq(
    query_property.get_query_properties(query=query),
    {"ranking.features.query(query_vector)": "[1, 2, 3]"},
)

## Query model

In [None]:
#|export
class QueryModel(object):
    def __init__(
        self,
        name: str = "default_name",  # Name of the query model. Used to tag model-related quantities, like evaluation metrics.
        query_properties: Optional[List[QueryProperty]] = None,  # Query properties to be included in the queries.
        match_phase: Optional[MatchFilter] = None,  # Define the match criteria. Defaults to a new `AND()` filter.
        ranking: Optional[Ranking] = None,  # Define the rank criteria. Defaults to a new `Ranking()`.
        body_function: Optional[Callable[[str], Dict]] = None,  # Function that takes the query as parameter and returns the body of a Vespa query.
    ) -> None:
        """
        Define a query model.

        A `QueryModel` is an abstraction that encapsulates all the relevant information
        controlling how a Vespa app matches and ranks documents.
        """
        self.name = name
        self.query_properties = query_properties if query_properties is not None else []
        # Default objects are created per instance. Default values written in the
        # signature are evaluated once, so `AND()` / `Ranking()` there would be
        # shared (and mutated) across every model built with the defaults.
        self.match_phase = match_phase if match_phase is not None else AND()
        self.ranking = ranking if ranking is not None else Ranking()
        self.body_function = body_function


Usage:

Specify a query model with default configurations:

In [None]:
# With no arguments every constructor default is used.
query_model = QueryModel()

Specify match phase, ranking phase and properties used by them.

In [None]:
# The ANN filter's `query_vector` and the ranking feature's `name` use the same
# string, "query_embedding".
query_model = QueryModel(
    query_properties=[
        QueryRankingFeature(name="query_embedding", mapping=lambda x: [1, 2, 3])
    ],
    match_phase=ANN(
        doc_vector="document_embedding",
        query_vector="query_embedding",
        hits=10,
        label="label",
    ),
    ranking=Ranking(name="bm25_plus_embeddings", list_features=True),
)

Specify a query model based on a function that returns the body of a Vespa query.

In [None]:
def body_function(query):
    "Return a complete request body for `query`."
    body = {
        "yql": "select * from sources * where userQuery();",
        "query": query,
        "type": "any",
        "ranking": {"profile": "bm25", "listFeatures": "true"},
    }
    return body

# Only `body_function` is supplied; the other constructor arguments keep their defaults.
query_model = QueryModel(body_function=body_function)

In [None]:
#|export
#|hide
@patch
def create_body(
    self: QueryModel, 
    query: str  # Query string.
) -> Dict[str, str]:  # Request body
    "Create the appropriate request body to be sent to Vespa."

    if self.body_function:
        body = self.body_function(query)
        return body

    query_properties = {}
    for query_property in self.query_properties:
        query_properties.update(query_property.get_query_properties(query=query))
    query_properties.update(self.match_phase.get_query_properties(query=query))

    match_filter = self.match_phase.create_match_filter(query=query)

    body = {
        "yql": "select * from sources * where {};".format(match_filter),
        "ranking": {
            "profile": self.ranking.name,
            "listFeatures": self.ranking.list_features,
        },
    }
    body.update(query_properties)
    return body

In [None]:
#|hide
# A default model renders the AND filter and the "default" rank profile
# with feature listing off.
query = "this is  a test"
query_model = QueryModel()
test_eq(
    query_model.create_body(query=query),
    {
        "yql": 'select * from sources * where (userInput("this is  a test"));',
        "ranking": {"profile": "default", "listFeatures": "false"},
    },
)

In [None]:
#|hide
# When a body function is supplied, `create_body` returns exactly what it returns.
query = "this is  a test"
def body_function(query):
    "Return a complete request body for `query`."
    body = {
        "yql": "select * from sources * where userQuery();",
        "query": query,
        "type": "any",
        "ranking": {"profile": "bm25", "listFeatures": "true"},
    }
    return body

query_model = QueryModel(body_function=body_function)
test_eq(
    query_model.create_body(query=query),
    {
        "yql": "select * from sources * where userQuery();",
        "query": "this is  a test",
        "type": "any",
        "ranking": {"profile": "bm25", "listFeatures": "true"},
    },
)

In [None]:
#|hide
# OR match phase plus one ranking feature: the feature appears as a top-level
# key of the request body.
query = "this is  a test"
query_model = QueryModel(
    query_properties=[
        QueryRankingFeature(name="query_vector", mapping=lambda x: [1, 2, 3])
    ],
    match_phase=OR(),
    ranking=Ranking(name="bm25", list_features=True),
)
test_eq(
    query_model.create_body(query=query),
    {
        "yql": 'select * from sources * where ({grammar: "any"}userInput("this is  a test"));',
        "ranking": {"profile": "bm25", "listFeatures": "true"},
        "ranking.features.query(query_vector)": "[1, 2, 3]",
    },
)

In [None]:
#|hide
# ANN match phase plus one ranking feature: the query text does not appear in the YQL.
query = "this is  a test"
query_model = QueryModel(
    query_properties=[
        QueryRankingFeature(name="query_vector", mapping=lambda x: [1, 2, 3])
    ],
    match_phase=ANN(
        doc_vector="doc_vector",
        query_vector="query_vector",
        hits=10,
        label="label",
    ),
    ranking=Ranking(name="bm25", list_features=True),
)
test_eq(
    query_model.create_body(query=query),
    {
        "yql": 'select * from sources * where ({targetHits: 10, label: "label", approximate: true}nearestNeighbor(doc_vector, query_vector));',
        "ranking": {"profile": "bm25", "listFeatures": "true"},
        "ranking.features.query(query_vector)": "[1, 2, 3]",
    },
)

In [None]:
#|hide
# Run nbdev's export step; presumably this writes the `#|export` cells to the
# module named by `default_exp` at the top of the notebook (confirm against nbdev docs).
nbdev_export()