In [None]:
# hide
# Development convenience: with the autoreload extension in mode 2, imported
# modules are reloaded before each cell executes, so edits to the library
# source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Query API

> Python query API

We connect to the CORD-19 Search app and use it to illustrate the query API.

In [None]:
from vespa.application import Vespa

# Endpoint of the CORD-19 Search app used by every query in this notebook.
CORD19_ENDPOINT = "https://api.cord19.vespa.ai"
app = Vespa(url=CORD19_ENDPOINT)

## Specify the request body

> Full flexibility by specifying the entire request body

In [None]:
# Complete request body; it is handed to `app.query(body=...)` as-is, so every
# query parameter is spelled out explicitly here.
body = {
    "yql": "select title, abstract from sources * where userQuery();",
    "hits": 5,
    "query": "Is remdesivir an effective treatment for COVID-19?",
    "type": "any",
    "ranking": "bm25",
}

In [None]:
# Send the hand-written request body defined above; no query model is involved.
results = app.query(body=body)

In [None]:
# Display the retrieved-document count reported by the result object.
results.number_documents_retrieved

108882

## Specify a query model

### Query + term-matching + rank profile

In [None]:
from vespa.query import Query, OR, RankProfile

# Query model: OR term matching, ranked with the "bm25" rank profile.
bm25_query_model = Query(
    match_phase=OR(),
    rank_profile=RankProfile(name="bm25"),
)

results = app.query(
    query="Is remdesivir an effective treatment for COVID-19?",
    query_model=bm25_query_model,
)

In [None]:
# Display the retrieved-document count for the OR + bm25 query model.
results.number_documents_retrieved

108882

### Query + term-matching + ANN operator + rank profile

In [None]:
from vespa.query import Query, ANN, WeakAnd, Union, RankProfile
# `random` stays imported so any cell that still calls it directly keeps working.
from random import Random, random

# Length of the random stand-in query vector.
# NOTE(review): presumably has to match the dimension of the `title_embedding`
# document field -- confirm against the application schema.
EMBEDDING_DIM = 768

# Dedicated, seeded generator: after Restart & Run All the stand-in vectors are
# the same on every run, instead of depending on the global RNG state.
embedding_rng = Random(42)


def random_embedding(query):
    """Return a random stand-in embedding of length ``EMBEDDING_DIM``.

    The single argument supplied by the caller is ignored (presumably it is
    the query text -- verify against the ANN operator); this is only a
    placeholder for a real embedding model.
    """
    return [embedding_rng.random() for _ in range(EMBEDDING_DIM)]


# Match phase: union of a WeakAnd term match and an ANN operator that compares
# the `title_embedding` document vector with the `title_vector` query vector.
match_phase = Union(
    WeakAnd(hits=10),
    ANN(
        doc_vector="title_embedding",
        query_vector="title_vector",
        embedding_model=random_embedding,
        hits=10,
        label="title",
    ),
)
rank_profile = RankProfile(name="bm25", list_features=True)
query_model = Query(match_phase=match_phase, rank_profile=rank_profile)

In [None]:
# Run the query with the Union(WeakAnd, ANN) query model built in the previous cell.
results = app.query(
    query="Is remdesivir an effective treatment for COVID-19?",
    query_model=query_model,
)

In [None]:
# Display the retrieved-document count for the WeakAnd + ANN query model.
results.number_documents_retrieved

947

## Recall specific documents

Let's take a look at the top 3 ids from the last query.

In [None]:
# Keep the first three hits and pull the `id` field out of each one.
top_hits = results.hits[:3]
top_ids = [top_hit["fields"]["id"] for top_hit in top_hits]
top_ids

[117166, 60125, 28903]

Assume that we now want to retrieve the second and third ids above. We can do so with the `recall` argument.

In [None]:
# Restrict the same query to the second and third ids found above.
# `recall` takes a (field name, list of values) tuple.
recall_ids = top_ids[1:3]
results_with_recall = app.query(
    query="Is remdesivir an effective treatment for COVID-19?",
    query_model=query_model,
    recall=("id", recall_ids),
)

The query will only retrieve documents whose Vespa field `id` matches one of the values in the list given as the second element of the tuple.

In [None]:
# Collect the `id` field of every hit returned by the recall-restricted query.
recalled_hits = results_with_recall.hits
id_recalled = [recalled_hit["fields"]["id"] for recalled_hit in recalled_hits]
id_recalled

[60125, 28903]

In [None]:
#hide
from fastcore.test import all_equal, test

# Hidden check: the recalled ids must agree with the ids requested through
# `recall`, compared with fastcore's `all_equal`.
expected_ids = top_ids[1:3]
test(id_recalled, expected_ids, all_equal)