In [1]:
# Turn off Jedi for the IPython tab completer in this notebook session.
%config Completer.use_jedi = False

# Feed data to Vespa applications

> Synchronous and asynchronous feeding 


In [2]:
# this is a hidden cell. It will not show on the documentation HTML.
import os
from vespa.package import VespaDocker
from vespa.gallery import QuestionAnswering

# Application package for the question-answering sample app from the pyvespa gallery.
app_package = QuestionAnswering()

# Honor a WORK_DIR that is already set in the environment and only fall back to
# the current working directory, rather than forcing a machine-specific path.
os.environ.setdefault("WORK_DIR", os.getcwd())
# VespaDocker requires an absolute path, so normalize whatever WORK_DIR holds.
disk_folder = os.path.abspath(
    os.path.join(os.environ["WORK_DIR"], "sample_application")
)

vespa_docker = VespaDocker(
    port=8081,
    container_memory="8G",
    disk_folder=disk_folder,  # requires absolute path
)
# Deploy the package; the returned app object is what the feeding cells use.
app = vespa_docker.deploy(application_package=app_package)

Waiting for configuration server.
Waiting for configuration server.
Waiting for configuration server.
Waiting for configuration server.
Waiting for configuration server.
Waiting for configuration server.
Waiting for configuration server.
Waiting for configuration server.
Waiting for application status.
Waiting for application status.
Finished deployment.


In [3]:
import json, requests

# Download the sample sentence data used by the feeding examples.
sample_data_request = requests.get(
    "https://data.vespa.oath.cloud/blog/qa/sample_sentence_data.json"
)
# Fail with a clear HTTP error instead of a confusing JSON decode error
# when the download does not succeed.
sample_data_request.raise_for_status()
sentence_data = json.loads(sample_data_request.text)

# Show the field names of the first record.
list(sentence_data[0].keys())

['text', 'dataset', 'questions', 'context_id', 'sentence_embedding']

## Batch feeding

In [4]:
# Wrap every sentence in an {"id", "fields"} entry, using its position in
# sentence_data as the id.
batch_feed = [
    {"id": position, "fields": record}
    for position, record in enumerate(sentence_data)
]

In [5]:
# Feed the whole batch to the "sentence" schema with a single call.
response = app.feed_batch(
    schema="sentence",
    batch=batch_feed,
)

In [6]:
response = await app.feed_batch(schema="sentence", batch=batch_feed, asynchronous=True)

<div class="alert alert-info">

**Note**: The **await** keyword above is required when batch feeding asynchronously from a Jupyter notebook, because the notebook already has its own async event loop running in the background. You can skip the **await** keyword in an environment with no event loop running, and pyvespa will take care of the rest.

</div>

## Feed individual data points

In [7]:
# Feed the sentences one at a time, keeping each call's return value in order.
response = [
    app.feed_data_point(schema="sentence", data_id=idx, fields=sentence)
    for idx, sentence in enumerate(sentence_data)
]

In [8]:
from asyncio import create_task, wait, ALL_COMPLETED

async with app.asyncio() as async_app:
    feed = []
    for idx, sentence in enumerate(sentence_data):
        feed.append(
            create_task(
                async_app.feed_data_point(
                    schema="sentence",
                    data_id=idx,
                    fields=sentence,
                )
            )
        )
    await wait(feed, return_when=ALL_COMPLETED)
    response = [x.result() for x in feed]

<div class="alert alert-info">

**Note**: The code above runs as is in a Jupyter notebook because the notebook already has its own async event loop running in the background. You need to start and use your own event loop when running this code in an environment without one, just like any asyncio code requires.
</div>

In [9]:
# this is a hidden cell. It will not show on the documentation HTML.
from shutil import rmtree

# Delete the disk_folder tree that was handed to VespaDocker; errors are ignored.
rmtree(disk_folder, ignore_errors=True)
# Stop the container held by vespa_docker, then remove it.
vespa_docker.container.stop()
vespa_docker.container.remove()