Based on https://haystack.deepset.ai/tutorials/01_basic_qa_pipeline

# Set up Document Store

In [2]:
import os
from haystack.document_stores import ElasticsearchDocumentStore

# Take the Elasticsearch host from the ELASTICSEARCH_HOST environment
# variable, falling back to "elasticsearch" when it is not set.
host = os.getenv("ELASTICSEARCH_HOST", "elasticsearch")

# Build the document store for the "document" index; username and password
# are passed as empty strings.
document_store = ElasticsearchDocumentStore(
    host=host,
    username="",
    password="",
    index="document",
)


  return response.status, response.getheaders(), raw_data


# Preprocess Documents

In [4]:
from haystack.utils import (
    clean_wiki_text,
    convert_files_to_docs,
    fetch_archive_from_http,
)


# Fetch the sample corpus we want to query:
# 517 Wikipedia articles for Game of Thrones.
doc_dir = "data/tutorial1"
s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt1.zip"
fetch_archive_from_http(
    url=s3_url,
    output_dir=doc_dir,
)

# Convert the files under doc_dir into documents, passing clean_wiki_text as
# the cleaning function and enabling paragraph splitting.
docs = convert_files_to_docs(
    dir_path=doc_dir,
    clean_func=clean_wiki_text,
    split_paragraphs=True,
)

# Write the converted documents into the document store.
document_store.write_documents(docs)


  return response.status, response.getheaders(), raw_data


# Initialize Retriever, Reader and Pipeline

Retriever:

In [5]:
from haystack.nodes import BM25Retriever

# BM25 retriever that fetches candidate documents from the document store
# created earlier in this notebook.
retriever = BM25Retriever(
    document_store=document_store,
)


Reader:

In [6]:
from haystack.nodes import TransformersReader

# Reader that loads the "distilbert-base-uncased-distilled-squad" checkpoint
# together with the "distilbert-base-uncased" tokenizer.
#
# use_gpu: Haystack 1.x (the release line that ships BM25Retriever under
# haystack.nodes) documents this flag as a boolean.  The legacy value -1 used
# to mean "run on CPU" when the parameter was a GPU ordinal, but as a boolean
# it is truthy and therefore selects a GPU whenever one is available.  Pass
# False to actually force CPU inference, which is what -1 was meant to do.
# NOTE(review): confirm against the installed Haystack version's
# TransformersReader signature.
reader = TransformersReader(
    model_name_or_path="distilbert-base-uncased-distilled-squad",
    tokenizer="distilbert-base-uncased",
    use_gpu=False,
)


Downloading: 100%|██████████| 451/451 [00:00<00:00, 370kB/s]
Downloading: 100%|██████████| 265M/265M [00:08<00:00, 32.0MB/s] 
Downloading: 100%|██████████| 28.0/28.0 [00:00<00:00, 20.5kB/s]
Downloading: 100%|██████████| 483/483 [00:00<00:00, 449kB/s]
Downloading: 100%|██████████| 232k/232k [00:00<00:00, 25.6MB/s]
Downloading: 100%|██████████| 466k/466k [00:00<00:00, 31.4MB/s]


Pipeline:

In [9]:
from haystack.pipelines import ExtractiveQAPipeline

# Combine the reader and the retriever built in the previous cells into a
# single extractive question-answering pipeline.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)


# Ask a question

In [14]:
from haystack.utils import print_answers

# Run the pipeline on a single question, passing top_k=10 to the "Retriever"
# node and top_k=5 to the "Reader" node.
prediction = pipe.run(
    query="Who is the father of Arya Stark?",
    params={
        "Retriever": {"top_k": 10},
        "Reader": {"top_k": 5},
    },
)

# Print the returned answers at the "minimum" detail level.
print_answers(prediction, details="minimum")



Query: Who is the father of Arya Stark?
Answers:
[   {   'answer': 'Eddard',
        'context': 'Nymeria after a legendary warrior queen. She travels with '
                   "her father, Eddard, to King's Landing when he is made Hand "
                   'of the King. Before she leave'},
    {   'answer': 'Arry',
        'context': 'tch recruits, including Arya Stark (disguised as an orphan '
                   "boy named 'Arry), Lommy Greenhands, Hot Pie and Jaqen "
                   "H'ghar. During their journey, t"},
    {   'answer': 'Black Walder',
        'context': 'med “Lame Lothar” because of his twisted leg. He and his '
                   'half-brother Black Walder are sent by their father to '
                   'Riverrun to propose a marriage between Lo'},
    {   'answer': 'Ned',
        'context': '\n'
                   '====Season 1====\n'
                   'Arya accompanies her father Ned and her sister Sansa to '
                   "King's Landing. Before their 