In [1]:
from haystack.document_stores.in_memory import InMemoryDocumentStore

# In-memory document store shared by the rest of the notebook: the embedded
# documents are written into it and the retriever is built on top of it.
document_store = InMemoryDocumentStore()


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from haystack import Document 

In [3]:
from pathlib import Path

In [4]:
# List the parent directory to confirm where the data folder lives relative to this notebook.
!ls ../

Dockerfile       data             rag_microservice run_app.sh
README.md        notebooks        requirements.txt venv


In [5]:
# Collect every .txt file under ../data. Glob order depends on the filesystem,
# so sort the matches to make the choice of file reproducible across runs.
txt_paths = sorted(Path("../data").glob("**/*.txt"))
if not txt_paths:
    # Fail with a clear message instead of the bare StopIteration that
    # next() would raise on an empty glob.
    raise FileNotFoundError("No .txt files found under ../data")

# Decode explicitly as UTF-8 so the result does not depend on the platform locale.
doc_content = txt_paths[0].read_text(encoding="utf-8")

In [6]:
# Wrap the entire text file in a single Document, so the list has exactly one entry.
# NOTE(review): with only one document the retriever has nothing to rank and the
# whole file ends up in the prompt; consider splitting the text into smaller
# Documents (e.g. one per episode) before embedding.
whole_file_document = Document(content=doc_content)
docs = [whole_file_document]


In [7]:
# Display the document list to check what was loaded.
docs

[Document(id=741d1c671c90dc09293cd1ec37336e10a2ffabcd6f5726ef45b77feb5e64b470, content: 'Season 1, Episode 1: "Pilot"	Justin Roiland	Dan Harmon & Justin Roiland	December 2, 2013	RAM-001	1.1...')]

In [8]:
from haystack.components.embedders import SentenceTransformersDocumentEmbedder

# Sentence-transformers model used to embed the documents.
document_embedding_model = "sentence-transformers/all-MiniLM-L6-v2"

doc_embedder = SentenceTransformersDocumentEmbedder(model=document_embedding_model)
# warm_up() is called before the embedder is run on the documents.
# NOTE(review): presumably this loads (and on first use downloads) the model —
# the next cell checks the Hugging Face cache for it.
doc_embedder.warm_up()




In [9]:
ls ~/.cache/huggingface/hub | grep sentence-transformers

[1m[36mmodels--sentence-transformers--all-MiniLM-L6-v2[m[m/


In [10]:
from haystack.document_stores.types import DuplicatePolicy

# Compute an embedding for every document; the embedder returns them under "documents".
docs_with_embeddings = doc_embedder.run(docs)

# Document ids are derived from the content, so re-running this cell would write
# a document whose id already exists in the store. OVERWRITE keeps the cell
# idempotent instead of failing on the duplicate id.
# The call is the last expression so the notebook shows the number of documents written.
document_store.write_documents(
    docs_with_embeddings["documents"],
    policy=DuplicatePolicy.OVERWRITE,
)

Batches: 100%|██████████| 1/1 [00:00<00:00,  2.61it/s]


1

⚠️ Note that the documents above were embedded with the `sentence-transformers/all-MiniLM-L6-v2` model. User queries must be embedded with the same model; otherwise the query and document embeddings are not comparable.

In [11]:
from haystack.components.embedders import SentenceTransformersTextEmbedder

# Queries must be embedded with the same model as the documents, otherwise the
# query and document vectors are not comparable. Reuse the document embedder's
# model name instead of repeating the string, so the two cannot drift apart.
text_embedder = SentenceTransformersTextEmbedder(model=doc_embedder.model)

In [12]:
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever

# Embedding-based retriever over the in-memory store; in the pipeline it is fed
# the query embedding produced by the text embedder.
retriever = InMemoryEmbeddingRetriever(document_store=document_store)


In [13]:
from haystack.components.builders import PromptBuilder

# Prompt template with two variables:
#   - `documents`: iterated over, emitting each document's `content` under "Context:"
#     (wired to the retriever output when the pipeline is connected);
#   - `question`: the user question, passed in through the pipeline run inputs.
template = """
Given the following information, answer the question.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{question}}
Answer:
"""

# Builder component that renders the template; its output is connected to the LLM.
prompt_builder = PromptBuilder(template=template)

In [14]:
import os
from getpass import getpass
from haystack.components.generators import OpenAIGenerator

# Prompt for the key when the variable is missing OR set to an empty string;
# an empty value would otherwise skip the prompt and only fail later, at request time.
# getpass keeps the key out of the notebook source and output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")

# No key is passed explicitly here.
# NOTE(review): presumably OpenAIGenerator reads OPENAI_API_KEY from the environment — confirm.
generator = OpenAIGenerator(model="gpt-4o-mini")

In [15]:
from haystack import Pipeline

# RAG pipeline: question embedding -> retrieval -> prompt construction -> LLM.
basic_rag_pipeline = Pipeline()

# Register each component under the name used in connections and run() inputs.
for component_name, component in (
    ("text_embedder", text_embedder),
    ("retriever", retriever),
    ("prompt_builder", prompt_builder),
    ("llm", generator),
):
    basic_rag_pipeline.add_component(component_name, component)

# Wire sender outputs to receiver inputs, in data-flow order.
for sender, receiver in (
    ("text_embedder.embedding", "retriever.query_embedding"),
    ("retriever", "prompt_builder.documents"),
    ("prompt_builder", "llm"),
):
    basic_rag_pipeline.connect(sender, receiver)

# Bare expression so the notebook renders the pipeline's component/connection summary.
basic_rag_pipeline


<haystack.core.pipeline.pipeline.Pipeline object at 0x34afd8e00>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt_builder: PromptBuilder
  - llm: OpenAIGenerator
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [16]:
question = "What is a Meeseeks Box?"

# The same question goes to two components: the embedder (to build the retrieval
# query) and the prompt builder (to fill the template's question slot).
pipeline_inputs = {
    "text_embedder": {"text": question},
    "prompt_builder": {"question": question},
}
response = basic_rag_pipeline.run(pipeline_inputs)

# Print only the first reply produced by the LLM component.
first_reply = response["llm"]["replies"][0]
print(first_reply)


Batches: 100%|██████████| 1/1 [00:00<00:00,  6.28it/s]


A Meeseeks Box is a fictional device featured in the show "Rick and Morty." It spawns blue creatures called Meeseeks, which exist to fulfill a single request made by the user. Once they successfully complete their task, they disappear. However, if a Meeseeks is unable to fulfill its request, it becomes increasingly frustrated and desperate, ultimately leading to chaotic and comedic situations. The device highlights the theme of existential purpose and the pressures surrounding the concept of fulfilling one's goals.


In [23]:
# Question asking for structured output; the example shows the JSON shape wanted.
# strip() drops the leading/trailing newlines introduced by the triple-quoted literal.
question = """
Give me a list of all the episodes about Jessica, in JSON format

e.g.

[
    {
        "season_number": 1,
        "episode_number": 3,
        "episode_name": "...",
        "episode_description": "..."
    },
    ...
]
""".strip()

# Feed the question to both the embedder (retrieval query) and the prompt builder.
pipeline_inputs = {
    "text_embedder": {"text": question},
    "prompt_builder": {"question": question},
}
response = basic_rag_pipeline.run(pipeline_inputs)

# Print only the first reply produced by the LLM component.
json_reply = response["llm"]["replies"][0]
print(json_reply)

Batches: 100%|██████████| 1/1 [00:00<00:00,  2.49it/s]


```json
[
    {
        "season_number": 1,
        "episode_number": 6,
        "episode_name": "Rick Potion #9",
        "episode_description": "Morty is interested in going to the dance at his school with his classmate Jessica, so he asks Rick for a love potion. However, because Jessica has the flu, the potion becomes airborne and goes haywire, causing not only her but the whole population of Earth to fall for Morty save for blood relatives."
    }
]
```


In [24]:
# Display the raw pipeline result: the LLM replies plus the generation metadata
# (model name, finish reason and token usage).
response

{'llm': {'replies': ['```json\n[\n    {\n        "season_number": 1,\n        "episode_number": 6,\n        "episode_name": "Rick Potion #9",\n        "episode_description": "Morty is interested in going to the dance at his school with his classmate Jessica, so he asks Rick for a love potion. However, because Jessica has the flu, the potion becomes airborne and goes haywire, causing not only her but the whole population of Earth to fall for Morty save for blood relatives."\n    }\n]\n```'],
  'meta': [{'model': 'gpt-4o-mini-2024-07-18',
    'index': 0,
    'finish_reason': 'stop',
    'usage': {'completion_tokens': 107,
     'prompt_tokens': 3256,
     'total_tokens': 3363}}]}}