## Basic RAG with Weaviate

In [1]:
from pathlib import Path

def get_chunks_using_markers(src_text: str) -> list[str]:
    """
    Split Markdown text into chunks, one per heading.

    A new chunk starts at every line that begins with ``##`` (this also
    covers deeper levels such as ``###``). Any text before the first such
    heading becomes its own chunk.

    Args:
        src_text: The full Markdown document.

    Returns:
        The non-empty chunks in document order. Each heading chunk starts
        with its heading exactly as written in the source (e.g. ``"## 17.2
        Cleaning"``), without a leading newline and with trailing
        whitespace removed.
    """
    marker = "\n##"
    # The text that must be put back in front of each split-off part.
    # The newline belongs to the previous chunk, so only "##" is restored.
    heading_prefix = marker.lstrip("\n")

    preamble, *sections = src_text.split(marker)
    chunks = []

    # Text before the first heading (may itself start with "##" when the
    # document opens with a heading on its very first line)
    if preamble.strip():
        chunks.append(preamble.strip())

    for section in sections:
        if section.strip():
            # Only strip on the right: the left side holds the space (or
            # extra '#') that directly follows "##" and is part of the
            # heading syntax.
            chunks.append(heading_prefix + section.rstrip())

    return chunks


# Read the parsed Markdown version of the manual (UTF-8) and split it into
# heading-delimited chunks; `chunk_texts` is what gets imported further down.
md_file_1 = Path("data/parsed/manual_bosch_WGG254Z0GR-parsed-text.md")
md_text_1 = md_file_1.read_text(encoding="utf-8")
chunk_texts = get_chunks_using_markers(md_text_1)

### Set up Weaviate Collection

In [2]:
import weaviate
import os

# Fail fast when the Cohere key is missing: os.getenv() would return None and
# that None would be sent as the header value, surfacing only later as a
# hard-to-diagnose connection, import or query error.
cohere_api_key = os.getenv("COHERE_API_KEY")
if not cohere_api_key:
    raise RuntimeError(
        "COHERE_API_KEY environment variable is not set; "
        "it is required for the Cohere integration used in this notebook."
    )

# Start (or attach to) an embedded Weaviate instance, pinned to a fixed
# server version, and forward the Cohere key with every request.
client = weaviate.connect_to_embedded(
    version="1.32.0",
    headers={
        "X-Cohere-Api-Key": cohere_api_key,
    },
    environment_variables={"LOG_LEVEL": "error"},  # Reduce amount of logs
)

In [3]:
# Drop any previous "Chunks" collection so the create call in the next cell
# starts from a clean state when the notebook is re-run.
client.collections.delete("Chunks")

In [4]:
from weaviate.classes.config import Property, DataType, Configure, Tokenization

# Define the "Chunks" collection: one object per chunk of a source document.
client.collections.create(
    name="Chunks",
    properties=[
        # Human-readable title of the document the chunk came from
        Property(
            name="document_title",
            data_type=DataType.TEXT,
        ),
        # The chunk text itself
        Property(
            name="chunk",
            data_type=DataType.TEXT,
        ),
        # Position of the chunk within its document
        Property(
            name="chunk_number",
            data_type=DataType.INT,
        ),
        # Path of the source file.
        # NOTE(review): FIELD tokenization presumably keeps the whole path as
        # a single token for exact matching - confirm against Weaviate docs.
        Property(
            name="filename",
            data_type=DataType.TEXT,
            tokenization=Tokenization.FIELD
        ),
    ],
    # A single named vector ("default") produced by the Cohere text
    # vectorizer from the document title and the chunk text.
    vector_config=[
        Configure.Vectors.text2vec_cohere(
            name="default",
            source_properties=["document_title", "chunk"],
            model="embed-v4.0"
        )
    ]
)

<weaviate.collections.collection.sync.Collection at 0x120e6ba30>

In [5]:
# Handle to the "Chunks" collection, used by the import and query cells below
chunks = client.collections.get("Chunks")

### Import data

In [6]:
from tqdm import tqdm
from weaviate.util import generate_uuid5

# Import every chunk as one object, sent to Weaviate in batches of 100.
with chunks.batch.fixed_size(batch_size=100) as batch:
    # Wrap the list itself (not the enumerate iterator) so tqdm knows the
    # total and can render a real progress bar; numbering starts at 1.
    for chunk_number, chunk_text in enumerate(tqdm(chunk_texts), start=1):
        obj = {
            "document_title": "Bosch WGG254Z0GR Manual",
            "filename": "data/pdfs/manual_bosch_WGG254Z0GR.pdf",
            "chunk": chunk_text,
            "chunk_number": chunk_number,
        }

        # Deterministic UUID derived from the properties, so re-importing
        # the same chunk maps to the same object id.
        batch.add_object(
            properties=obj,
            uuid=generate_uuid5(obj),
        )

# Per-object errors (e.g. vectorization failures) are collected by the batch
# rather than raised, so surface them explicitly instead of silently
# continuing with a partially filled collection.
failed_objects = chunks.batch.failed_objects
if failed_objects:
    print(f"Number of failed imports: {len(failed_objects)}")
    print(f"First failed object: {failed_objects[0]}")

127it [00:00, 41206.51it/s]


### RAG queries



In [8]:
# Hybrid search for the 10 best-matching chunks, then send them together with
# the grouped task to the generative model for a single combined answer.
response = chunks.generate.hybrid(
    query="how to clean the washing machine",
    grouped_task="Briefly, what tasks do I need to perform to regularly maintain and clean the washing machine?",
    limit=10,
)

# Heading and generated answer, each on its own line
print("Query response:", response.generative.text, sep="\n")

Query response:
To regularly maintain and clean the washing machine, you need to perform the following tasks:

1. Clean the detergent drawer by pulling it out, removing the insert, cleaning it with water and a brush, and then drying it before fitting it back in place.
2. Start an empty washing cycle to remove any residual water by pouring water and washing powder into the detergent drawer and running the program.
3. Regularly run a program for cleaning the drum or wash at temperatures of at least 60°C to prevent damage and injury.
4. Prepare your laundry properly by brushing off sand and soil, sorting laundry according to color and textiles, and following care labels.
5. Rinse pretreated laundry thoroughly with water before washing to avoid the risk of explosion.
6. Use the washing machine only for machine-washable fabrics, tap water, and suitable detergents in a domestic environment.
7. Follow the recommended programs for different types of laundry and load sizes to ensure proper wash

![assets/llm_3_rag_weaviate.png](assets/llm_3_rag_weaviate.png)

In [None]:
import textwrap

# Show a short preview of each retrieved chunk that backed the answer.
print("Supporting passages:")
for result_number, o in enumerate(response.objects, start=1):
    print(textwrap.indent(f"\n> Result {result_number}:", "  "))
    # Strip surrounding whitespace first: a leading newline in the chunk would
    # otherwise be turned into a space by textwrap.fill and mis-indent the
    # first line of the preview.
    chunk_text = o.properties['chunk'].strip()
    preview = chunk_text[:200]
    # Only mark the preview as truncated when text was actually cut off
    if len(chunk_text) > 200:
        preview += "..."
    wrapped_text = textwrap.fill(preview, width=80)
    print(textwrap.indent(wrapped_text, "    "))

Supporting passages:

  > Result 1:
     ##17.2 Cleaning the detergent drawer  1. Pull out the detergent drawer. 2.
    Press down on the insert and remove the detergent drawer.  <!-- image -->  - en
    Cleaning and servicing 3. Pull out the inser...

  > Result 2:
     ##6.1 Starting an empty washing cycle  Your appliance was inspected thoroughly
    before leaving the factory. To remove any residual water, run the first wash
    cycle without any laundry.  1. Turn the pro...

  > Result 3:
     ##Risk of injury!  Permanently washing at low temperatures and a lack of
    ventilation for the appliance may damage the drum and cause injury.  - Regularly
    run a programme for cleaning the drum or wash...

  > Result 4:
     ##Note  The appliance and fabrics are protected when you prepare your laundry.
    - ¡ Brush off sand and soil - ¡ Sort the laundry according to colour and
    textiles and observe the care labels - ¡ Tie c...

  > Result 5:
    agents that contain solvents may caus