### Import dependencies

In [45]:
import openai
import instructor
from qdrant_client import QdrantClient

from pydantic import BaseModel, Field


### RAG pipeline

In [46]:
# Wrap a default-configured OpenAI client with instructor; generate_answer
# calls this wrapper with a `response_model` argument.
client = instructor.from_openai(openai.OpenAI())

In [47]:
# Structured output schema handed to the LLM call as `response_model`.
# NOTE(review): documented with comments rather than a class docstring on purpose —
# a docstring would presumably become part of the schema generated for the model;
# confirm before converting this to a docstring.
class RAGGenerationResponse(BaseModel):
    # The only field: the generated answer text.
    answer: str = Field(description="The answer to the question")

In [49]:
def get_embedding(text, model="text-embedding-3-small"):
    """Request an embedding for `text` from the OpenAI embeddings endpoint.

    Args:
        text: Value forwarded as `input` to `openai.embeddings.create`.
        model: Name of the embedding model to use.

    Returns:
        The `embedding` attribute of the first entry in the response's `data`.
    """
    embedding_response = openai.embeddings.create(model=model, input=text)
    first_entry = embedding_response.data[0]
    return first_entry.embedding


def retrieve_data(query, qdrant_client, k=5, collection_name="Amazon-items-collection-00"):
    """Embed `query` and fetch the closest points from a Qdrant collection.

    Args:
        query: Text that is embedded with get_embedding before searching.
        qdrant_client: Client object exposing `query_points`.
        k: Value passed as `limit` to `query_points`.
        collection_name: Collection to search. Defaults to the name that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        A dict of four parallel lists, one entry per returned point:
        "retrieved_context_ids" (payload "parent_asin"),
        "retrieved_context" (payload "description"),
        "retrieved_context_ratings" (payload "average_rating") and
        "similarity_scores" (the point's `score`).

    Each point's payload must provide the three payload keys listed above.
    """
    query_embedding = get_embedding(query)

    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )

    # Materialise once so the four lists below are built from the same sequence.
    points = list(results.points)

    return {
        "retrieved_context_ids": [point.payload["parent_asin"] for point in points],
        "retrieved_context": [point.payload["description"] for point in points],
        "retrieved_context_ratings": [point.payload["average_rating"] for point in points],
        "similarity_scores": [point.score for point in points],
    }


def process_context(context):
    """Render the retrieved items as text, one "- ID: ..." line per item.

    Args:
        context: Mapping with the keys "retrieved_context_ids",
            "retrieved_context" and "retrieved_context_ratings", each an
            iterable; they are zipped together, so extra entries in a longer
            one are ignored.

    Returns:
        A single string with one newline-terminated line per item, or an
        empty string when there are no items.
    """
    rows = zip(
        context["retrieved_context_ids"],
        context["retrieved_context"],
        context["retrieved_context_ratings"],
    )
    return "".join(
        f"- ID: {item_id}, rating: {rating}, description: {chunk}\n"
        for item_id, chunk, rating in rows
    )



def build_prompt(preprocessed_context, question):
    """Fill the shopping-assistant prompt with the context and the question.

    Args:
        preprocessed_context: Text inserted under the "Context:" heading.
        question: Text inserted under the "Question:" heading.

    Returns:
        The complete prompt string. It starts and ends with a newline because
        the template's text begins and ends on its own lines.
    """
    # The "Instructions:" heading was previously misspelled as "Instructtions:".
    prompt = f"""
You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructions:
- You need to answer the question based on the provided context only.
- Never use word context and refer to it as the available products.
- As an output you need to provide:

* The answer to the question based on the provided context.
* The list of the IDs of the chunks that were used to answer the question. Only return the ones that are used in the answer.
* Short description (1-2 sentences) of the item based on the description provided in the context.

- The short description should have the name of the item.
- The answer to the question should contain detailed information about the product and returned with detailed specification in bullet points.

Context:
{preprocessed_context}

Question:
{question}
"""

    return prompt


def generate_answer(prompt):
    """Send `prompt` as a single system message and return the parsed reply.

    Args:
        prompt: Full prompt text; used as the content of the only message.

    Returns:
        The first element of the pair returned by `create_with_completion`,
        requested with `response_model=RAGGenerationResponse`. The second
        element of the pair is not used.
    """
    request_kwargs = dict(
        model="gpt-4.1-mini",
        messages=[{"role": "system", "content": prompt}],
        temperature=0,
        response_model=RAGGenerationResponse,
    )
    parsed, _raw_completion = client.chat.completions.create_with_completion(**request_kwargs)
    return parsed


def rag_pipeline(question, qdrant_client, top_k=5):
    """Run retrieval, prompt construction and generation for one question.

    Args:
        question: The user question; used for retrieval and in the prompt.
        qdrant_client: Client forwarded to retrieve_data.
        top_k: Number of items requested from retrieve_data.

    Returns:
        A dict with the generated object ("datamodel"), its `answer`
        attribute ("answer"), the question, and the retrieved ids, contexts
        and similarity scores.
    """
    retrieval = retrieve_data(question, qdrant_client, top_k)
    prompt = build_prompt(process_context(retrieval), question)
    generated = generate_answer(prompt)

    passthrough_keys = ("retrieved_context_ids", "retrieved_context", "similarity_scores")
    return {
        "datamodel": generated,
        "answer": generated.answer,
        "question": question,
        **{key: retrieval[key] for key in passthrough_keys},
    }


In [50]:
# Client for the Qdrant instance at the local URL below; passed to rag_pipeline.
qdrant_client = QdrantClient(url="http://localhost:6333")

In [51]:
# Run the pipeline on a sample question with the default top_k.
output = rag_pipeline("Can I get a charging cable? Please suggest me a good one.", qdrant_client)

In [52]:
# Display the whole result dict returned by rag_pipeline.
output

{'datamodel': RAGGenerationResponse(answer='A good charging cable option available is the GREPHONE 2 Pack USB C to Lightning Cable, 6 FT, which is Apple MFi Certified and offers fast charging and data transfer. Here are the detailed specifications:\n\n- Length: 6 feet, providing convenience to use devices while charging.\n- Charging Speed: Supports PD Fast Charge 3A/30W (max) for compatible iPhones and iPads.\n- Compatibility: Works with iPhone 13/12/11 series, iPad Pro, iPad Air, iPad mini, and more.\n- Durability: Made with reinforced material, tested for softness and toughness, with 3D aluminum joint and laser welding technology to prevent breakage.\n- Certification: Apple MFi Certified with built-in overvoltage protection for safe charging.\n- Package: Comes in a pack of 2 cables.\n\nThis cable is suitable for fast and safe charging of Apple devices and offers a good length for comfortable use.\n\nShort description: The GREPHONE 2 Pack USB C to Lightning Cable is a 6-foot long, App

In [53]:
# Print only the answer text so its line breaks are rendered.
print(output["answer"])

A good charging cable option available is the GREPHONE 2 Pack USB C to Lightning Cable, 6 FT, which is Apple MFi Certified and offers fast charging and data transfer. Here are the detailed specifications:

- Length: 6 feet, providing convenience to use devices while charging.
- Charging Speed: Supports PD Fast Charge 3A/30W (max) for compatible iPhones and iPads.
- Compatibility: Works with iPhone 13/12/11 series, iPad Pro, iPad Air, iPad mini, and more.
- Durability: Made with reinforced material, tested for softness and toughness, with 3D aluminum joint and laser welding technology to prevent breakage.
- Certification: Apple MFi Certified with built-in overvoltage protection for safe charging.
- Package: Comes in a pack of 2 cables.

This cable is suitable for fast and safe charging of Apple devices and offers a good length for comfortable use.

Short description: The GREPHONE 2 Pack USB C to Lightning Cable is a 6-foot long, Apple MFi Certified fast charging cable designed for iPhon

### RAG pipeline with grounding context

In [37]:
# One item cited in the answer; used as the element type of
# RAGGenerationResponse.references.
# NOTE(review): documented with comments rather than a class docstring on purpose —
# a docstring would presumably become part of the schema generated for the model;
# confirm before converting this to a docstring.
class RAGUsedContext(BaseModel):
    # Identifier of the cited item.
    id: str = Field(description="The ID of the item used to answer the question")
    # Short summary of the cited item.
    description: str = Field(description="Short description of the item used to answer the question")

# Redefines RAGGenerationResponse from the earlier section of this notebook,
# adding a `references` field next to `answer`.
# NOTE(review): comments are used instead of a class docstring so the schema
# generated for the model is presumably left unchanged; confirm before converting.
class RAGGenerationResponse(BaseModel):
    # The generated answer text.
    answer: str = Field(description="The answer to the question")
    # The items the answer is based on, one RAGUsedContext per item.
    references: list[RAGUsedContext] = Field(description="List of items used to answer the question")


In [None]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding for `text` from the OpenAI embeddings endpoint.

    Same definition as the one in the earlier section of this notebook.

    Args:
        text: Value sent as `input`.
        model: Embedding model name.

    Returns:
        `data[0].embedding` of the response.
    """
    request = {"input": text, "model": model}
    return openai.embeddings.create(**request).data[0].embedding


def retrieve_data(query, qdrant_client, k=5, collection_name="Amazon-items-collection-00"):
    """Embed `query` and collect the nearest points from a Qdrant collection.

    Args:
        query: Text embedded with get_embedding and used as the search vector.
        qdrant_client: Client object exposing `query_points`.
        k: Value passed as `limit` to `query_points`.
        collection_name: Collection to search. The default is the name that
            used to be hard-coded here, so existing calls behave as before.

    Returns:
        A dict with four parallel lists, one entry per returned point:
        "retrieved_context_ids", "retrieved_context",
        "retrieved_context_ratings" and "similarity_scores".

    Each point's payload must contain "parent_asin", "description" and
    "average_rating".
    """
    query_embedding = get_embedding(query)

    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )

    collected = {
        "retrieved_context_ids": [],
        "retrieved_context": [],
        "retrieved_context_ratings": [],
        "similarity_scores": [],
    }

    for point in results.points:
        payload = point.payload
        collected["retrieved_context_ids"].append(payload["parent_asin"])
        collected["retrieved_context"].append(payload["description"])
        collected["retrieved_context_ratings"].append(payload["average_rating"])
        collected["similarity_scores"].append(point.score)

    return collected


def process_context(context):
    """Format the retrieved items as a bullet list, one line per item.

    Args:
        context: Mapping providing "retrieved_context_ids",
            "retrieved_context" and "retrieved_context_ratings"; the three
            are zipped, so iteration stops at the shortest.

    Returns:
        The concatenated lines, each ending in a newline ("" if no items).
    """
    item_ids = context["retrieved_context_ids"]
    chunks = context["retrieved_context"]
    ratings = context["retrieved_context_ratings"]

    lines = []
    for item_id, chunk, rating in zip(item_ids, chunks, ratings):
        lines.append(f"- ID: {item_id}, rating: {rating}, description: {chunk}\n")

    return "".join(lines)


def build_prompt(preprocessed_context, question):
    """Assemble the shopping-assistant prompt for one question.

    Args:
        preprocessed_context: Text placed under the "Context:" heading.
        question: Text placed under the "Question:" heading.

    Returns:
        The prompt text with both values substituted into the template.
    """
    template = """
You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructions:
- You need to answer the question based on the provided context only.
- Never use word context and refer to it as the available products.
- As an output you need to provide:

* The answer to the question based on the provided context.
* The list of the IDs of the chunks that were used to answer the question. Only return the ones that are used in the answer.
* Short description (1-2 sentences) of the item based on the description provided in the context.

- The short description should have the name of the item.
- The answer to the question should contain detailed information about the product and returned with detailed specification in bullet points.

Context:
{preprocessed_context}

Question:
{question}
"""

    # The two named fields are the only placeholders in the template.
    return template.format(
        preprocessed_context=preprocessed_context,
        question=question,
    )


def generate_answer(prompt):
    """Ask the chat model for a structured answer to `prompt`.

    Args:
        prompt: Prompt text, sent as the content of a single system message.

    Returns:
        The first element of the pair returned by `create_with_completion`,
        requested with `response_model=RAGGenerationResponse`; the second
        element is discarded.
    """
    system_message = {"role": "system", "content": prompt}
    structured, _ = client.chat.completions.create_with_completion(
        response_model=RAGGenerationResponse,
        temperature=0,
        messages=[system_message],
        model="gpt-4.1-mini",
    )
    return structured


def rag_pipeline(question, qdrant_client, top_k=5):
    """Run retrieval, prompt construction and grounded generation for a question.

    Args:
        question: The user question; used for retrieval and in the prompt.
        qdrant_client: Client forwarded to retrieve_data.
        top_k: Number of items requested from retrieve_data.

    Returns:
        A dict with the generated object, its `answer` and `references`
        attributes, the question, and the retrieved ids, contexts and
        similarity scores. The generated object is available under
        "original_output" and also under the misspelled "original_ouput",
        which is retained for code that already reads that key.
    """
    retrieved_context = retrieve_data(question, qdrant_client, top_k)
    preprocessed_context = process_context(retrieved_context)
    prompt = build_prompt(preprocessed_context, question)
    answer = generate_answer(prompt)

    final_result = {
        # Misspelled key kept first so existing readers keep working.
        "original_ouput": answer,
        "original_output": answer,
        "answer": answer.answer,
        "references": answer.references,
        "question": question,
        "retrieved_context_ids": retrieved_context["retrieved_context_ids"],
        "retrieved_context": retrieved_context["retrieved_context"],
        "similarity_scores": retrieved_context["similarity_scores"]
    }

    return final_result

In [54]:
# Run the grounded pipeline on a sample question, requesting 10 items.
result = rag_pipeline("Can I get some earphones?", qdrant_client, top_k=10)

In [43]:
# Display the whole result dict returned by rag_pipeline.
result

{'original_ouput': RAGGenerationResponse(answer='Yes, you can get several types of earphones from the available products. Here are some options with detailed specifications:\n\n1. Wireless Earbuds Sport S23-vine (ID: B0B9FTVL58)\n- Bluetooth 5.3\n- 37 hours playback time\n- LED power display\n- In-ear design with deep bass\n- IPX7 waterproof\n- Ultra-light with charging case\n- Smart touch controls\n\n2. TUNEAKE Kids Headphones (ID: B0C142QS8X)\n- Over-ear design\n- Volume limited to 94dB for hearing protection\n- Foldable and adjustable headband\n- 3.5mm jack wired connection\n- Compatible with various devices\n- No microphone\n\n3. TELSOR Wireless Earbuds (ID: B0C6K1GQCF)\n- Bluetooth 5.1\n- 30 hours playtime with charging case\n- IPX7 waterproof\n- Noise cancelling microphone\n- Touch control\n- Compatible with iPhone, Android, and other Bluetooth devices\n\n4. Open Ear Headphones (ID: B0CBMPG524)\n- Bluetooth 5.3\n- 60 hours playtime with charging case\n- IPX7 waterproof\n- Open-ea

In [55]:

# Print only the answer text so its line breaks are rendered.
print(result["answer"])

Yes, you can get several types of earphones from the available products. Here are some options with detailed specifications:

1. Wireless Earbuds Sport S23-vine (ID: B0B9FTVL58)
- Bluetooth 5.3
- 37 hours playback time
- LED power display
- In-ear design with deep bass
- IPX7 waterproof
- Ultra-light with charging case
- Smart touch controls

2. TUNEAKE Kids Headphones (ID: B0C142QS8X)
- Over-ear design
- Volume limited to 94dB for hearing protection
- Foldable and adjustable headband
- 3.5mm jack wired connection
- No microphone
- Comfortable padded headband and soft ear cups

3. TELSOR Wireless Earbuds (ID: B0C6K1GQCF)
- Bluetooth 5.1
- 30 hours playtime with charging case
- IPX7 waterproof
- Noise cancelling microphone
- Touch control for volume, calls, and voice assistant

4. Open Ear Headphones (ID: B0CBMPG524)
- Bluetooth 5.3
- 60 hours playtime with charging case
- IPX7 waterproof
- Open-ear design for awareness of surroundings
- Comfortable silicone ear hooks

5. Siniffo Bone C