In [None]:
from qdrant_client import QdrantClient
import openai

We need an embedding function to translate the user's input query into a vector that can be compared against the points stored in the vector DB.

In [None]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: Value sent as ``input`` to ``openai.embeddings.create``.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first entry in the response's
        ``data`` list.
    """
    request_kwargs = {"input": text, "model": model}
    api_response = openai.embeddings.create(**request_kwargs)
    # Only the first entry of the response is used.
    first_item = api_response.data[0]
    return first_item.embedding

In [None]:
# Client for the Qdrant server at http://localhost:6333; it is passed to
# retrieve_data in the cells below.
qdrant_client = QdrantClient(url="http://localhost:6333")


First, we define the retrieval function, which embeds the query and fetches the top-k matching items from Qdrant.

In [None]:

def retrieve_data(query, qdrant_client, k=5, collection_name="Amazon-items-collection-00"):
    """Embed ``query`` and fetch up to ``k`` matching points from Qdrant.

    Args:
        query: Text to embed with ``get_embedding`` and search for.
        qdrant_client: Client whose ``query_points`` method runs the search.
        k: Maximum number of points to request (passed as ``limit``).
        collection_name: Name of the Qdrant collection to search. Defaults to
            the collection this notebook was written against.

    Returns:
        A dict of four parallel lists with one entry per returned point:
        ``retrieved_context_ids`` (payload ``parent_asin``),
        ``retrieved_context`` (payload ``description``),
        ``retrieved_context_ratings`` (payload ``average_rating``) and
        ``similarity_scores`` (the point's ``score``).

    Raises:
        KeyError: If a returned payload lacks ``parent_asin``,
            ``description`` or ``average_rating``.
    """
    query_embedding = get_embedding(query)
    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )
    points = results.points

    # Build the parallel lists directly from the points; the lists share the
    # same ordering as ``results.points``.
    return {
        "retrieved_context_ids": [point.payload["parent_asin"] for point in points],
        "retrieved_context": [point.payload["description"] for point in points],
        "retrieved_context_ratings": [point.payload["average_rating"] for point in points],
        "similarity_scores": [point.score for point in points],
    }

In [None]:
# Try the retriever on a sample question, requesting up to 10 results.
retrieved_context = retrieve_data("What kind of earphones can I get?", qdrant_client, k=10)

In [None]:
# Display the dict of parallel lists (ids, descriptions, ratings, scores)
# returned by retrieve_data.
retrieved_context

Format the retrieved items into a single string for the LLM.

In [None]:
def process_context(context):
    """Render the retrieved items as one bullet line per item.

    Args:
        context: Mapping with the keys ``retrieved_context_ids``,
            ``retrieved_context`` and ``retrieved_context_ratings``, each an
            iterable; the three are walked in lockstep.

    Returns:
        A single string with one ``- ID: ..., rating: ..., description: ...``
        line per item, each terminated by a newline. Empty when there are no
        items.
    """
    rows = zip(
        context["retrieved_context_ids"],
        context["retrieved_context"],
        context["retrieved_context_ratings"],
    )
    return "".join(
        f"- ID: {item_id}, rating: {item_rating}, description: {description}\n"
        for item_id, description, item_rating in rows
    )

In [None]:
# Turn the retrieved lists into the bullet-list string that goes into the prompt.
preprocessed_context = process_context(retrieved_context)

In [None]:
# Use print so the embedded newlines render as separate lines.
print(preprocessed_context)

In [None]:


def build_prompt(preprocessed_context, question):
    """Assemble the shopping-assistant prompt sent to the LLM.

    Args:
        preprocessed_context: Formatted product list placed under the
            ``Context:`` heading.
        question: User question placed under the ``Question:`` heading.

    Returns:
        The full prompt: role statement, instructions, context and question,
        with a leading and a trailing newline.
    """
    # The empty strings produce the blank lines (and the leading/trailing
    # newline) once everything is joined with "\n".
    prompt_lines = (
        "",
        "You are a shopping assistant that can answer questions about the products in stock.",
        "",
        "You will be given a question and a list of context.",
        "",
        "Instructions:",
        "- You need to answer the question based on the provided context only.",
        "- Never use word context and refer to it as the available products.",
        "",
        "Context:",
        f"{preprocessed_context}",
        "",
        "Question:",
        f"{question}",
        "",
    )
    return "\n".join(prompt_lines)



In [None]:

# Build the full prompt for the same sample question used in the retrieval cell.
prompt = build_prompt(preprocessed_context, "What kind of earphones can I get?")

In [None]:
# Print the prompt to check exactly what the model will receive.
print(prompt)

Generate an answer for the prompt using the LLM.

In [None]:
def generate_answer(prompt, model="gpt-5-nano", reasoning_effort="minimal"):
    """Send ``prompt`` to the OpenAI chat completions API and return the reply.

    Args:
        prompt: Full prompt text; it is sent as a single ``system`` message.
        model: Chat model to use. Exposed as a parameter, like the ``model``
            argument of ``get_embedding``, so callers can switch models.
        reasoning_effort: Value forwarded as ``reasoning_effort`` to the API.

    Returns:
        The ``content`` of the first choice's message.
    """
    response = openai.chat.completions.create(
        model=model,
        messages=[{"role": "system", "content": prompt}],
        reasoning_effort=reasoning_effort,
    )

    return response.choices[0].message.content


In [None]:
# Send the prompt to the chat model and print its reply.
print(generate_answer(prompt))

Combined RAG pipeline

In [None]:
def rag_pipeline(question, top_k=5, qdrant_client=None):
    """Answer ``question`` with retrieval-augmented generation.

    Runs the four notebook stages in order: retrieve from Qdrant, format the
    results, build the prompt, and ask the LLM.

    Args:
        question: User question; used both for retrieval and in the prompt.
        top_k: Maximum number of items to retrieve.
        qdrant_client: Optional existing client to reuse. When omitted, a
            client for ``http://localhost:6333`` is created for this call and
            closed once retrieval has finished.

    Returns:
        The answer text returned by ``generate_answer``.
    """
    # Only close a client this function created; a caller-supplied client
    # stays open for the caller to keep using.
    owns_client = qdrant_client is None
    if owns_client:
        qdrant_client = QdrantClient(url="http://localhost:6333")

    try:
        # Retrieve the matching items from the Qdrant DB.
        retrieved_context = retrieve_data(question, qdrant_client, top_k)
    finally:
        if owns_client:
            qdrant_client.close()

    # Format the retrieved items into a single string, one item per line.
    preprocessed_context = process_context(retrieved_context)

    # Build the LLM prompt with the formatted items inlined.
    prompt = build_prompt(preprocessed_context, question)

    # Ask the LLM for an answer grounded in the retrieved items.
    answer = generate_answer(prompt)

    return answer

In [None]:
# End-to-end run. The rating threshold appears only in the question text:
# retrieve_data applies no rating filter, so honoring it is left to the LLM.
print(rag_pipeline("What kind of earphones can I get with ratings above 4.5?"))
