Import Dependencies

In [2]:

import openai

## Instructor enables structured outputs, so we don't have to hand-write a JSON schema and ask the LLM to fill it in.
import instructor
from qdrant_client import QdrantClient

from pydantic import BaseModel, Field

Mock Example

In [21]:
# Mock prompt; the example calls that follow send it as their single system message.
prompt = """
You are a helpful assistant.
Return an answer to the question.
Question: What is your name? How do u differ from Gemini model?
"""

In [4]:
# Plain (unstructured) chat completion: the whole prompt is sent as the system message.
openai_response = openai.chat.completions.create(
    model="gpt-4.1-mini",
    messages=[
        {"role": "system", "content": prompt}
    ],
    temperature=0
)

# The reply is free text on the first choice's message.
print(openai_response.choices[0].message.content)

I am ChatGPT, your AI assistant. How can I help you today?


Structured outputs

In [7]:
## instructor wraps the LLM provider's client and returns its own client around it; from here on we use this
## wrapped client instead of the plain OpenAI client.
client = instructor.from_openai(openai.OpenAI())

Add a Pydantic schema object (to replace the hand-written JSON schema we used in the evaluation-dataset notebook)

In [8]:
# Response schema handed to the instructor client via `response_model`.
# NOTE(review): prefer `#` comments over a class docstring here — a docstring would presumably become
# part of the schema sent to the LLM; confirm before adding one.
class RAGGenerationResponse(BaseModel):
    # Single required text field; its description travels with the schema.
    answer: str = Field(description="The answer to the question")

In [None]:
## `client.chat.completions.create` returns only the parsed response; `create_with_completion` returns both
## the parsed Pydantic object and the original (raw) completion, in that order.
response, raw_response = client.chat.completions.create_with_completion(
    model="gpt-4.1-mini",
    messages=[
        {"role": "system", "content": prompt}
    ],
    temperature=0,
    response_model=RAGGenerationResponse
)


In [11]:
# Raw completion object: the second value returned by create_with_completion.
raw_response

ChatCompletion(id='chatcmpl-D28OxbeHAJTlspOVc3LxcXt0RouUt', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=[ChatCompletionMessageFunctionToolCall(id='call_KULYZ5iGJu2dpfon2E9jVhI3', function=Function(arguments='{"answer":"My name is ChatGPT. How can I assist you today?"}', name='RAGGenerationResponse'), type='function')]))], created=1769401323, model='gpt-4.1-mini-2025-04-14', object='chat.completion', service_tier='default', system_fingerprint='fp_376a7ccef1', usage=CompletionUsage(completion_tokens=17, prompt_tokens=95, total_tokens=112, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [10]:
# Parsed schema object: the first value returned by create_with_completion.
response

RAGGenerationResponse(answer='My name is ChatGPT. How can I assist you today?')

In [None]:
## This is itself a form of context/prompt engineering: the field descriptions are injected into what the LLM sees.
## Redefines RAGGenerationResponse (shadowing the earlier answer-only version) to add a `reasoning` field.
class RAGGenerationResponse(BaseModel):
    answer: str = Field(description="The answer to the question")
    reasoning: str = Field(description="The reasoning for the answer")

In [22]:
# Same request as the earlier structured call; `RAGGenerationResponse` now refers to the
# redefined schema that also has a `reasoning` field.
response, raw_response = client.chat.completions.create_with_completion(
    model="gpt-4.1-mini",
    messages=[
        {"role": "system", "content": prompt}
    ],
    temperature=0,
    response_model=RAGGenerationResponse
)

In [23]:
# Parsed result for the two-field schema (`answer` and `reasoning`).
response

RAGGenerationResponse(answer="My name is ChatGPT. I differ from the Gemini model primarily in my architecture, training data, and capabilities. While Gemini is a model developed by Google with its own unique design and features, I am based on OpenAI's GPT architecture, trained on a diverse range of internet text up to my knowledge cutoff in June 2024. This results in differences in how we generate responses, the style of interaction, and the specific strengths each model has in understanding and generating language.", reasoning='The question asks for my name and how I differ from the Gemini model. I provided my name as ChatGPT and explained the general differences based on architecture, training, and capabilities, which are the main factors that distinguish AI language models from each other.')

RAG Example

In [40]:
# Answer-only schema for the RAG pipeline; this replaces the answer+reasoning definition from the mock example.
class RAGGenerationResponse(BaseModel):
    answer: str = Field(description="The answer to the question")

In [41]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding for ``text``.

    Calls ``openai.embeddings.create`` with ``text`` as input and ``model`` as
    the embedding model, then returns the ``embedding`` attribute of the first
    entry in the response's ``data``.
    """
    embedding_response = openai.embeddings.create(model=model, input=text)
    first_entry = embedding_response.data[0]
    return first_entry.embedding


def retrieve_data(query, qdrant_client, k=5, collection_name="Amazon-items-collection-00"):
    """Embed ``query`` and fetch up to ``k`` matching points from Qdrant.

    Args:
        query: Text to embed (via ``get_embedding``) and search with.
        qdrant_client: Client object exposing ``query_points``.
        k: Value passed as the query ``limit``.
        collection_name: Collection to search. Defaults to the collection
            name that used to be hard-coded, so existing callers are unaffected.

    Returns:
        dict of four parallel lists, one entry per returned point:
        ``retrieved_context_ids`` (payload ``parent_asin``),
        ``retrieved_context`` (payload ``description``),
        ``retrieved_context_ratings`` (payload ``average_rating``) and
        ``similarity_scores`` (the point's ``score``).

    Raises:
        KeyError: if a returned point's payload lacks one of the keys above.
    """
    query_embedding = get_embedding(query)

    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )

    # Materialise once so the points can be walked several times safely.
    points = list(results.points)

    return {
        "retrieved_context_ids": [point.payload["parent_asin"] for point in points],
        "retrieved_context": [point.payload["description"] for point in points],
        "retrieved_context_ratings": [point.payload["average_rating"] for point in points],
        "similarity_scores": [point.score for point in points],
    }


def process_context(context):
    """Render the retrieved items as text, one ``- ID: ...`` line per item.

    Reads the ``retrieved_context_ids``, ``retrieved_context`` and
    ``retrieved_context_ratings`` lists from ``context`` and pairs them up
    positionally. Every line ends with a newline; no items yields ``""``.
    """
    rows = zip(
        context["retrieved_context_ids"],
        context["retrieved_context"],
        context["retrieved_context_ratings"],
    )
    return "".join(
        f"- ID: {item_id}, rating: {rating}, description: {description}\n"
        for item_id, description, rating in rows
    )


def build_prompt(preprocessed_context, question):
    """Fill the shopping-assistant prompt template and return it as a string.

    ``preprocessed_context`` is placed under the ``Context:`` heading and
    ``question`` under the ``Question:`` heading. The template text is kept
    verbatim because it is what gets sent to the LLM.
    """
    return f"""
You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructtions:
- You need to answer the question based on the provided context only.
- Never use word context and refer to it as the available products.

Context:
{preprocessed_context}

Question:
{question}
"""


## What changes here: the instructor response model is passed in (a Pydantic model inheriting from BaseModel).
def generate_answer(prompt):
    """Send ``prompt`` as a single system message and return the structured result.

    Returns:
        A ``(parsed, completion)`` tuple as unpacked from
        ``client.chat.completions.create_with_completion``: the object built
        for ``RAGGenerationResponse`` and the raw completion.
    """
    system_message = {"role": "system", "content": prompt}

    parsed, completion = client.chat.completions.create_with_completion(
        response_model=RAGGenerationResponse,
        messages=[system_message],
        model="gpt-4.1-mini",
        temperature=0,
    )

    return parsed, completion


def rag_pipeline(question, qdrant_client, top_k=5):
    """Run retrieval, context formatting, prompt building and generation for one question.

    Args:
        question: The user question; used both for retrieval and in the prompt.
        qdrant_client: Passed through to ``retrieve_data``.
        top_k: Passed through to ``retrieve_data`` as the number of items to fetch.

    Returns:
        dict with the parsed response model (``datamodel``), its ``answer``
        attribute, the raw completion (``rawresponse``), the ``question``, and
        the retrieved ids, descriptions and similarity scores.
    """
    hits = retrieve_data(question, qdrant_client, top_k)
    llm_prompt = build_prompt(process_context(hits), question)
    parsed, completion = generate_answer(llm_prompt)

    return {
        # The RAGGenerationResponse data model
        "datamodel": parsed,
        # The answer text itself
        "answer": parsed.answer,
        "rawresponse": completion,
        "question": question,
        "retrieved_context_ids": hits["retrieved_context_ids"],
        "retrieved_context": hits["retrieved_context"],
        "similarity_scores": hits["similarity_scores"],
    }

In [42]:
# Client pointed at a local Qdrant URL; assumes a server is reachable there — TODO confirm before running.
qdrant_client = QdrantClient(url="http://localhost:6333")


In [43]:
# End-to-end run of the pipeline; `top_k` is left at its default of 5.
output = rag_pipeline("Can I get a charging cable? Please suggest me a good one.", qdrant_client)

In [44]:
# Full result dict returned by rag_pipeline.
output

{'datamodel': RAGGenerationResponse(answer='Based on the available products, here are some good charging cable options:\n\n1. GREPHONE 2 Pack USB C to Lightning Cable, 6 FT, MFi Certified - This cable supports fast charging (PD Fast Charge 3A/30W max), is durable with reinforced material, and is compatible with a wide range of iPhone and iPad models. It comes in a pack of 2 and has a 12-month service warranty.\n\n2. iPhone Charger Cord Lightning Cables, Original 2022 Upgraded [3Pack 3ft] - These Apple MFi Certified cables offer super fast charging (3.0A) and high-speed data transfer. They are durable with reinforced joints and come in a pack of 3.\n\n3. MUXA 6 Pack Colorful Nylon Lightning Cable - These MFi Certified cables are durable, support fast charging and data transfer, and come in multiple lengths (3/3/6/6/10/10 FT) for convenience.\n\n4. 5 in 1 USB C to Multi Charging Cable 3M/10Ft - A versatile multi-charging cable that supports Lightning, USB C, and Micro USB connectors, all

In [32]:
# Parsed RAGGenerationResponse stored under "datamodel".
# NOTE(review): the saved output of this cell differs from the `output` display and has a lower
# execution count — it looks like it came from an earlier run; re-run to refresh.
output["datamodel"]

RAGGenerationResponse(answer="Based on the available products, I suggest the following good charging cables:\n\n1. GREPHONE 2 Pack USB C to Lightning Cable, 6 FT, MFi Certified, fast charging, durable with reinforced material and aluminum joints. Compatible with iPhone 13/12/11 and iPad models. Comes with 12-month service.\n\n2. iPhone Charger Cord Lightning Cables, Original 2022 Upgraded, 3 Pack 3ft, Apple MFi Certified, super fast charge 3.0A, durable with reinforced joint design, compatible with a wide range of iPhones and iPads.\n\n3. MUXA 6 Pack Apple MFi Certified Nylon Lightning Cable, various lengths including 3, 6, and 10 FT, durable with 12,000+ bend cycles, compatible with many iPhone and iPad models.\n\nIf you want a multi-device charging cable, there's also the 5 in 1 USB C to Multi Charging Cable 3M/10Ft which can charge three devices simultaneously but is only for charging, not data sync.\n\nPlease let me know if you want a specific type or length, and I can help narrow 

In [33]:
# Plain answer text (the `answer` attribute of the parsed model).
# NOTE(review): the saved output here also appears to come from an earlier run than `output`; re-run to refresh.
output["answer"]

"Based on the available products, I suggest the following good charging cables:\n\n1. GREPHONE 2 Pack USB C to Lightning Cable, 6 FT, MFi Certified, fast charging, durable with reinforced material and aluminum joints. Compatible with iPhone 13/12/11 and iPad models. Comes with 12-month service.\n\n2. iPhone Charger Cord Lightning Cables, Original 2022 Upgraded, 3 Pack 3ft, Apple MFi Certified, super fast charge 3.0A, durable with reinforced joint design, compatible with a wide range of iPhones and iPads.\n\n3. MUXA 6 Pack Apple MFi Certified Nylon Lightning Cable, various lengths including 3, 6, and 10 FT, durable with 12,000+ bend cycles, compatible with many iPhone and iPad models.\n\nIf you want a multi-device charging cable, there's also the 5 in 1 USB C to Multi Charging Cable 3M/10Ft which can charge three devices simultaneously but is only for charging, not data sync.\n\nPlease let me know if you want a specific type or length, and I can help narrow down the choice."

In [45]:
# Raw completion kept alongside the parsed answer; in the saved output the structured payload
# shows up as the JSON arguments of a tool call named after the schema class.
output["rawresponse"]

ChatCompletion(id='chatcmpl-D28lfAQiDdzN4voZt1JIKkVNZ434d', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=[ChatCompletionMessageFunctionToolCall(id='call_iKMkz1fvQqJlIJAN20krMdKy', function=Function(arguments='{"answer":"Based on the available products, here are some good charging cable options:\\n\\n1. GREPHONE 2 Pack USB C to Lightning Cable, 6 FT, MFi Certified - This cable supports fast charging (PD Fast Charge 3A/30W max), is durable with reinforced material, and is compatible with a wide range of iPhone and iPad models. It comes in a pack of 2 and has a 12-month service warranty.\\n\\n2. iPhone Charger Cord Lightning Cables, Original 2022 Upgraded [3Pack 3ft] - These Apple MFi Certified cables offer super fast charging (3.0A) and high-speed data transfer. They are durable with reinforced joints and come in a pack of 3.\\n\\n3. MUXA