In [12]:
import openai
from qdrant_client import QdrantClient

from langsmith import Client
from qdrant_client import QdrantClient

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

Download an example reference data point from LangSmith


In [13]:
# Create the LangSmith client. No arguments are passed, so connection settings
# come from the client's defaults — presumably environment variables; verify
# against your setup.
client = Client()
# Look up the evaluation dataset by its name.
dataset = client.read_dataset(
    dataset_name="rag-evaluation-dataset"
)


# Display the dataset record that was returned.
dataset



Dataset(name='rag-evaluation-dataset', description='Dataset for evaluating RAG pipeline', data_type=<DataType.kv: 'kv'>, id=UUID('85cdc74d-dbfb-43ff-b925-92c22886cf4e'), created_at=datetime.datetime(2026, 1, 19, 14, 54, 44, 402898, tzinfo=TzInfo(0)), modified_at=datetime.datetime(2026, 1, 19, 14, 54, 44, 402898, tzinfo=TzInfo(0)), example_count=50, session_count=0, last_session_start_time=None, inputs_schema=None, outputs_schema=None, transformations=None, metadata={'runtime': {'sdk': 'langsmith-py', 'library': 'langsmith', 'runtime': 'python', 'platform': 'macOS-15.1-arm64-arm-64bit', 'sdk_version': '0.6.2', 'runtime_version': '3.12.12', 'langchain_version': '1.2.3', 'py_implementation': 'CPython', 'langchain_core_version': '1.2.7'}})

In [35]:
# Show the reference outputs of the example at index 1 among the (up to 10) listed examples.
list(client.list_examples(dataset_id=dataset.id, limit=10))[1].outputs

{'ground_truth': 'Yes — it is a tinted protector that preserves keyboard backlight visibility and has an industry-leading thickness of 0.12 mm.',
 'reference_context_ids': ['B09LVX3XW2'],
 'reference_descriptions': ['UPPERCASE GhostCover® Premium Ultra Thin Keyboard Cover Protector, for 2021 2022 2023 M1/M2 Pro/Max MacBook Pro 14" 16" (A2442 A2485 A2779 A2780), MacBook Air 13" M2 (A2681), US (ANSI) Layout, Tinted New! Industry\'s first tinted protector with high transparency to preserve the elegance of your MacBook, and allows keyboard backlight to shine through. Designed for 2021+ Apple Silicon M1/M2 Pro/Max MacBook Pro 14"/16", 2022 M2 MacBook Air 13", US Keyboard Layout Only Industry Leading 0.12mm (0.005 in.) thickness to minimize typing interference Round cutout for Touch ID button Made with premium engineering grade TPU material. Completely washable and reusable']}

Ground truth question

In [36]:

# Show the inputs (the question) of the example at index 1 among the (up to 10) listed examples.
list(client.list_examples(dataset_id=dataset.id, limit=10))[1].inputs

{'question': 'Does the UPPERCASE GhostCover keyboard protector allow the backlight to shine through and what thickness is it?'}

In [42]:

# Fetch the example once so the question and its reference answer are taken
# from the same record, instead of issuing two separate list_examples calls.
reference_example = list(client.list_examples(dataset_id=dataset.id, limit=10))[1]
reference_input = reference_example.inputs
reference_output = reference_example.outputs

We need the RAG pipeline code so that we can run it against the examples in the dataset

In [41]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding of ``text`` produced by the OpenAI embeddings API.

    Args:
        text: Value sent as ``input`` to ``openai.embeddings.create``.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the response's ``data``.
    """
    embedding_response = openai.embeddings.create(model=model, input=text)
    first_item = embedding_response.data[0]
    return first_item.embedding


def retrieve_data(query, qdrant_client, k=5, collection_name="Amazon-items-collection-00"):
    """Embed ``query`` and fetch the nearest points from a Qdrant collection.

    Args:
        query: Text that is embedded with ``get_embedding`` and used as the
            search vector.
        qdrant_client: Client object exposing ``query_points``.
        k: Value passed as ``limit`` to ``query_points``.
        collection_name: Collection to search. Defaults to the collection this
            notebook was written against, so existing calls behave as before.

    Returns:
        A dict of four parallel lists with one entry per returned point:
        ``retrieved_context_ids`` (payload ``parent_asin``),
        ``retrieved_context`` (payload ``description``),
        ``retrieved_context_ratings`` (payload ``average_rating``) and
        ``similarity_scores`` (the point's ``score``).
    """
    query_embedding = get_embedding(query)

    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )

    retrieved_context_ids = []
    retrieved_context = []
    retrieved_context_ratings = []
    similarity_scores = []

    for point in results.points:
        # Each payload is expected to carry these three keys; a missing key
        # fails loudly here rather than producing misaligned lists.
        payload = point.payload
        retrieved_context_ids.append(payload["parent_asin"])
        retrieved_context.append(payload["description"])
        retrieved_context_ratings.append(payload["average_rating"])
        similarity_scores.append(point.score)

    return {
        "retrieved_context_ids": retrieved_context_ids,
        "retrieved_context": retrieved_context,
        "retrieved_context_ratings": retrieved_context_ratings,
        "similarity_scores": similarity_scores,
    }


def process_context(context):
    """Render the retrieved items as one bullet line per item.

    Args:
        context: Dict with the parallel lists ``retrieved_context_ids``,
            ``retrieved_context`` and ``retrieved_context_ratings``.

    Returns:
        A string with one ``- ID: ..., rating: ..., description: ...`` line per
        item, each terminated by a newline. Returns ``""`` when the lists are
        empty. If the lists differ in length, ``zip`` stops at the shortest.
    """
    rows = zip(
        context["retrieved_context_ids"],
        context["retrieved_context"],
        context["retrieved_context_ratings"],
    )
    # ``item_id`` avoids shadowing the builtin ``id``; ``join`` builds the
    # string in one pass instead of repeated concatenation.
    return "".join(
        f"- ID: {item_id}, rating: {rating}, description: {chunk}\n"
        for item_id, chunk, rating in rows
    )


def build_prompt(preprocessed_context, question):
    """Build the shopping-assistant prompt for one question.

    Args:
        preprocessed_context: Already-formatted product list, inserted under
            the ``Context:`` heading.
        question: The user's question, inserted under the ``Question:`` heading.

    Returns:
        The full prompt text. It starts with a newline and ends with a newline
        after the question.
    """
    # The heading previously read "Instructtions:"; the spelling is corrected
    # so the model receives a clean section label.
    return f"""
You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructions:
- You need to answer the question based on the provided context only.
- Never use word context and refer to it as the available products.

Context:
{preprocessed_context}

Question:
{question}
"""


def generate_answer(prompt):
    """Send ``prompt`` to the chat model and return the reply text.

    The prompt is sent as a single ``system`` message to ``gpt-5-nano`` with
    ``reasoning_effort="minimal"``.

    Args:
        prompt: Full prompt text.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    messages = [{"role": "system", "content": prompt}]
    completion = openai.chat.completions.create(
        model="gpt-5-nano",
        messages=messages,
        reasoning_effort="minimal",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


def rag_pipeline(question, top_k=5, qdrant_client=None):
    """Run retrieval and generation for one question.

    Args:
        question: The user's question.
        top_k: Number of points to request from the vector store.
        qdrant_client: Optional client to reuse. When omitted, a client for
            ``http://localhost:6333`` is created, exactly as before. Passing
            one avoids building a new client per call when looping over a
            dataset.

    Returns:
        A dict with ``answer``, ``question``, ``retrieved_context_ids``,
        ``retrieved_context`` and ``similarity_scores``. The retrieved ratings
        are used when formatting the prompt but are not part of the result.
    """
    if qdrant_client is None:
        qdrant_client = QdrantClient(url="http://localhost:6333")

    retrieved_context = retrieve_data(question, qdrant_client, top_k)
    preprocessed_context = process_context(retrieved_context)
    prompt = build_prompt(preprocessed_context, question)
    answer = generate_answer(prompt)

    return {
        "answer": answer,
        "question": question,
        "retrieved_context_ids": retrieved_context["retrieved_context_ids"],
        "retrieved_context": retrieved_context["retrieved_context"],
        "similarity_scores": retrieved_context["similarity_scores"],
    }

In [43]:
# Smoke-test the pipeline end to end with an ad-hoc question.
rag_pipeline("Can I get some charger?", top_k=5)

{'answer': 'Yes—there are several charger cables available in stock. Here are a few options:\n\n- iPhone/Apple MFi certified Lightning cables (3-pack, 3 ft each). Compatible with iPhone models and iPad/iPod. Durable with reinforced joints.\n- 5 in 1/6 in 1 multi-charging cables (6 ft/10 ft options) that include Lightning, USB-C, and Micro USB connectors. Note: not all ports support data transfer; some are for charging only.\n- MFi-certified colorful nylon Lightning cables (3/6/10 ft options) compatible with many iPhone models; durable and long.\n- USB-C to Lightning cables (6 ft) with MFi certification for fast charging; compatible with newer iPhone models and iPad.\n\nIf you tell me which device you’re charging (iPhone model, or Fitbit, etc.) and preferred length, I can narrow down the best match.',
 'question': 'Can I get some charger?',
 'retrieved_context_ids': ['B0BYYLJRHT',
  'B0BFPZGYLD',
  'B09TNXY54Y',
  'B0BV6PWVCG',
  'B0BGDQLZD2'],
 'retrieved_context': ['iPhone Charger Cor

RAGAS metrics (Context precision, Context recall, Response relevancy and Faithfulness)

In [16]:

from ragas.dataset_schema import SingleTurnSample 
from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy

  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy


In [17]:

# Judge model for the LLM-based metrics; passed to Faithfulness and ResponseRelevancy below.
ragas_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))
# Embedding model wrapper; passed to ResponseRelevancy below.
ragas_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))



  ragas_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))
  ragas_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))


In [44]:
# Inspect the reference question that will be fed to the pipeline.
reference_input

{'question': 'Does the UPPERCASE GhostCover keyboard protector allow the backlight to shine through and what thickness is it?'}

In [45]:
# Inspect the reference answer and reference context IDs for that question.
reference_output

{'ground_truth': 'Yes — it is a tinted protector that preserves keyboard backlight visibility and has an industry-leading thickness of 0.12 mm.',
 'reference_context_ids': ['B09LVX3XW2'],
 'reference_descriptions': ['UPPERCASE GhostCover® Premium Ultra Thin Keyboard Cover Protector, for 2021 2022 2023 M1/M2 Pro/Max MacBook Pro 14" 16" (A2442 A2485 A2779 A2780), MacBook Air 13" M2 (A2681), US (ANSI) Layout, Tinted New! Industry\'s first tinted protector with high transparency to preserve the elegance of your MacBook, and allows keyboard backlight to shine through. Designed for 2021+ Apple Silicon M1/M2 Pro/Max MacBook Pro 14"/16", 2022 M2 MacBook Air 13", US Keyboard Layout Only Industry Leading 0.12mm (0.005 in.) thickness to minimize typing interference Round cutout for Touch ID button Made with premium engineering grade TPU material. Completely washable and reusable']}

In [46]:
# Run the pipeline on the reference question (top_k left at its default).
result = rag_pipeline(reference_input["question"])

In [52]:
# Inspect the pipeline output that the metrics below are computed on.
result

{'answer': 'Yes. The UPPERCASE GhostCover keyboard protector allows the backlight to shine through. It has a thickness of 0.12 mm (0.005 inch).',
 'question': 'Does the UPPERCASE GhostCover keyboard protector allow the backlight to shine through and what thickness is it?',
 'retrieved_context_ids': ['B09LVX3XW2',
  'B0B6V8THVX',
  'B09Y39DSWR',
  'B09QGNB537',
  'B09PRK49JH'],
 'retrieved_context': ['UPPERCASE GhostCover® Premium Ultra Thin Keyboard Cover Protector, for 2021 2022 2023 M1/M2 Pro/Max MacBook Pro 14" 16" (A2442 A2485 A2779 A2780), MacBook Air 13" M2 (A2681), US (ANSI) Layout, Tinted New! Industry\'s first tinted protector with high transparency to preserve the elegance of your MacBook, and allows keyboard backlight to shine through. Designed for 2021+ Apple Silicon M1/M2 Pro/Max MacBook Pro 14"/16", 2022 M2 MacBook Air 13", US Keyboard Layout Only Industry Leading 0.12mm (0.005 in.) thickness to minimize typing interference Round cutout for Touch ID button Made with premi

In [51]:
async def ragas_faithfulness(run, example):
    """Compute the RAGAS ``Faithfulness`` score for one pipeline run.

    Args:
        run: Mapping with ``question``, ``answer`` and ``retrieved_context``.
        example: Not read by this metric; kept so all metric functions share
            the same ``(run, example)`` signature.

    Returns:
        The value returned by ``Faithfulness.single_turn_ascore``.
    """
    sample_fields = {
        "user_input": run["question"],
        "response": run["answer"],
        "retrieved_contexts": run["retrieved_context"],
    }
    sample = SingleTurnSample(**sample_fields)

    faithfulness_scorer = Faithfulness(llm=ragas_llm)
    score = await faithfulness_scorer.single_turn_ascore(sample)
    return score

In [53]:

# ragas_faithfulness never reads `example`, so an empty placeholder is passed.
await ragas_faithfulness(result, "")

1.0

In [54]:

async def ragas_response_relevancy(run, example):
    """Compute the RAGAS ``ResponseRelevancy`` score for one pipeline run.

    Args:
        run: Mapping with ``question``, ``answer`` and ``retrieved_context``.
        example: Not read by this metric; kept so all metric functions share
            the same ``(run, example)`` signature.

    Returns:
        The value returned by ``ResponseRelevancy.single_turn_ascore``.
    """
    sample_fields = {
        "user_input": run["question"],
        "response": run["answer"],
        "retrieved_contexts": run["retrieved_context"],
    }
    sample = SingleTurnSample(**sample_fields)

    relevancy_scorer = ResponseRelevancy(llm=ragas_llm, embeddings=ragas_embeddings)
    score = await relevancy_scorer.single_turn_ascore(sample)
    return score



In [55]:

# ragas_response_relevancy never reads `example`, so an empty placeholder is passed.
await ragas_response_relevancy(result, "")


np.float64(0.9557988137484688)

In [58]:

async def ragas_context_precision_id_based(run, example):
    """Compute the RAGAS ``IDBasedContextPrecision`` score for one pipeline run.

    Args:
        run: Mapping with ``retrieved_context_ids``.
        example: Mapping with ``reference_context_ids``.

    Returns:
        The value returned by ``IDBasedContextPrecision.single_turn_ascore``.
    """
    id_fields = {
        "retrieved_context_ids": run["retrieved_context_ids"],
        "reference_context_ids": example["reference_context_ids"],
    }
    sample = SingleTurnSample(**id_fields)

    precision_scorer = IDBasedContextPrecision()
    score = await precision_scorer.single_turn_ascore(sample)
    return score

In [59]:
# reference_output supplies the `reference_context_ids` that the retrieved IDs are compared against.
await ragas_context_precision_id_based(result, reference_output)

0.2

In [61]:

async def ragas_context_recall_id_based(run, example):
    """Compute the RAGAS ``IDBasedContextRecall`` score for one pipeline run.

    Args:
        run: Mapping with ``retrieved_context_ids``.
        example: Mapping with ``reference_context_ids``.

    Returns:
        The value returned by ``IDBasedContextRecall.single_turn_ascore``.
    """
    id_fields = {
        "retrieved_context_ids": run["retrieved_context_ids"],
        "reference_context_ids": example["reference_context_ids"],
    }
    sample = SingleTurnSample(**id_fields)

    recall_scorer = IDBasedContextRecall()
    score = await recall_scorer.single_turn_ascore(sample)
    return score



In [62]:
# reference_output supplies the `reference_context_ids` that the retrieved IDs are compared against.
await ragas_context_recall_id_based(result, reference_output)

1.0