In [None]:
%pip install -U deepeval

Note: you may need to restart the kernel to use updated packages.


In [None]:
from deepeval.synthesizer import Synthesizer
from deepeval.models.base_model import DeepEvalBaseLLM

In [None]:
import getpass
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if one exists) into the environment first.
load_dotenv()
# Prompt when the key is missing *or* empty: an empty value (e.g. `GOOGLE_API_KEY=`
# in .env) passes a plain membership check but is not a usable key.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API key: ")

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.rate_limiters import InMemoryRateLimiter

# Chat model identifier; passed to GeminiChat when the LLM wrapper is created.
LLM_MODEL = "gemini-2.0-flash"
# Embedding model identifier; passed to GeminiEmbedding when the embedder is created.
EMBEDDING_MODEL = "models/text-embedding-004"


In [None]:
class GeminiChat(DeepEvalBaseLLM):
    """DeepEval LLM wrapper around a rate-limited LangChain Gemini chat model.

    Requests are throttled client-side with an ``InMemoryRateLimiter`` whose
    rate is derived from the free-tier requests-per-minute table below.
    """

    # Free-tier requests-per-minute limits, keyed by model name.
    FREE_TIER_RPM = {
        "gemini-2.0-flash": 15,
        "gemini-2.0-flash-lite": 30,
    }

    def __init__(self, model_name: str):
        """Create the wrapper for the Gemini chat model named ``model_name``."""
        super().__init__(model_name)  # sets model_name and loads model

    def load_model(self):
        """Build the chat client for ``self.model_name`` with a matching rate limiter.

        The limit is looked up for this instance's own model (not a notebook-level
        constant), so wrappers for different models are throttled independently.
        Models missing from ``FREE_TIER_RPM`` fall back to the lowest listed limit
        instead of raising ``KeyError``.

        Returns:
            A ``ChatGoogleGenerativeAI`` instance configured with the rate limiter.
        """
        rpm = self.FREE_TIER_RPM.get(
            self.model_name, min(self.FREE_TIER_RPM.values())
        )
        rps = rpm / 60  # requests per minute -> requests per second

        rate_limiter = InMemoryRateLimiter(
            requests_per_second=rps,
            check_every_n_seconds=0.1,
            max_bucket_size=1,
        )
        return ChatGoogleGenerativeAI(model=self.model_name, rate_limiter=rate_limiter)

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` to the model synchronously and return the response content."""
        res = self.model.invoke(prompt)
        return res.content

    async def a_generate(self, prompt: str) -> str:
        """Send ``prompt`` to the model asynchronously and return the response content."""
        res = await self.model.ainvoke(prompt)
        return res.content

    def get_model_name(self) -> str:
        """Return ``self.model_name``."""
        return self.model_name

In [None]:
# Smoke-test the chat wrapper with a single prompt and print the reply text.
gen = GeminiChat(model_name=LLM_MODEL)
reply = gen.generate("What is the capital of India?")
print(reply)

  rate_limiter = InMemoryRateLimiter(


The capital of India is **New Delhi**.


In [None]:
from typing import List
from deepeval.models import DeepEvalBaseEmbeddingModel

class GeminiEmbedding(DeepEvalBaseEmbeddingModel):
    """DeepEval embedding wrapper that delegates to ``GoogleGenerativeAIEmbeddings``."""

    model: GoogleGenerativeAIEmbeddings

    def __init__(self, model_name: str):
        """Hand ``model_name`` to the base-class initializer."""
        super().__init__(model_name)

    def get_model_name(self) -> str:
        """Return ``self.model_name``."""
        return self.model_name

    def load_model(self):
        """Construct the embeddings client for ``self.model_name``."""
        client = GoogleGenerativeAIEmbeddings(model=self.model_name)
        return client

    # --- synchronous API ---

    def embed_text(self, text: str) -> List[float]:
        """Embed a single string via the client's ``embed_query``."""
        vector = self.model.embed_query(text)
        return vector

    def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Embed several strings via the client's ``embed_documents``."""
        vectors = self.model.embed_documents(texts)
        return vectors

    # --- asynchronous API ---

    async def a_embed_text(self, text: str) -> List[float]:
        """Async counterpart of ``embed_text`` (awaits ``aembed_query``)."""
        vector = await self.model.aembed_query(text)
        return vector

    async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Async counterpart of ``embed_texts`` (awaits ``aembed_documents``)."""
        vectors = await self.model.aembed_documents(texts)
        return vectors


In [None]:
# Smoke-test the embedding wrapper; the cell's displayed value is the vector length.
emb = GeminiEmbedding(model_name=EMBEDDING_MODEL)
embedding = emb.embed_text(text="What is the capital of India?")
len(embedding)

768

In [None]:
# Delete vector-store data persisted by previous runs; leftover embeddings can cause issues.
# shutil is used instead of the `%rm` magic so the cell does not depend on a POSIX `rm`.
import shutil

# ignore_errors=True mirrors `rm -rf`: a missing directory is not an error.
shutil.rmtree(".vector_db", ignore_errors=True)

In [None]:
from deepeval.synthesizer.config import ContextConstructionConfig

model = GeminiChat(LLM_MODEL)
embedder = GeminiEmbedding(EMBEDDING_MODEL)
synthesizer = Synthesizer(model=model, async_mode=False)

# Capture the returned goldens rather than leaving the call as the cell's last
# expression, which dumps every golden (including its full context) into the output.
goldens = synthesizer.generate_goldens_from_docs(
    document_paths=["../corpus/Neural Network Training Recipe.txt"],
    include_expected_output=True,  # Generate a reference response as well
    max_goldens_per_context=2,  # How many synthetic queries to generate from each generated context
    context_construction_config=ContextConstructionConfig(
        embedder=embedder,
        max_contexts_per_document=5,  # How many random (generation + similarity) contexts to create
        context_similarity_threshold=0.5,
        chunk_size=250,  # Token-based chunking (so this translates to roughly 4x-5x as many characters)
    ),
)

# Show only how many goldens were produced.
len(goldens)


✨ 🚀 ✨ Loading Documents: 100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
✨ 📚 ✨ Chunking Documents: 100%|██████████| 1/1 [00:02<00:00,  2.47s/it]
✨ 🧩 ✨ Generating Contexts: 100%|██████████| 15/15 [00:15<00:00,  1.05s/it]


✨ Generating up to 10 goldens using DeepEval (using gemini-2.0-flash and models/text-embedding-004, method=docs): 100%|██████████| 10/10 [03:16<00:00, 19.62s/it]


[Golden(input="What qualities correlate strongly to success in DL, according to Karpathy's blog?", actual_output=None, expected_output="According to Andrej Karpathy's blog, the qualities that correlate most strongly to success in deep learning are patience and attention to detail.", context=['Andrej Karpathy blog\nA Recipe for Training Neural Networks\n\nAbout\nAuthor: Andrej Karpathy\nDate: Apr 25, 2019\n\nSome few weeks ago I posted a tweet on “the most common neural net mistakes”, listing a few common gotchas related to training neural nets. The tweet got quite a bit more engagement than I anticipated (including a webinar :)). Clearly, a lot of people have personally encountered the large gap between “here is how a convolutional layer works” and “our convnet achieves state of the art results”.\n\nSo I thought it could be fun to brush off my dusty blog to expand my tweet to the long form that this topic deserves. However, instead of going into an enumeration of more common errors or 

In [None]:
# Export the synthesizer's results via `to_pandas()`; the bare name on the last
# line makes the notebook display the resulting table.
dataframe = synthesizer.to_pandas()
dataframe

Unnamed: 0,input,actual_output,expected_output,context,retrieval_context,n_chunks_per_context,context_length,evolutions,context_quality,synthetic_input_quality,source_file
0,What qualities correlate strongly to success i...,,"According to Andrej Karpathy's blog, the quali...",[Andrej Karpathy blog\nA Recipe for Training N...,,2,2247,[Constrained],0.825,1.0,Neural Network Training Recipe.txt
1,How do patience & attention to detail relate t...,,"According to Andrej Karpathy, the qualities th...",[Andrej Karpathy blog\nA Recipe for Training N...,,2,2247,[Reasoning],0.825,1.0,Neural Network Training Recipe.txt
2,"If a novice were to use those libraries, would...",,"No, because neural nets are not ""off-the-shelf...",[It is allegedly easy to get started with trai...,,1,1053,[Hypothetical],0.775,1.0,Neural Network Training Recipe.txt
3,Compare the impression given by neural net lib...,,Neural net libraries give the false impression...,[It is allegedly easy to get started with trai...,,1,1053,[Comparative],0.775,0.7,Neural Network Training Recipe.txt
4,Why is silently failing during neural net trai...,,Neural net training often fails silently becau...,[the-shelf” technology the second you deviate ...,,2,2213,[Reasoning],0.775,1.0,Neural Network Training Recipe.txt
5,Explore scenarios where subtle data preprocess...,,"When training neural networks, subtle errors, ...",[the-shelf” technology the second you deviate ...,,2,2213,[In-Breadth],0.775,0.8,Neural Network Training Recipe.txt
6,What two qualities strongly correlate to succe...,,Patience and attention to detail are the two q...,"[.\n\nAs a result, (and this is reeaally diffi...",,3,3449,[Constrained],0.65,1.0,Neural Network Training Recipe.txt
7,"Per Karpathy, before net coding, what initial ...",,"Per Karpathy, the first step to training a neu...","[.\n\nAs a result, (and this is reeaally diffi...",,3,3449,[Multi-context],0.65,1.0,Neural Network Training Recipe.txt
8,Compare inspecting data vs. training nets in n...,,Inspecting data should be the first step when ...,"[ the labels?\n\nIn addition, since the neural...",,3,3401,[Comparative],0.6,0.8,Neural Network Training Recipe.txt
9,"Before training, how can inspecting data, find...",,By thoroughly inspecting your data and underst...,"[ the labels?\n\nIn addition, since the neural...",,3,3401,[Reasoning],0.6,1.0,Neural Network Training Recipe.txt


In [None]:
from rich.console import Console
from rich.markup import escape
from rich.panel import Panel

console = Console()
row_index = 0
first_row = dataframe.iloc[row_index].to_dict()

# Turn literal "\n" sequences into real line breaks so each context chunk reads naturally.
context_chunks = [context.replace("\\n", "\n") for context in first_row["context"]]

print("Context Length (Character Count):", len("".join(first_row["context"])))
console.print("\n[bold green]Dataset Sample[/bold green]")
# The query, expected response and contexts are generated/document text that may
# contain square brackets. Rich would parse those as markup tags (dropping the text
# or raising MarkupError), so every dynamic piece is escaped before rendering.
console.print(
    Panel.fit(
        f"[yellow]Query:[/yellow]\n{escape(str(first_row['input']))}\n\n"
        f"[yellow]Expected Response:[/yellow]\n{escape(str(first_row['expected_output']))}\n\n"
        f"[yellow]Context:[/yellow]\n"
        + "\n\n---\n\n".join(escape(chunk) for chunk in context_chunks)
    )
)


Context Length (Character Count): 2247
