In [1]:
import nest_asyncio
import qdrant_client

from llama_index.core import Settings
from llama_index.core import PromptTemplate
from llama_index.llms.ollama import Ollama
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.embeddings.fastembed import FastEmbedEmbedding 
from llama_index.vector_stores.qdrant import QdrantVectorStore

from IPython.display import Markdown, display

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import nest_asyncio

# Patch asyncio so an already-running event loop can be re-entered; the
# notebook kernel runs one, and later cells drive async code through
# nest_asyncio's run_until_complete.
nest_asyncio.apply()

In [3]:
# Answer-generation LLM, served through Ollama.
llm = Ollama(
    model="llama3.2:1b",
    request_timeout=120.0,
)

# Embedding model; registered as the global default in the next cell.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-large-en-v1.5",
    trust_remote_code=True,
)

# Reranker applied after retrieval; keeps the top 2 nodes.
rerank = SentenceTransformerRerank(
    model="BAAI/bge-reranker-base",
    top_n=2,
)

In [4]:
# Register both models as llama_index global defaults so the index and query
# engine built later do not need them passed explicitly.
Settings.llm = llm
Settings.embed_model = embed_model

In [5]:
# Echo the active embedding model to confirm the Settings assignment took effect.
Settings.embed_model

HuggingFaceEmbedding(model_name='BAAI/bge-large-en-v1.5', embed_batch_size=10, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x0000015AD69A77D0>, num_workers=None, max_length=512, normalize=True, query_instruction=None, text_instruction=None, cache_folder=None, show_progress_bar=False)

In [None]:
input_dir_path = './paul_graham/'

# Walk the input directory recursively and keep only .txt files.
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    recursive=True,
    required_exts=[".txt"],
)
docs = loader.load_data()

In [9]:
# Connect to the Qdrant instance on localhost:6333.
client = qdrant_client.QdrantClient(host="localhost", port=6333)

# Back the llama_index storage layer with the "document_chat" collection.
vector_store = QdrantVectorStore(
    collection_name="document_chat",
    client=client,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the vector index over the loaded documents, stored in Qdrant.
index = VectorStoreIndex.from_documents(
    docs,
    storage_context=storage_context,
)

In [11]:
# Retrieve 4 candidates per query, then let the reranker trim them.
query_engine = index.as_query_engine(
    node_postprocessors=[rerank],
    similarity_top_k=4,
)

# Custom QA prompt; {context_str} and {query_str} are the template placeholders.
template = """Context information is below.
              ---------------------
              {context_str}
              ---------------------
              Given the context information above I want you to think
              step by step to answer the query in a crisp manner. Incase 
              you don't know the answer say 'I don't know!'.
              
              Query: {query_str}
              
              Answer:"""
qa_prompt_tmpl = PromptTemplate(template)

# Swap the response synthesizer's text-QA prompt for the custom one.
prompt_overrides = {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
query_engine.update_prompts(prompt_overrides)

In [12]:
# Smoke-test the pipeline with one question about the indexed essays.
# NOTE(review): the triple-quoted literal keeps its line breaks and leading
# indentation, so that whitespace is part of the query text sent to the engine.
response = query_engine.query("""How did the structure of funding startups 
                                 in batches contribute to the success and 
                                 growth of the Y Combinator program and the
                                 startups involved?""")
                                 
# Render the answer text as Markdown in the cell output.
display(Markdown(str(response)))

The structure of funding startups in batches contributed to the success and growth of the Y Combinator program and the startups involved through several mechanisms:

1. **Pooling resources**: Funding startups in batches allowed Y Combinator to pool its resources, expertise, and networks across multiple companies at once. This enabled it to provide more comprehensive support and mentorship to its portfolio companies.
2. **Reducing risk**: By funding startups in groups rather than individuals, Y Combinator reduced the risk of losing a single investment. If one startup failed, the others could continue to grow and improve their chances of success.
3. **Increasing efficiency**: Batching funding allowed Y Combinator to focus on managing its existing portfolio companies more efficiently. It could allocate resources, provide guidance, and make decisions in real-time without having to devote as much time to individual startups.
4. **Encouraging collaboration**: Funding startups in batches fostered a culture of collaboration among the companies. Each startup received support from multiple Y Combinator employees, who shared their expertise and resources to help each other grow.
5. **Identifying promising trends**: Batching funding enabled Y Combinator to identify emerging trends and areas of interest more effectively. By providing support to multiple startups in related fields or technologies, the organization could better understand what was working and what wasn't.

These mechanisms combined to create a robust and supportive environment for startup growth, which contributed significantly to the success and growth of the Y Combinator program and its involved startups.

# Evaluation

In [14]:
# Import from langchain_community (the same package the later LLM/embedding
# cells use) rather than the deprecated `langchain.document_loaders` path.
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Use a distinct name so the llama_index `loader` defined earlier is not
# rebound to an object of a different type.
# Restrict to .txt files, mirroring required_exts=[".txt"] on the llama_index
# reader; without a glob, other files stored in the same directory (e.g. the
# saved test-set CSV) would be ingested as source documents.
lc_loader = DirectoryLoader("./paul_graham/", glob="**/*.txt")

# 1024-character chunks with a 20-character overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=20)

# Load every matching file and split it into chunks for test-set generation.
documents = lc_loader.load_and_split(text_splitter)

In [15]:
# Inspect the first chunk's serialized form to sanity-check the load and split.
documents[0].to_json()

{'lc': 1,
 'type': 'constructor',
 'id': ['langchain', 'schema', 'document', 'Document'],
 'kwargs': {'page_content': 'How to Do Great Work\n\nJuly 2023\n\nIf you collected lists of techniques for doing great work in a lot of different fields, what would the intersection look like? I decided to find out by making it.\n\nPartly my goal was to create a guide that could be used by someone working in any field. But I was also curious about the shape of the intersection. And one thing this exercise shows is that it does have a definite shape; it\'s not just a point labelled "work hard."\n\nThe following recipe assumes you\'re very ambitious.\n\nThe first step is to decide what to work on. The work you choose needs to have three qualities: it has to be something you have a natural aptitude for, that you have a deep interest in, and that offers scope to do great work.\n\nIn practice you don\'t have to worry much about the third criterion. Ambitious people are if anything already too conservat

We will need three models here:

- A generator model that generates the QA pairs based on the provided context.
- An embedding model that generates embeddings from raw text (used to retrieve and build the context).
- A critic model for validating the generation process.

In [16]:
# Alias the LangChain wrapper: importing it as plain `Ollama` would rebind the
# llama_index `Ollama` class imported at the top of the notebook and used to
# build `llm`, so re-running that cell would construct the wrong type.
from langchain_community.llms import Ollama as LangchainOllama
from langchain_community.embeddings import OllamaEmbeddings

# Generator writes the QA pairs; critic validates them.
generator_llm = LangchainOllama(model="phi3:3.8b")
critic_llm = LangchainOllama(model="llama3.2:1b")

# Embeddings used by the test-set generator.
ollama_emb = OllamaEmbeddings(
    model="nomic-embed-text",
)

In [None]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

# Wire the generator/critic LLMs and the embeddings into the ragas generator.
generator = TestsetGenerator.from_langchain(
    generator_llm=generator_llm, critic_llm=critic_llm, embeddings=ollama_emb
)

# Question mix: half simple, a quarter each reasoning and multi-context.
distribution = {simple: 0.5, reasoning: 0.25, multi_context: 0.25}

# Generate 10 test samples from the chunked documents.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=10,
    distributions=distribution,
    raise_exceptions=False,
)

Exception in thread Thread-5:                                    
Traceback (most recent call last):
  File "C:\Users\vijit_singh\AppData\Local\Programs\Python\Python311\Lib\threading.py", line 1038, in _bootstrap_inner
    self.run()
  File "c:\Users\vijit_singh\Desktop\Personal projects repo\rag_app_lindex\rag_test\Lib\site-packages\ragas\executor.py", line 96, in run
    results = self.loop.run_until_complete(self._aresults())
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\vijit_singh\Desktop\Personal projects repo\rag_app_lindex\rag_test\Lib\site-packages\nest_asyncio.py", line 98, in run_until_complete
    return f.result()
           ^^^^^^^^^^
  File "C:\Users\vijit_singh\AppData\Local\Programs\Python\Python311\Lib\asyncio\futures.py", line 203, in result
    raise self._exception.with_traceback(self._exception_tb)
  File "C:\Users\vijit_singh\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 267, in __step
    result = coro.send(

ExceptionInRunner: The runner thread which was running the jobs raised an exeception. Read the traceback above to debug it. You can also pass `raise_exceptions=False` incase you want to show only a warning message instead.

In [22]:
# NOTE(review): depends on `testset` from the generation cell. When that cell
# fails, the name is never bound and this line raises NameError; the following
# cell loads the test data from a saved CSV instead.
test_df = testset.to_pandas().dropna()

NameError: name 'testset' is not defined

In [25]:
import pandas as pd

# Forward-slash relative path, matching the other './paul_graham/' paths in
# this notebook; the previous backslash form only resolved on Windows.
test_df = pd.read_csv("./paul_graham/test_data_paul_graham.csv")

In [28]:
# Drop rows with any missing value. Rebinding (rather than inplace=True)
# avoids in-place mutation across cells and keeps the step chain-friendly.
test_df = test_df.dropna()

In [29]:
# Preview the first rows of the cleaned test set.
test_df.head()

Unnamed: 0.1,Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,0,How did the shift to publishing on the web cha...,"[""Wow, I thought, there's an audience. If I wr...",The shift to publishing on the web changed the...,simple,[{'source': 'paul_graham/what_i_worked_on.txt'...,True
1,1,"How does criticizing a project as a ""toy"" rese...","[""[9] You can't usually get paid for doing exa...",Criticizing a project as a 'toy' is similar to...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True
2,2,How did the structure of funding startups in b...,['The deal for startups was based on a combina...,Funding startups in batches allowed for conven...,simple,[{'source': 'paul_graham/what_i_worked_on.txt'...,True
3,3,How can exploring different topics help in gen...,"[""Talking or writing about the things you're i...",Exploring different topics can help in generat...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True
4,4,How does focusing consistently on something yo...,"[""The way to beat it is to stop occasionally a...",Great work happens by focusing consistently on...,simple,[{'source': 'paul_graham/how_to_do_great_thing...,True


In [None]:
# query_engine is our rag app
def generate_response(query_engine, question):
    """Run one question through the RAG query engine.

    Args:
        query_engine: Object exposing ``query(question)``; the returned value
            must provide ``response`` and ``source_nodes``.
        question: The question to ask.

    Returns:
        dict with two keys: "answer" (the engine's ``response`` attribute) and
        "contexts" (the content of each source node, in the order returned).
    """
    result = query_engine.query(question)
    answer = result.response

    # Collect the text of every node the engine used to produce the answer.
    retrieved_texts = []
    for hit in result.source_nodes:
        retrieved_texts.append(hit.node.get_content())

    return {"answer": answer, "contexts": retrieved_texts}

In [31]:
from datasets import Dataset
from tqdm.auto import tqdm

# Plain Python list (not an ndarray) so every column handed to
# Dataset.from_dict uses the same container type as ground_truth below.
test_questions = test_df["question"].tolist()

# One RAG call per question; this is the slow step of the evaluation.
responses = [generate_response(query_engine, q) for q in tqdm(test_questions)]

dataset_dict = {
    "question": test_questions,
    "answer": [r["answer"] for r in responses],        # answers produced by the RAG app
    "contexts": [r["contexts"] for r in responses],    # contexts retrieved by the RAG app
    "ground_truth": test_df["ground_truth"].tolist(),  # reference answers from the test set
}

ragas_eval_dataset = Dataset.from_dict(dataset_dict)

100%|██████████| 47/47 [44:18<00:00, 56.56s/it]


In [32]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama

from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_correctness,
    context_recall,
    context_precision,
)

In [None]:
# Judge LLM and embeddings that ragas uses to score the RAG outputs.
critic_llm = Ollama(model="llama3.2:1b")
ollama_emb = OllamaEmbeddings(model="nomic-embed-text")

# Metrics computed for every sample in the evaluation dataset.
metrics = [
    faithfulness,
    answer_correctness,
    context_recall,
    context_precision,
]

evaluation_result = evaluate(
    dataset=ragas_eval_dataset,
    metrics=metrics,
    llm=critic_llm,
    embeddings=ollama_emb,
)

Evaluating:   2%|▏         | 3/188 [09:38<8:05:54, 157.59s/it] Failed to parse output. Returning None.
Evaluating:   2%|▏         | 4/188 [11:27<7:05:09, 138.64s/it]Failed to parse output. Returning None.
Evaluating:   3%|▎         | 6/188 [13:47<4:52:49, 96.53s/it] Failed to parse output. Returning None.
Evaluating:   4%|▎         | 7/188 [14:20<3:48:33, 75.76s/it]Failed to parse output. Returning None.
Evaluating:   5%|▍         | 9/188 [17:14<3:53:26, 78.25s/it]Failed to parse output. Returning None.
Evaluating:   5%|▌         | 10/188 [20:52<5:59:55, 121.32s/it]Failed to parse output. Returning None.
Evaluating:   6%|▌         | 11/188 [21:12<4:26:32, 90.35s/it] Failed to parse output. Returning None.
Evaluating:   7%|▋         | 13/188 [22:12<2:52:56, 59.29s/it]Failed to parse output. Returning None.
Evaluating:   7%|▋         | 14/188 [22:17<2:04:34, 42.96s/it]Failed to parse output. Returning None.
Evaluating:   9%|▊         | 16/188 [25:10<3:10:57, 66.62s/it]Failed to parse out

In [None]:
# Tabulate the evaluation scores for inspection.
# NOTE(review): presumably one row per evaluated sample — confirm against the
# installed ragas version.
eval_scores_df = pd.DataFrame(evaluation_result.scores)