# ChromaDB

In [32]:
from dotenv import load_dotenv
load_dotenv()

True

In [34]:
from langchain.chat_models import init_chat_model

In [35]:
llm = init_chat_model(
    model="openai/gpt-oss-120b",
    model_provider="groq"
)

In [36]:
print(llm.invoke("hello").content)

Hello! How can I help you today?


In [37]:
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

# Loader

In [41]:
loader = DirectoryLoader(
    path="../../data/pdf",
    glob="*.pdf",
    loader_cls=PyPDFLoader
)

In [42]:
docs = loader.load()

# Splitter

In [43]:
splitter = RecursiveCharacterTextSplitter(
    separators=["/n/n", "/n", " ", ""],
    chunk_size = 500,
    chunk_overlap = 50
)

In [44]:
chunks = splitter.split_documents(docs)

# Vector Store

In [46]:
embeddings = HuggingFaceEmbeddings(
    model = "sentence-transformers/all-MiniLM-L6-v2"
)

In [53]:
vector_store = Chroma.from_documents(
    documents = chunks,
    embedding=embeddings,
    collection_name="example_collection",
    persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not necessary
)

In [56]:
query = "What is AGI"
similarity_docs = vector_store.similarity_search(query, k=3)
similarity_docs[0]

Document(metadata={'source': '..\\..\\data\\pdf\\2510.18212v2.pdf', 'total_pages': 57, 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.28 (TeX Live 2025) kpathsea version 6.4.1', 'author': 'Dan Hendrycks; Dawn Song; Christian Szegedy; Honglak Lee; Yarin Gal; Erik Brynjolfsson; Sharon Li; Andy Zou; Lionel Levine; Bo Han; Jie Fu; Ziwei Liu; Jinwoo Shin; Kimin Lee; Mantas Mazeika; Long Phan; George Ingebretsen; Adam Khoja; Cihang Xie; Olawale Salaudeen; Matthias Hein; Kevin Zhao; Alexander Pan; David Duvenaud; Bo Li; Steve Omohundro; Gabriel Alfour; Max Tegmark; Kevin McGrew; Gary Marcus; Jaan Tallinn; Eric Schmidt; Yoshua Bengio', 'page': 1, 'producer': 'pikepdf 8.15.1', 'arxivid': 'https://arxiv.org/abs/2510.18212v2', 'doi': 'https://doi.org/10.48550/arXiv.2510.18212', 'creator': 'arXiv GenPDF (tex2pdf:e76afa9)', 'license': 'http://creativecommons.org/licenses/by/4.0/', 'trapped': '/False', 'page_label': '2', 'creationdate': '', 'title': 'A Definition of AGI'}, page_con

# RAG Pipeline

In [62]:
from langchain_core.documents import Document
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [59]:
def format_docs(docs: Document) -> str:
    return "\n\n".join(doc.page_content for doc in docs)

In [60]:
retriever = vector_store.as_retriever()

In [64]:
from langchain_core.prompts import ChatPromptTemplate

In [65]:
system_prompt = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question
If you dont know the answer, just say I dont know
Use three sentences maximum and keep the answer concise.
Context {context} 
"""

In [66]:
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("user", "{question}")
    ]
)

In [68]:
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough()
    } 
    | prompt 
    | llm 
    | StrOutputParser()
)

In [69]:
rag_chain.invoke("What is AGI?")

'Artificial General Intelligence (AGI) is an AI system that can match or surpass the cognitive versatility and proficiency of a well‑educated adult. It must demonstrate both the breadth (versatility across many domains) and depth (high skill level) of human cognition, rather than being limited to narrow, specialized tasks. In short, AGI aims to replicate the full range of human intellectual abilities.'

In [71]:
retriever._get_relevant_documents("What is AGI", run_manager=None)

[Document(metadata={'author': 'Dan Hendrycks; Dawn Song; Christian Szegedy; Honglak Lee; Yarin Gal; Erik Brynjolfsson; Sharon Li; Andy Zou; Lionel Levine; Bo Han; Jie Fu; Ziwei Liu; Jinwoo Shin; Kimin Lee; Mantas Mazeika; Long Phan; George Ingebretsen; Adam Khoja; Cihang Xie; Olawale Salaudeen; Matthias Hein; Kevin Zhao; Alexander Pan; David Duvenaud; Bo Li; Steve Omohundro; Gabriel Alfour; Max Tegmark; Kevin McGrew; Gary Marcus; Jaan Tallinn; Eric Schmidt; Yoshua Bengio', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.28 (TeX Live 2025) kpathsea version 6.4.1', 'creationdate': '', 'page_label': '2', 'creator': 'arXiv GenPDF (tex2pdf:e76afa9)', 'producer': 'pikepdf 8.15.1', 'doi': 'https://doi.org/10.48550/arXiv.2510.18212', 'license': 'http://creativecommons.org/licenses/by/4.0/', 'trapped': '/False', 'page': 1, 'total_pages': 57, 'title': 'A Definition of AGI', 'source': '..\\..\\data\\pdf\\2510.18212v2.pdf', 'arxivid': 'https://arxiv.org/abs/2510.18212v2'}, page_co

In [74]:
def query_rag(question):
    print(f"User: {question}")
    print("-"*30)
    docs = retriever._get_relevant_documents(question, run_manager=None)
    for doc in docs:
        print(doc.page_content)
        print("-"*30)
    res = rag_chain.invoke(question)
    print(f"Answer: {res}")

In [77]:
query_rag("abilities Needed for AGI")

User: abilities Needed for AGI
------------------------------
AGI is an AI that can match or exceed the cognitive versatility and proficiency
of a well-educated adult.
This definition emphasizes that general intelligence requires not just specialized performance in
narrow domains, but the breadth (versatility) and depth (proficiency) of skills that characterize human
cognition.
To operationalize this definition, we must look to the only existing example of general intelligence:
humans. Human cognition is not a monolithic capability; it is a complex
------------------------------
We leave economic
measurements of advanced AI to other work. Last, we deliberately focus on core cognitive capabilities
rather than physical abilities such as motor skills or tactile sensing, as we seek to measure the
capabilities of the mind rather than the quality of its actuators or sensors. We discuss more limitations
in the Discussion.
2 Overview of Abilities Needed for AGI
This document outlines a framewo