In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.document_loaders import DirectoryLoader


In [2]:
from dotenv import load_dotenv


load_dotenv()

True

In [3]:
embedding = OpenAIEmbeddings()


In [None]:
import os

def load_documents(path, glob="*.md"):
    # Assuming DirectoryLoader and its load method are defined elsewhere
    loader = DirectoryLoader(path, glob=glob)
    documents = loader.load()
    return documents

def load_all_documents(parent_path):
    all_documents = []
    for root, _, _ in os.walk(parent_path):
        print(root)
        documents = load_documents(root)  # Load documents from current directory
        all_documents.extend(documents)  # Combine documents from all directories
    return all_documents


parent_path =  "react.dev-main/src/content/reference"
all_docs = load_all_documents(parent_path)



In [None]:
all_docs

In [4]:
from langchain.storage._lc_store import create_kv_docstore
from langchain.storage import LocalFileStore
from langchain.retrievers import ParentDocumentRetriever


fs = LocalFileStore("./store")
store = create_kv_docstore(fs)

child_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)

vectorstore = Chroma(
    collection_name="full_documents", embedding_function=embedding,
    persist_directory="./chroma_db"
)
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)
# retriever.add_documents(docs, ids=None)



In [None]:
def add_documents_in_batches(retriever, documents, batch_size=5):
    for i in range(0, len(documents), batch_size):
        batch = documents[i:i + batch_size]
        # Assuming your retriever's add_documents method can handle a batch of documents directly
        retriever.add_documents(batch)

# Assuming all_docs is a list of documents you've loaded
# and retriever is your retriever instance ready to add documents to it
add_documents_in_batches(retriever, all_docs)


In [6]:
retriever.get_relevant_documents("How do I use useEffect in react")

[Document(page_content="```js\nimport { useState, useEffect } from 'react';\n\nfunction useTime() {\n  // 1. Keep track of the current date's state. useState receives an initializer function as its\n  //    initial state. It only runs once when the hook is called, so only the current date at the\n  //    time the hook is called is set first.\n  const [time, setTime] = useState(() => new Date());\n\nuseEffect(() => {\n    // 2. Update the current date every second using setInterval.\n    const id = setInterval(() => {\n      setTime(new Date()); // ✅ Good: non-idempotent code no longer runs in render\n    }, 1000);\n    // 3. Return a cleanup function so we don't leak the setInterval timer.\n    return () => clearInterval(id);\n  }, []);\n\nreturn time;\n}", metadata={'source': 'react.dev-main/src/content/reference/rules/components-and-hooks-must-be-pure.md'}),
 Document(page_content="new Date() is not idempotent as it always returns the current date and changes its result every time it

In [7]:
import asyncio


In [8]:
async def fetch_documents_for_queries(retriever, queries):
    tasks = [retriever.aget_relevant_documents(query) for query in queries]
    results = await asyncio.gather(*tasks)
    unique_docs = [doc for result in results for doc in result]
    return unique_docs

In [9]:
queries =  ['1. Have you reviewed the official React documentation on useEffect to understand its usage and syntax?', '2. Are you familiar with the React Hooks API documentation, specifically the section on useEffect?', '3. Have you looked at any tutorials or guides that provide step-by-step instructions on how to use useEffect in React?', '4. Do you need clarification on any specific examples or use cases provided in the useEffect documentation?', '5. Have you checked the React useEffect API reference documentation for detailed information on its parameters and return values?']

In [10]:
docs = await fetch_documents_for_queries(retriever=retriever,queries=queries)

In [11]:
docs

[Document(page_content="title: useEffect\n\nuseEffect is a React Hook that lets you synchronize a component with an external system.\n\njs\nuseEffect(setup, dependencies?)\n\nReference {/reference/}\n\nuseEffect(setup, dependencies?) {/useeffect/}\n\nCall useEffect at the top level of your component to declare an Effect:\n\n```js\nimport { useEffect } from 'react';\nimport { createConnection } from './chat.js';\n\nfunction ChatRoom({ roomId }) {\n  const [serverUrl, setServerUrl] = useState('https://localhost:1234');\n\nuseEffect(() => {\n    const connection = createConnection(serverUrl, roomId);\n    connection.connect();\n    return () => {\n      connection.disconnect();\n    };\n  }, [serverUrl, roomId]);\n  // ...\n}\n```\n\nSee more examples below.\n\nParameters {/parameters/}", metadata={'source': 'react.dev-main/src/content/reference/react/useEffect.md'}),
 Document(page_content="title: useEffect\n\nuseEffect is a React Hook that lets you synchronize a component with an extern