In [1]:
import os
import tempfile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings
from langsmith import traceable
from openai import OpenAI
from typing import List
import nest_asyncio
from llmhelper import get_llm
from langchain_ollama import OllamaEmbeddings

# Model name and provider attached as LangSmith trace metadata on `call_model`
# (`ls_model_name` / `ls_provider`).
# NOTE(review): the embeddings in this notebook are built with OllamaEmbeddings, so
# these OpenAI values may not describe the model actually served by `get_llm()` — confirm.
MODEL_NAME = "gpt-4o-mini"
MODEL_PROVIDER = "openai"
# Not referenced anywhere else in the visible notebook.
APP_VERSION = 1.0
# System message sent ahead of every user question in `generate_response`.
RAG_SYSTEM_PROMPT = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the latest question in the conversation. 
If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.
"""


def get_vector_db_retriever(llm_model, documents):
    """Return a retriever over a parquet-persisted SKLearnVectorStore.

    If the persisted store already exists on disk it is loaded and ``documents``
    is ignored; otherwise ``documents`` are embedded, indexed and written to disk.
    Delete the parquet file to force a re-index after the source documents change.

    Args:
        llm_model: Object exposing a ``model`` attribute; that value is used as the
            model name for ``OllamaEmbeddings``.
        documents: Documents to index when no persisted store exists yet.

    Returns:
        The retriever produced by ``vectorstore.as_retriever()`` with default
        search settings.
    """
    persist_path = os.path.join("./temp/", "vectorstore.union.parquet")
    embd = OllamaEmbeddings(model=llm_model.model)

    # If vector store exists, then load it
    if os.path.exists(persist_path):
        vectorstore = SKLearnVectorStore(
            embedding=embd,
            persist_path=persist_path,
            serializer="parquet"
        )
        print("Vector DB is already initialized.")
        # NOTE: the previous `as_retriever(lambda_mult=0)` had no effect — the keyword
        # never reached `search_kwargs` (the retriever repr showed `search_kwargs={}`).
        # If MMR was intended, use
        # `as_retriever(search_type="mmr", search_kwargs={"lambda_mult": 0})`.
        return vectorstore.as_retriever()

    # The parquet writer does not create missing parent directories, so make sure
    # the target directory exists before indexing and persisting.
    os.makedirs(os.path.dirname(persist_path), exist_ok=True)

    # Otherwise, index the supplied documents and create a new vector store
    vectorstore = SKLearnVectorStore.from_documents(
        documents=documents,
        embedding=embd,
        persist_path=persist_path,
        serializer="parquet"
    )
    # Persist first so the message below is only printed once the file is written.
    vectorstore.persist()
    print("Vector DB is has setup now from path = ", persist_path)
    return vectorstore.as_retriever()



"""
retrieve_documents
- Returns documents fetched from a vectorstore based on the user's question
"""
@traceable(run_type="chain")
def retrieve_documents(question: str):
    """Look up documents for ``question`` via the module-level ``retriever``.

    Args:
        question: The user's question, passed unchanged to ``retriever.invoke``.

    Returns:
        Whatever ``retriever.invoke(question)`` returns.
    """
    matching_docs = retriever.invoke(question)
    return matching_docs

"""
generate_response
- Calls `call_model` to generate a model response after formatting inputs
"""
@traceable(run_type="chain")
def generate_response(question: str, documents):
    """Build the chat messages for a RAG answer and hand them to ``call_model``.

    Args:
        question: The user's question.
        documents: Iterable of objects with a ``page_content`` attribute; their
            contents are joined with blank lines to form the context.

    Returns:
        The value returned by ``call_model`` for the system + user messages.
    """
    context_chunks = [doc.page_content for doc in documents]
    formatted_docs = "\n\n".join(context_chunks)

    system_message = {"role": "system", "content": RAG_SYSTEM_PROMPT}
    user_message = {
        "role": "user",
        "content": f"Context: {formatted_docs} \n\n Question: {question}",
    }
    return call_model([system_message, user_message])

"""
call_model
- Returns the output of invoking the configured `llm_client` with the chat messages
"""
@traceable(
    run_type="llm",
    metadata={"ls_provider": MODEL_PROVIDER, "ls_model_name": MODEL_NAME},
)
def call_model(messages: List[dict]) -> str:
    """Send ``messages`` to the module-level ``llm_client`` and return its reply.

    The trace is tagged with ``MODEL_PROVIDER`` / ``MODEL_NAME`` as LangSmith metadata.
    NOTE(review): those constants are hard-coded and may not match the model that
    ``llm_client`` actually wraps — confirm against ``get_llm()``.

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The result of ``llm_client.invoke(messages)``.
    """
    completion = llm_client.invoke(messages)
    return completion

"""
langsmith_rag
- Calls `retrieve_documents` to fetch documents
- Calls `generate_response` to generate a response based on the fetched documents
- Returns the model response
"""
@traceable(run_type="chain")
def langsmith_rag(question: str):
    """Answer ``question`` end to end: retrieve documents, then generate a response.

    Args:
        question: The user's question.

    Returns:
        The value returned by ``generate_response`` for the retrieved documents.
    """
    return generate_response(question, retrieve_documents(question))

def load_website(web_paths=None):
    """Load every page listed in the given sitemaps and split them into chunks.

    Args:
        web_paths: Optional iterable of sitemap URLs. When omitted, the LangSmith
            docs and DSPy sitemaps are used.

    Returns:
        A single list with the chunks from all sitemaps, in the order the sitemaps
        were given. Chunks are produced by a tiktoken-based recursive splitter with
        ``chunk_size=500`` and ``chunk_overlap=0``.
    """
    if web_paths is None:
        web_paths = [
            "https://docs.smith.langchain.com/sitemap.xml",
            "https://dspy.ai/sitemap.xml",
        ]

    # The splitter does not depend on the sitemap, so build it once instead of
    # once per loop iteration.
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500, chunk_overlap=0)

    documents = []
    for path in web_paths:
        sitemap_loader = SitemapLoader(web_path=path, show_progress=True)
        print("Loading web page ", path)
        site_docs = sitemap_loader.load()
        print("Loaded web page ", path)
        documents.extend(text_splitter.split_documents(site_docs))
    return documents

llm_client = get_llm()
nest_asyncio.apply()
# Crawling both sitemaps fetches roughly 300 pages, and get_vector_db_retriever
# ignores `documents` when the persisted store already exists — so skip the crawl
# in that case.
# NOTE: this path must stay in sync with the persist_path used inside
# get_vector_db_retriever.
vectorstore_cache_path = os.path.join("./temp/", "vectorstore.union.parquet")
documents = [] if os.path.exists(vectorstore_cache_path) else load_website()
retriever = get_vector_db_retriever(llm_model=llm_client, documents=documents) 
print("retriever " , retriever)



USER_AGENT environment variable not set, consider setting it to identify your requests.


Loading web page  https://docs.smith.langchain.com/sitemap.xml


Fetching pages: 100%|#####################################################################################################################################################################################| 219/219 [00:21<00:00, 10.02it/s]


Loaded web page  https://docs.smith.langchain.com/sitemap.xml
Loading web page  https://dspy.ai/sitemap.xml


Fetching pages: 100%|#######################################################################################################################################################################################| 81/81 [00:08<00:00,  9.73it/s]


Loaded web page  https://dspy.ai/sitemap.xml
Vector DB is has setup now from path =  ./temp/vectorstore.union.parquet
retriever  tags=['SKLearnVectorStore', 'OllamaEmbeddings'] vectorstore=<langchain_community.vectorstores.sklearn.SKLearnVectorStore object at 0x31b3d2110> search_kwargs={}


In [2]:
# Sample query about tracing configuration. The bare call on the last line makes
# the notebook display the pipeline's return value as the cell output.
question = "what environment variables need to set for tracing?"
langsmith_rag(question)

'To enable tracing, you need to set the following environment variables:\n\n- OTEL_EXPORTER_OTLP_ENDPOINT with the LangSmith API endpoint URL\n- OTEL_EXPORTER_OTLP_HEADERS with your LangSmith API key and optional project name.'

In [7]:
# Sample query about DSPy; the DSPy sitemap is one of the two sources loaded by
# load_website().
question = "what is DSPy framework for programming?"
langsmith_rag(question)

'The DSPy framework is a Python-based programmatic framework designed to help developers build, train, and deploy large language models (LMs). It allows users to define their tasks, pipeline, and parameters using Python code, and provides features like assertions and suggestions to automate the process. The framework also supports various built-in modules for different prompting techniques, such as chain of thought or React.'

In [6]:
# Deliberately open-ended query; the answer depends entirely on which chunks the
# retriever returns for it.
question = "what is high-quality outputs?"
langsmith_rag(question) 


'High-quality output for an LLM-as-a-judge evaluator would be examples where the model demonstrates clear, accurate, and relevant routing of user intentions into the correct path ("refund" or "question answering"), with minimal errors or confusion. The output should also demonstrate a good understanding of nuances in language, such as subtleties in intent and context. This could involve evaluating how well the LLM handles edge cases or ambiguous inputs.'

In [10]:
# Query for a specific DSPy term; the system prompt tells the model to say it
# doesn't know when the retrieved context does not cover the question.
question = "what is Cross-LM Compatibility in DSPy? " 
langsmith_rag(question) 


"I don't know. The context provided discusses various aspects of RAG applications and LLMs, but there's no mention of Cross-LM Compatibility in DSPy."