# Scratchpad for testing the navlearnLM server code


In [1]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma


# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast with an explicit message when the API key is missing, rather than
# relying on whatever error the OpenAI-backed objects below would produce.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it or add it to a .env file "
        "before running this notebook."
    )

# Chat model used by the generation step.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
# Embedding model used by the vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Chroma store created with only an embedding function (no other options set).
vector_store = Chroma(embedding_function=embeddings)


In [15]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# Strainer handed to BeautifulSoup as `parse_only`, selecting on these CSS classes.
post_sections = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_sections},
)

# Fetch the page and keep the resulting documents for splitting.
docs = loader.load()


In [16]:
# Splitter settings gathered in one place so they are easy to tune.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}

text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
# Break the loaded documents into chunks for indexing.
all_splits = text_splitter.split_documents(docs)

In [17]:
# Add the chunks to the vector store. No ids are passed, and the return value
# is bound to `_` so it is not used further.
# NOTE(review): re-running this cell presumably inserts the same chunks
# again -- confirm before relying on search results after a re-run.
_ = vector_store.add_documents(documents=all_splits)

In [None]:
# Pull the "rlm/rag-prompt" prompt from the hub; `generate` invokes it with
# the "question" and "context" keys.
prompt = hub.pull("rlm/rag-prompt")

class State(TypedDict):
    """State dictionary used as the schema of the graph and passed to its steps."""

    # The question to answer; read by the retrieval and generation steps.
    question: str
    # Documents returned by the similarity search for the question.
    context: List[Document]
    # Content of the LLM response produced by `generate`.
    answer: str

class RetrieverTool:
    """Retrieval step bound to a specific vector store.

    Args:
        vector_store: Object exposing ``similarity_search(query)``; it is
            queried with the question held in the state.
        retriever_id: Value stored on ``self.ID`` to tell one retriever from
            another. Defaults to 1, the value previously hard-coded, so
            existing ``RetrieverTool(vector_store=...)`` calls are unaffected.
    """

    def __init__(self, vector_store, retriever_id: int = 1):
        self.vector_store = vector_store
        # Identifier used to distinguish one retriever from another.
        self.ID = retriever_id

    def retrieve(self, state: State):
        """Search the vector store for ``state["question"]``.

        Returns:
            A partial state update ``{"context": <retrieved documents>}``.
        """
        retrieved_docs = self.vector_store.similarity_search(state["question"])
        return {"context": retrieved_docs}


# Retriever over the shared vector store; its bound `retrieve` method is the
# first step added to the graph below.
retriever_one = RetrieverTool(vector_store=vector_store)      

def retrieve(state: State):
    """Look up documents similar to the question using the module-level store.

    Reads ``state["question"]``, runs ``vector_store.similarity_search`` on it
    and returns the hits as ``{"context": <documents>}``.
    """
    query = state["question"]
    matches = vector_store.similarity_search(query)
    return {"context": matches}

def generate(state: State):
    """Produce an answer from the question and the retrieved context.

    The ``page_content`` of every document in ``state["context"]`` is joined
    with blank lines, passed to ``prompt`` together with the question, and the
    resulting messages are sent to ``llm``.

    Returns:
        A partial state update ``{"answer": <content of the LLM response>}``.
    """
    context_text = "\n\n".join([chunk.page_content for chunk in state["context"]])
    prompt_input = {"question": state["question"], "context": context_text}
    reply = llm.invoke(prompt.invoke(prompt_input))
    return {"answer": reply.content}

# Assemble the pipeline: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence(
    [
        # Names are spelled out so the START edge below visibly matches a node.
        ("retrieve", retriever_one.retrieve),
        ("generate", generate),
    ]
)
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()




In [24]:
response = graph.invoke({"question": "How ReAct works?"})
# Print the full final state first, then only the generated answer.
print(response, response["answer"], sep="\n")

{'question': 'How ReAct works?', 'context': [Document(id='78eb9bbe-9d28-4544-95a6-08963e804015', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='ReAct (Yao et al. 2023) integrates reasoning and acting within LLM by extending the action space to be a combination of task-specific discrete actions and the language space. The former enables LLM to interact with the environment (e.g. use Wikipedia search API), while the latter prompting LLM to generate reasoning traces in natural language.\nThe ReAct prompt template incorporates explicit steps for LLM to think, roughly formatted as:\nThought: ...\nAction: ...\nObservation: ...\n... (Repeated many times)'), Document(id='1e6aadd2-b9ae-4628-ac1a-7715b46fc1aa', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 2.  Examples of reasoning trajectories for knowledge-intensive tasks (e.g. HotpotQA, FEVER) and decision-making tasks (e.g. AlfWorld Env, WebShop). (