(1) Packages

(2) LangSmith

In [1]:
import os

from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
# Kept as the cell's final expression so its return value is displayed.
# NOTE(review): presumably this is where the API keys used by later cells come from; confirm.
load_dotenv()

True

In [2]:
# LangSmith tracing configuration, written into this process's environment.
# LANGCHAIN_TRACING_V2 is the flag name given in LangSmith's setup instructions.
# The un-suffixed LANGCHAIN_TRACING is kept so anything that reads it still sees it.
# NOTE(review): whether the un-suffixed flag alone enables tracing depends on the
# installed langsmith/langchain version; confirm.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_TRACING'] = 'true'
# Endpoint that traces are sent to.
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'


## Part 1: Overview

In [16]:
import bs4
#from langchain import hub
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langsmith import Client as ls_Client


### Indexing

In [5]:
# Load Documents
# Uses bs4 and WebBaseLoader from the notebook's import cell.
SOURCE_URL = "https://lilianweng.github.io/posts/2023-06-23-agent/"
# CSS classes handed to the SoupStrainer so that only matching elements are parsed.
CONTENT_CLASSES = ("post-content", "post-title", "post-header")

loader = WebBaseLoader(
    web_paths=(SOURCE_URL,),
    bs_kwargs={"parse_only": bs4.SoupStrainer(class_=CONTENT_CLASSES)},
)
# Loads the page at SOURCE_URL; the result is consumed by the splitting step.
docs = loader.load()

In [6]:
# Quick inspection: show which container type loader.load() returned.
type(docs)

list

In [23]:
#docs[0:2]

In [8]:
# Split
# Cut the loaded documents into overlapping chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # size limit requested for each chunk
    chunk_overlap=200,  # overlap requested between neighbouring chunks
)
splits = text_splitter.split_documents(docs)

In [None]:
# Embed
# Embed every chunk with the "text-embedding-3-small" model and index the result in Chroma.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
)
# Retriever view over the store, created with no extra arguments.
retriever = vectorstore.as_retriever()

In [18]:
### RETRIEVAL AND GENERATION ###
# LangSmith client, constructed with no explicit arguments.
# NOTE(review): presumably it picks up its endpoint/credentials from the environment; confirm.
ls_client = ls_Client()
# Prompt
# Pull the prompt stored under "rlm/rag-prompt"; it becomes the prompt stage of the RAG chain.
prompt = ls_client.pull_prompt("rlm/rag-prompt")

In [19]:
# LLM
# Chat model that generates the final answer; temperature is pinned to 0.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

In [21]:
# Post Process
def format_docs(docs):
    """Merge documents into a single context string.

    Args:
        docs: Iterable of objects that expose a ``page_content`` string attribute.

    Returns:
        The ``page_content`` of each document, in input order, joined by a blank line.
        An empty iterable yields an empty string.
    """
    separator = "\n\n"
    contents = [doc.page_content for doc in docs]
    return separator.join(contents)

# Chain
# Context branch: the retriever's output is piped through format_docs.
context_branch = retriever | format_docs

# Full pipeline: build the {"context", "question"} mapping (the question is passed
# through unchanged), then pipe it through the prompt, the chat model and the
# string output parser, in that order.
rag_chain = (
    {"context": context_branch, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [22]:
# Question
# Run the RAG chain on one question; the returned answer is displayed as the cell output.
question = "What is task decomposition?"
rag_chain.invoke(question)

'Task decomposition is the process of breaking down a complex task into smaller, manageable sub-tasks or steps. This can be achieved through various methods, including prompting large language models (LLMs) to outline steps, using task-specific instructions, or incorporating human inputs. Techniques like Chain of Thought and Tree of Thoughts further enhance this process by structuring reasoning and exploring multiple possibilities at each step.'