In [1]:
from dotenv import find_dotenv, load_dotenv

# Locate the project's .env file and merge its entries into the process
# environment. The call stays as the cell's final expression so its boolean
# result is shown as the cell output.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

In [3]:
import os

# LangSmith tracing settings for this session (tracing on, endpoint, project).
os.environ.update(
    {
        'LANGCHAIN_TRACING_V2': 'true',
        'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
        'LANGCHAIN_PROJECT': 'cortex',
    }
)

# Read both API keys up front; each is validated below, LangChain first.
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

# Fail fast on a missing or empty key, otherwise write the value back so it
# is guaranteed to be present in os.environ for the libraries that read it.
if not langchain_api_key:
    raise ValueError("LANGCHAIN_API_KEY is not set in the environment.")
os.environ['LANGCHAIN_API_KEY'] = langchain_api_key

if not groq_api_key:
    raise ValueError("GROQ_API_KEY is not set in the environment.")
os.environ['GROQ_API_KEY'] = groq_api_key

## PART 1 - OVERVIEW

In [5]:
import os

# Set the USER_AGENT environment variable (used to identify outgoing requests).
# This must happen BEFORE the langchain_community imports below: the web loader
# module reads USER_AGENT while it is being imported, so assigning it afterwards
# has no effect on WebBaseLoader and the "USER_AGENT environment variable not
# set" warning is still emitted.
# TODO: replace the placeholder with a string that identifies this project.
os.environ['USER_AGENT'] = 'your_custom_user_agent_string'

from dotenv import load_dotenv, find_dotenv
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# Load environment variables from the .env file (variables that are already
# set in the process keep their current value).
load_dotenv(find_dotenv())

#### INDEXING ####

# Load Documents
# Fetch the blog post and parse only the elements whose CSS class is one of
# post-content / post-title / post-header; the rest of the page is skipped.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

# Split - Chunking
# chunk_size=1000 with chunk_overlap=200 means neighbouring chunks share up to
# 200 characters, e.g. roughly characters 0-1000 followed by 800-1800.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Embed
# Requires the sentence-transformers package; if it is missing, install it once
# with:  pip install sentence-transformers
model_name = "BAAI/bge-small-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)

# Embedding model wrapper used to vectorise every chunk
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Build the FAISS index from the chunk embeddings
vectorstore = FAISS.from_documents(
    documents=splits,
    embedding=hf_embeddings,
)

# Dense retrieval: look chunks up by embedding similarity
retriever = vectorstore.as_retriever()

#### RETRIEVAL and GENERATION ####

# Prompt template pulled from the LangChain hub
prompt = hub.pull("rlm/rag-prompt")

# Chat model served by Groq; temperature=0 keeps sampling as deterministic as
# the backend allows
llm = ChatGroq(
    model="llama3-8b-8192",
    temperature=0,
)

# Post-processing
def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` of every document, in order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

# Chain
# The incoming question is fanned out twice: once through the retriever (whose
# hits are flattened to text by format_docs) to fill "context", and once passed
# through unchanged as "question". The prompt, model and string parser then run
# in sequence on that mapping.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

# Question
answer = rag_chain.invoke("What is Task Decomposition?")
print(answer)


Task Decomposition is a process that breaks down a complicated task into smaller and simpler steps, allowing an agent to plan ahead and utilize more test-time computation. This is achieved through techniques such as Chain of Thought (CoT) and Tree of Thoughts (Yao et al. 2023), which transform big tasks into multiple manageable tasks and provide an interpretation of the model's thinking process.
