In [1]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

True

In [3]:
import os

# Set environment variables
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_PROJECT'] = 'cortex'

# Get keys from the environment
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

if langchain_api_key:
    os.environ['LANGCHAIN_API_KEY'] = langchain_api_key
else:
    raise ValueError("LANGCHAIN_API_KEY is not set in the environment.")

if groq_api_key:
    os.environ['GROQ_API_KEY'] = groq_api_key
else:
    raise ValueError("GROQ_API_KEY is not set in the environment.")

PART 1 - OVERVIEW   

In [5]:
import os
from dotenv import load_dotenv, find_dotenv
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# Set the USER_AGENT environment variable (for tracking requests)
os.environ['USER_AGENT'] = 'your_custom_user_agent_string'

# Load environment variables from .env file
load_dotenv(find_dotenv())

#### INDEXING ####

# Load Documents
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

##1 - 0 - 1000 , 800 - 1800

# Split - Chunking
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed
model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}

# Install and import sentence-transformers (if not installed, run this line once)
# !pip install sentence-transformers  # Uncomment this line if the package isn't installed

# Initialize HuggingFaceBgeEmbeddings
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

# Initialize FAISS vector store
vectorstore = FAISS.from_documents(documents=splits, embedding=hf_embeddings)

retriever = vectorstore.as_retriever()  # Dense Retrieval - Embeddings/Context based

#### RETRIEVAL and GENERATION ####

# Prompt
prompt = hub.pull("rlm/rag-prompt")

# LLM (Language Model)
llm = ChatGroq(model="llama3-8b-8192", temperature=0)

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Question
print(rag_chain.invoke("What is Task Decomposition?"))


Task Decomposition is a process that breaks down a complicated task into smaller and simpler steps, allowing an agent to plan ahead and utilize more test-time computation. This is achieved through techniques such as Chain of Thought (CoT) and Tree of Thoughts (Yao et al. 2023), which transform big tasks into multiple manageable tasks and provide an interpretation of the model's thinking process.


PART 2 - INDEXING

In [6]:
# Documents
question = "What kinds of pets do I like?"
document = "My favorite pet is a cat."

Document Loaders

In [13]:
# Load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

Splitter

In [8]:
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50)

# Make splits
splits = text_splitter.split_documents(blog_docs)

Text Embedding Models

In [9]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# Define model and encoding configurations
model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}

# Initialize the HuggingFaceBgeEmbeddings
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

# Embed the query and document
query_result = hf_embeddings.embed_query(question)
document_result = hf_embeddings.embed_query(document)

# Print the length of the query embedding (for example)
print(f"Query embedding length: {len(query_result)}")


Query embedding length: 384


In [10]:
import numpy as np

# Cosine similarity function
def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2)

# Calculate cosine similarity
similarity = cosine_similarity(query_result, document_result)

# Print result
print("Cosine Similarity:", similarity)


Cosine Similarity: 0.9023052170240916


Vectorstores

In [11]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [12]:
from langchain_community.vectorstores import FAISS

# Create a FAISS vector store
vectorstore = FAISS.from_documents(documents=splits, embedding=hf_embeddings)

# Create a retriever from the FAISS vector store
retriever = vectorstore.as_retriever()

# Now you can use the retriever to search documents or queries
print("FAISS retriever created successfully.")

FAISS retriever created successfully.


PART 3 - RETRIEVAL

In [15]:
docs = retriever.invoke("What is Task Decomposition?")

In [16]:
len(docs)

4

In [17]:
docs

[Document(id='28cf3efa-4c1b-4418-8978-8cedba86e3b7', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The sear

PART 4 - GENERATION

In [None]:
# Import required modules
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate

# Define the prompt template
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

# Create the prompt using LangChain's ChatPromptTemplate
prompt = ChatPromptTemplate.from_template(template)

prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [19]:
# Example of using ChatGroq 
llm = ChatGroq(model="llama3-8b-8192", temperature=0)

In [20]:
# Chain
chain = prompt | llm

In [23]:
# Run
chain.invoke({"context":docs,"question":"What is Task Decomposition?"})

AIMessage(content='According to the provided context, Task Decomposition is a process where an agent breaks down a complicated task into smaller and simpler steps. This can be done using various techniques, such as:\n\n1. Chain of Thought (CoT): instructing the model to "think step by step" to decompose hard tasks into smaller steps.\n2. Tree of Thoughts (Yao et al. 2023): decomposing the problem into multiple thought steps and generating multiple thoughts per step, creating a tree structure.\n3. Simple prompting: using LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", or using task-specific instructions.\n\nThis process helps the agent to better understand the task and plan ahead, making it more manageable and efficient.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 159, 'prompt_tokens': 1109, 'total_tokens': 1268, 'completion_time': 0.1325, 'prompt_time': 0.138114704, 'queue_time': 0.026119155000000005,

RAG Chains

In [24]:
from langchain import hub

# Pull prompt from LangChain Hub
prompt_hub_rag = hub.pull("rlm/rag-prompt")

In [25]:
prompt_hub_rag

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [36]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Define the pipeline
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}  # Use context and question
    | prompt_hub_rag  # Prompt for RAG
    | llm  # The LLM model for generating the response
    | StrOutputParser()  # Parse the output to string
    | (lambda x: "\n".join([q.strip() for q in x.split("\n") if q.strip()])) 
)

# Invoke the chain with a user question
result = rag_chain.invoke("What is Task Decomposition?")

# Print the wrapped result
print(result)

Task Decomposition is the process of breaking down a complex task into smaller, simpler steps. This can be done through techniques such as Chain of Thought or Tree of Thoughts, or by using simple prompting like "Steps for XYZ" or task-specific instructions.
