In [1]:
!pip install langchain-community tiktoken langchainhub chromadb langchain-ollama sentence-transformers>=2.2.0

In [2]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.embeddings import HuggingFaceEmbeddings # Embedding Model(Local)
from langchain_community.chat_models import ChatOllama #LLM Model(Ollama - Local)

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
import os
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = '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'

In [4]:
# Load Documents
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()
#docs

In [5]:
# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
#splits

In [6]:
# Embed
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={'device': 'cpu'} # Use 'cuda' if you have an NVIDIA GPU, otherwise 'cpu'
)

  embeddings = HuggingFaceEmbeddings(


In [7]:
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

In [8]:
#RETRIEVAL and GENERATION
# Prompt
prompt = hub.pull("rlm/rag-prompt")

In [9]:
# LLM
llm = ChatOllama(model="gemma:2b", temperature=0) # Using Ollama as the local LLM. Ensure Ollama server is running and the model is pulled. #!ollama pull llama2

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

  llm = ChatOllama(model="gemma:2b", temperature=0) # Using Ollama as the local LLM. Ensure Ollama server is running and the model is pulled. #!ollama pull llama2


In [10]:
# Chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [11]:
question = "What is Task Decomposition?"
response = rag_chain.invoke(question)
print(response)

Sure, here's the answer to your question:

Task decomposition is a technique that breaks down a complex task into smaller, simpler steps. This helps the model to understand the task and plan it more effectively.


In [12]:
# Documents
question = "What kinds of pets do I like?"
document = "My favorite pet is a cat."

In [13]:
import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

num_tokens_from_string(question, "cl100k_base")

8

In [14]:
query_result = embeddings.embed_query(question)
document_result = embeddings.embed_query(document)
len(query_result)

384

In [15]:
import numpy as np

def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2)

similarity = cosine_similarity(query_result, document_result)
print("Cosine Similarity:", similarity)

Cosine Similarity: 0.7378822169755471


In [16]:
#INDEXING

# Load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

In [17]:
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50)

# Make splits
splits = text_splitter.split_documents(blog_docs)

In [18]:
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

In [19]:
docs = retriever.invoke("What is Task Decomposition?")

In [20]:
len(docs)

1

In [21]:
# Prompt
prompt = hub.pull("rlm/rag-prompt")

In [22]:
# LLM
llm = ChatOllama(model="gemma:2b", temperature=0) # Adjust model name as needed

In [24]:
# Chain
rag_chain = (
    {"context": retriever , "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [25]:
question = "What is Task Decomposition?"
response = rag_chain.invoke(question)
print(response)

Task decomposition is a technique that breaks down a complex task into smaller, simpler steps. It can be done by various methods, including prompting an LLM with simple instructions, using task-specific instructions, or with human inputs.


In [26]:
#INDEXING

# Load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50)

# Make splits
splits = text_splitter.split_documents(blog_docs)

# Index
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

In [27]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser

generate_queries = (
    prompt_perspectives 
    | ChatOllama(model="gemma:2b", temperature=0)
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [35]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    unique_docs = list(set(flattened_docs))
    return [loads(doc) for doc in unique_docs]

# Retrieve
question = "What is task decomposition for LLM agents?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})
len(docs)

13

In [33]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOllama(model="gemma:2b", temperature=0)

final_rag_chain = ( 
    {"context": retrieval_chain, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'Sure, here is the answer to the question:\n\nThe passage explains that task decomposition is a process of breaking down large tasks into smaller, manageable subgoals. This approach enables efficient handling of complex tasks by dividing them into smaller, more manageable steps.'