In [22]:
from dotenv import load_dotenv

from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
import bs4
import pprint

import os

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# No API key is passed to ChatGroq explicitly later in the notebook, so the
# key has to be present in the environment. Fail here with a clear message
# instead of the opaque "TypeError: str expected, not NoneType" that
# assigning None into os.environ would raise.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["GROQ_API_KEY"] = groq_api_key

Data Ingestion

In [16]:
# Blog posts that make up the knowledge base for this notebook.
post_urls = (
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2021-03-21-lm-toxicity/",
)

# Restrict HTML parsing to elements carrying one of these CSS classes.
content_filter = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)

loader = WebBaseLoader(
    web_paths=post_urls,
    bs_kwargs={"parse_only": content_filter},
)

docs = loader.load()

print(docs)

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='\n\n      LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistake

In [17]:
# Show the extracted text of every loaded page, one document after another.
page_texts = [doc.page_content for doc in docs]
for page_text in page_texts:
    print(page_text)



      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:

Planning

Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.
Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.


Memory

Short-term memory: I 

Document Processing: Text Chunking

In [19]:
# Chunking parameters passed to the splitter; tune these to trade retrieval
# granularity against context carried over between neighbouring chunks.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split every loaded document into overlapping chunks, then show how many
# chunks were produced.
splits = text_splitter.split_documents(docs)
len(splits)

152

Embedding Generation

In [18]:
# Hugging Face model used to embed the chunks.
embed_model = "BAAI/bge-m3"

# Run the model on CPU and ask the encoder to normalize the embeddings.
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=embed_model,
    model_kwargs=dict(device="cpu"),
    encode_kwargs=dict(normalize_embeddings=True),
)



  hf_embeddings = HuggingFaceBgeEmbeddings(model_name=embed_model, model_kwargs={"device": "cpu"},
  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


152

Vector Store with retrieval

In [21]:
# Embed all chunks and index them in a FAISS vector store.
vectorstore = FAISS.from_documents(documents=splits, embedding=hf_embeddings)

# Expose the store as a retriever using its default search settings.
retriever = vectorstore.as_retriever()

Prompt template

In [30]:
# Fetch the "rlm/rag-prompt" prompt via the LangChain hub module.
prompt = hub.pull("rlm/rag-prompt")

# Show the raw template text of the prompt's first message.
rag_template_text = prompt.messages[0].prompt.template
pprint.pprint(rag_template_text)

('You are an assistant for question-answering tasks. Use the following pieces '
 "of retrieved context to answer the question. If you don't know the answer, "
 "just say that you don't know. Use three sentences maximum and keep the "
 'answer concise.\n'
 'Question: {question} \n'
 'Context: {context} \n'
 'Answer:')


Query Processing
<pre style="font-size: 12px;">

User Question  →  Retriever  → format_docs  →  Prompt Template  →  Llama 3  →   String Output
     ↓               ↓           ↓                   ↓               ↓             ↓
"Elaborate on..." → [docs] → "doc1\ndoc2..." → "Use context..." → "Answer..." → "Final answer"
</pre>

In [31]:
def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The page contents concatenated in order, separated by a blank line
        (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Chat model served through Groq.
llm = ChatGroq(model="llama3-8b-8192")

# Input mapping for the prompt: "context" is the retriever output piped
# through format_docs, and "question" is the user's input passed through
# unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build prompt inputs -> fill the prompt -> call the model ->
# parse the model output into a plain string.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

rag_chain.invoke("Elaborate on human annotations?")

'Human annotations refer to the labels or feedback provided by humans to train or evaluate language models. In the context of Chain of Hindsight, human feedback data consists of a sequence of past outputs, each annotated with a human rating and hindsight feedback.'