In [2]:
from langchain_community.document_loaders import TextLoader

# Plain-text loader: read speech.txt (decoded as UTF-8) and keep the loaded
# result in `text_document`, then print it.
loader = TextLoader(file_path="speech.txt", encoding='UTF-8')
text_document = loader.load()
print(text_document)


[Document(metadata={'source': 'speech.txt'}, page_content='“I Have a Dream,” delivered by Dr. Martin Luther King Jr. on August 28, 1963, during the March on Washington for Jobs and Freedom, is one of the most iconic and impactful speeches in American history.\n\nThe speech, set against the backdrop of the Lincoln Memorial, begins with King’s acknowledgment of the Emancipation Proclamation, which had freed millions of slaves a century earlier. However, he quickly points out that African Americans were still not free from segregation, discrimination, and poverty. King’s speech is a vivid portrayal of the struggles faced by Black Americans and a call for an end to racial injustice.\n\nKing employs the metaphor of a “bad check,” saying that America has given the Negro people a bad check which has come back marked “insufficient funds.” However, he refuses to believe that the bank of justice is bankrupt. This metaphor underlines the broken promises made to African Americans.\n\nCentral to hi

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()
# Re-export the Hugging Face token so libraries that read it from the process
# environment can find it. os.environ only accepts str values, so assigning the
# None that os.getenv returns for a missing variable would raise TypeError;
# guard against that and tell the reader what is missing instead.
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_token:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
else:
    print(
        "HUGGINGFACEHUB_API_TOKEN is not set; add it to your environment or "
        ".env file if you need authenticated Hugging Face Hub access."
    )



In [4]:
# Web-based loader
import os

# LangChain warns "USER_AGENT environment variable not set" when the web loader
# is imported without it (the warning is visible in this cell's saved output),
# so set a default *before* the import below. setdefault keeps any value that
# the environment or the .env file already provides.
# NOTE: if the loader module was already imported in this kernel, restart the
# kernel for the new value to be picked up.
os.environ.setdefault("USER_AGENT", "langchain-loaders-notebook")

import bs4
from langchain_community.document_loaders import WebBaseLoader

# Load the blog post, parsing only the elements whose CSS class is one of
# post-footer / post-content / post-header (no chunking or indexing here).
loader = WebBaseLoader(
    web_paths=["https://lilianweng.github.io/posts/2023-06-23-agent/"],
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-footer", "post-content", "post-header")
        )
    },
)
text_documents = loader.load()
print(text_documents)

                      

USER_AGENT environment variable not set, consider setting it to identify your requests.


[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='\n\n      LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistake

In [5]:
# PDF loader
from langchain_community.document_loaders import PyPDFLoader

# Load unit2.pdf into `docs` (used by the chunking cell further down) and
# print the loaded result.
loader = PyPDFLoader(file_path="unit2.pdf")
docs = loader.load()
print(docs)

[Document(metadata={'producer': 'Microsoft® PowerPoint® 2016', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2025-06-07T10:23:45+05:45', 'title': '', 'author': 'HP', 'moddate': '2025-06-07T10:23:45+05:45', 'source': 'unit2.pdf', 'total_pages': 45, 'page': 0, 'page_label': '1'}, page_content='INTELLIGENT AGENT\nUnit 2\nPresented By: Saroj Bhandari'), Document(metadata={'producer': 'Microsoft® PowerPoint® 2016', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2025-06-07T10:23:45+05:45', 'title': '', 'author': 'HP', 'moddate': '2025-06-07T10:23:45+05:45', 'source': 'unit2.pdf', 'total_pages': 45, 'page': 1, 'page_label': '2'}, page_content='Syllabus(4 hours)\n2.1. Introduction of agents, Structure of Intelligent agent, Properties of Intelligent\nAgents\n2.2. Configuration of Agents, PEAS description of Agents, PAGE\n2.3. Types of Agents: Simple Reflexive, Model Based, Goal Based, Utility Based,\nLearning Agent\n2.4. Environment Types: Deterministic, Stochastic, Sta

In [6]:
# Chunking
try:
    # Preferred import: the splitters live in the langchain-text-splitters package.
    from langchain_text_splitters import RecursiveCharacterTextSplitter
except ImportError:
    # Fallback for older LangChain installs that only expose the legacy path.
    from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=250 and chunk_overlap=50; it is applied to
# the PDF documents loaded into `docs` earlier in the notebook.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
documents = text_splitter.split_documents(docs)

# Show only the first five chunks to keep the output readable.
print(documents[:5])

[Document(metadata={'producer': 'Microsoft® PowerPoint® 2016', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2025-06-07T10:23:45+05:45', 'title': '', 'author': 'HP', 'moddate': '2025-06-07T10:23:45+05:45', 'source': 'unit2.pdf', 'total_pages': 45, 'page': 0, 'page_label': '1'}, page_content='INTELLIGENT AGENT\nUnit 2\nPresented By: Saroj Bhandari'), Document(metadata={'producer': 'Microsoft® PowerPoint® 2016', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2025-06-07T10:23:45+05:45', 'title': '', 'author': 'HP', 'moddate': '2025-06-07T10:23:45+05:45', 'source': 'unit2.pdf', 'total_pages': 45, 'page': 1, 'page_label': '2'}, page_content='Syllabus(4 hours)\n2.1. Introduction of agents, Structure of Intelligent agent, Properties of Intelligent\nAgents\n2.2. Configuration of Agents, PEAS description of Agents, PAGE'), Document(metadata={'producer': 'Microsoft® PowerPoint® 2016', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2025-06-07T10:23:45+05:45', 

In [7]:
# Vector embeddings + Chroma vector store
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Build the store from the first 20 chunks only, embedding them with a
# default-configured HuggingFaceEmbeddings instance.
db = Chroma.from_documents(
    documents=documents[:20],
    embedding=HuggingFaceEmbeddings(),
)

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Query the vector store currently bound to `db` and display the text of the
# first returned document.
query = "what is an intelligent agent"
result = db.similarity_search(query=query)
result[0].page_content

'Agents\n• An AI agent is a software program that can interact with its surroundings,\ngather information, and use that information to complete tasks on its own to\nachieve goals set by humans.'

In [9]:
# FAISS vector store
from langchain_community.vectorstores import FAISS

# Rebinds `db`: from here on, queries against `db` go to the FAISS store built
# from the same first 20 chunks.
db = FAISS.from_documents(
    documents=documents[:20],
    embedding=HuggingFaceEmbeddings(),
)


In [10]:
# Run a similarity search against the store currently bound to `db` and
# display the text of the first returned document.
query = "what is an intelligent agent?"
result = db.similarity_search(query=query)
result[0].page_content

'Agents\n• An AI agent is a software program that can interact with its surroundings,\ngather information, and use that information to complete tasks on its own to\nachieve goals set by humans.'