In [1]:
from dotenv import load_dotenv
# Location of the env file that holds the API keys the cells below rely on.
env_file = r'C:\Users\DELL\OneDrive\Desktop\chatbot\env'
load_dotenv(env_file)

True

In [3]:
# Import necessary libraries
from langchain.agents import initialize_agent, Tool, AgentType
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DeepLake
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA

# ----------------------------------------
# 1. Initialize the LLM (Language Model)
# ----------------------------------------
# Completion-style OpenAI model shared by every chain and by the agent below.
# temperature=0 is passed so that sampling is as deterministic as the API allows.
llm = OpenAI(
    temperature=0,
    model="gpt-3.5-turbo-instruct",
)

# ----------------------------------------
# 2. Define Tools
# ----------------------------------------

# Tool 1: Google Search
# Wraps the Google Search API so the agent can look up fresh information on the web.
search = GoogleSearchAPIWrapper()

# The description is the text the agent sees when deciding whether to call this tool;
# `func` receives the query string and returns the search results.
search_tool = Tool(
    description="Useful for finding current or recent information on events or news.",
    func=search.run,
    name="Google Search",
)

# Tool 2: Summarizer
# A single-variable prompt ("query") is bound to the LLM through an LLMChain,
# and the chain's `run` method is exposed to the agent as a tool.
summarize_prompt = PromptTemplate(
    template="Write a concise summary of the following text: {query}",
    input_variables=["query"],
)

# Chain that fills the prompt with the input text and sends it to the LLM
summarize_chain = LLMChain(prompt=summarize_prompt, llm=llm)

summarizer_tool = Tool(
    description="Useful for summarizing lengthy text inputs.",
    func=summarize_chain.run,
    name="Summarizer",
)

# Tool 3: RetrievalQA with DeepLake (for preloaded documents)
# A vector database stores the sample documents so they can be retrieved by query.

# Embedding model used to vectorize the documents for storage and retrieval
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# Sample documents to be stored in the vector database
texts = [
    "Napoleon Bonaparte was born on August 15, 1769.",
    "Lady Gaga was born on March 28, 1986.",
    "Michael Jordan was born on February 17, 1963.",
]

# Wrap the raw strings into Document objects, splitting any text longer than chunk_size
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.create_documents(texts)

# Open (or create) the Deep Lake dataset that backs the vector store
my_activeloop_org_id = "kunalukey809"
my_activeloop_dataset_name = "langchain_demo_dataset"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"  # Define dataset path

# `embedding` replaces the deprecated `embedding_function` keyword, which emits
# "Using embedding function is deprecated ... Please use embedding instead."
db = DeepLake(dataset_path=dataset_path, embedding=embeddings)

# NOTE(review): the dataset is persistent and is loaded if it already exists, so every
# re-run of this cell appends the same three documents again (duplicate rows).
# Consider passing overwrite=True to DeepLake, or skipping this call when the dataset
# is already populated — confirm against the DeepLake docs before changing.
db.add_documents(docs)  # Add documents to the vector database

# Question-answering chain over the stored documents.
# chain_type="stuff" places all retrieved documents into a single prompt for the LLM.
doc_retriever = db.as_retriever()  # Use the DeepLake database as the retriever
retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=doc_retriever,
    chain_type="stuff",
)

# Expose the chain to the agent as a tool; `func` takes the question string
retrieval_tool = Tool(
    description="Useful for answering questions based on preloaded documents.",
    func=retrieval_qa.run,
    name="Retrieval QA",
)

# ----------------------------------------
# 3. Initialize the Agent
# ----------------------------------------
# Combine all the tools into a list and initialize the agent.
tools = [search_tool, summarizer_tool, retrieval_tool]

# Initialize the agent, specifying which tools to use and the language model to process the queries.
# NOTE: the keyword that selects the agent kind is `agent`, not `agent_type`.
# An `agent_type=` keyword is not a parameter of initialize_agent, so the selection
# would only take effect through the function's default.
agent = initialize_agent(
    tools=tools,  # List of tools the agent will use
    llm=llm,  # The language model used by the agent
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,  # ReAct agent that picks tools from their descriptions
    verbose=True  # Enable verbose output to track the agent's decisions
)

# ----------------------------------------
# 4. Run Queries
# ----------------------------------------

# Each entry pairs the banner printed before a run with the question sent to the agent.
# The entries run in order; the agent decides on its own which tool(s) to call.
example_runs = [
    # Example 1: search for news about the Boeing Starliner and summarize it
    (
        "\n### Example 1: Summarize Latest News ###",
        "What's the latest news about the Boeing Starliner? Summarize it.",
    ),
    # Example 2: factual question answerable from the preloaded documents
    (
        "\n### Example 2: Retrieval QA ###",
        "When was Napoleon Bonaparte born?",
    ),
    # Example 3: multi-step request combining search and summarization
    (
        "\n### Example 3: Multi-Step Query ###",
        "Search for the latest updates on the stock market in 2024 and summarize the results.",
    ),
    # Example 4: another question answerable from the preloaded documents
    (
        "\n### Example 4: Retrieval QA - Michael Jordan ###",
        "When was Michael Jordan born?",
    ),
]

for banner, question in example_runs:
    print(banner)
    response = agent.run(question)  # `response` keeps the most recent answer
    print(response)


Using embedding function is deprecated and will be removed in the future. Please use embedding instead.


Deep Lake Dataset in hub://kunalukey809/langchain_demo_dataset already exists, loading from the storage


Creating 3 embeddings in 1 batches of size 3:: 100%|██████████| 1/1 [00:42<00:00, 42.61s/it]


Dataset(path='hub://kunalukey809/langchain_demo_dataset', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
 embedding  embedding  (6, 1536)  float32   None   
    id        text      (6, 1)      str     None   
 metadata     json      (6, 1)      str     None   
   text       text      (6, 1)      str     None   

### Example 1: Summarize Latest News ###


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should use Google Search to find recent news articles about the Boeing Starliner.
Action: Google Search
Action Input: "Boeing Starliner news"[0m
Observation: [36;1m[1;3mBoeing's Starliner spacecraft undocked from the International Space Station on Friday, Sept. 6, with separation confirmed at 6:04 pm Eastern time. Boeing's Crew Space Transportation (CST)-100 Starliner spacecraft is being developed in collaboration with NASA's Commercial Crew Program. The Starliner 