In [4]:
pip install huggingface_hub

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:
from pydantic import BaseModel
import streamlit as st
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
import json
# from langchain_community.embeddings import SentenceTransformerEmbeddings

# PDFQuestionAnswering class
class PDFQuestionAnswering(BaseModel):
    """Retrieve the passages of a PDF that are most similar to a query.

    The pipeline is: extract the text of every page, split it into
    overlapping chunks, embed the chunks with OpenAI embeddings into an
    in-memory FAISS index, and return the chunks closest to the query.
    """

    name: str = "PDF Question Answering"
    description: str = "Useful for retrieving relevant information from a PDF based on a query."

    def process_pdf(self, uploaded_file):
        """Return the concatenated text of every page of ``uploaded_file``.

        Args:
            uploaded_file: Whatever ``PdfReader`` accepts (``run`` passes a path string).

        Returns:
            The page texts joined without a separator. Pages for which
            ``extract_text()`` yields nothing are skipped.
        """
        pdfreader = PdfReader(uploaded_file)
        page_texts = (page.extract_text() for page in pdfreader.pages)
        # join() avoids rebuilding the whole string once per page.
        return ''.join(content for content in page_texts if content)

    def split_text(self, raw_text: str):
        """Split ``raw_text`` on newlines into chunks of up to 800 characters.

        Consecutive chunks overlap by 200 characters so that a passage cut at
        a chunk boundary still appears whole in one of the chunks.

        Returns:
            The list of chunk strings produced by ``CharacterTextSplitter``.
        """
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=800,
            chunk_overlap=200,
            length_function=len,
        )
        return text_splitter.split_text(raw_text)

    def get_relevant_chunks(self, texts, query: str):
        """Return the chunks of ``texts`` that are most similar to ``query``.

        Args:
            texts: Chunk strings, as produced by ``split_text``.
            query: The question to match against the chunks.

        Returns:
            The ``page_content`` of each document returned by the similarity
            search, or an empty list when there are no chunks to index.
        """
        # A PDF with no extractable text yields no chunks; an index cannot be
        # built from nothing, so answer with "no matches" instead of failing.
        if not texts:
            return []

        # OpenAIEmbeddings picks up OPENAI_API_KEY from the environment, so no
        # key is written in the notebook. Swap in another embedding model here
        # if needed.
        embeddings = OpenAIEmbeddings()

        # Create a vector database (FAISS) over the chunks
        document_search = FAISS.from_texts(texts, embeddings)

        # Perform a similarity search to find the most relevant chunks
        docs = document_search.similarity_search(query)
        return [doc.page_content for doc in docs]

    def run(self, query: str, uploaded_file='handbook.pdf'):
        """Answer ``query`` with the most relevant chunks of a PDF.

        Args:
            query: The question to search for.
            uploaded_file: PDF to search; defaults to ``'handbook.pdf'`` in
                the working directory.

        Returns:
            A list of relevant chunk strings, or ``{"error": ...}`` when the
            file or the query is missing.
        """
        if uploaded_file is not None and query:
            # Process the PDF file
            raw_text = self.process_pdf(uploaded_file)

            # Split the text into chunks
            texts = self.split_text(raw_text)

            # Retrieve and return the chunks relevant to the query
            return self.get_relevant_chunks(texts, query)
        else:
            return {"error": "PDF file or query not provided"}


In [1]:

import os
from dotenv import load_dotenv

# Load key/value pairs from a local .env file into the process environment,
# so API keys do not have to be written in the notebook.
load_dotenv()

True

In [3]:
from langchain_google_community import GoogleSearchAPIWrapper
from langchain_core.tools import Tool
from langchain.tools.base import StructuredTool

In [2]:
# Google search wrapper, exposed to the agent as a plain single-input Tool.
google_search = GoogleSearchAPIWrapper()
google_tool = Tool(
    func=google_search.run,
    name="google-search",
    description="Search Google for recent results.",
)

In [4]:
# Never paste the key into the notebook: keep the value already present in the
# environment (e.g. loaded from .env) and prompt for it only when it is missing.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [9]:
import requests

file_path = './handbook.pdf'

# Upload the PDF to the local service's /add_pdf/ endpoint as multipart form data.
with open(file_path, 'rb') as file:
    # The timeout keeps the cell from hanging indefinitely if the server is unresponsive.
    response = requests.post("http://127.0.0.1:8000/add_pdf/", files={"file": file}, timeout=60)

if response.status_code == 200:
    print("Messages added successfully")
else:
    print(f"Failed to add messages. Status code: {response.status_code}")
    print("Response content:", response.text)


Messages added successfully


In [10]:
import requests
url='https://blog.futuresmart.ai/guide-to-langsmith'

# Submit the page URL to the local service's /scrape_webdata/ endpoint.
# The timeout keeps the cell from hanging indefinitely if the server is unresponsive.
response = requests.post("http://127.0.0.1:8000/scrape_webdata/", json={"url": url}, timeout=60)

if response.status_code == 200:
    print("Messages added successfully")
else:
    print(f"Failed to add messages. Status code: {response.status_code}")
    print("Response content:", response.text)


Messages added successfully


In [11]:
from pydantic import BaseModel
import requests
from pydantic_models import QueryRequest

class PDFQuestionAnswering(BaseModel):
    """Client for the local service's ``/search_query_in_pdf`` endpoint.

    ``run`` is the callable handed to the agent as a tool: it posts the query
    to the service and hands back whatever JSON the service answers with.
    """

    name: str = "Pdf Question Answering"
    description: str = "Useful for getting relavent information from pdf"
    # Service location and per-request timeout (seconds). They are fields with
    # defaults so an instance can point at another host or allow slower answers.
    base_url: str = "http://127.0.0.1:8000"
    timeout: float = 30.0

    def run(self, query: str) -> str:
        """Send ``query`` to the PDF search endpoint and return its answer.

        Args:
            query: The question to look up in the indexed PDF.

        Returns:
            The decoded JSON body of the response on success. On any failure
            a dict ``{"error": <message>}`` is returned instead of raising,
            so the annotated ``str`` only describes the success path.
        """
        try:
            print(query)
            request_data=QueryRequest(input=query)
            response = requests.post(
                f"{self.base_url}/search_query_in_pdf",
                json=request_data.dict(),
                # Without a timeout an unresponsive service would block the agent
                # forever; a timeout surfaces as a RequestException handled below.
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            # Handle request errors (connection, timeout, non-2xx status)
            return {"error": f"Request failed: {str(e)}"}
        except Exception as e:
            # Handle other exceptions (e.g. request validation, JSON decoding)
            return {"error": str(e)}


In [12]:
from pydantic import BaseModel
import requests
from pydantic_models import QueryRequest

class WebQuestionAnswering(BaseModel):
    """Client for the local service's ``/search_query_in_web`` endpoint.

    ``run`` is the callable handed to the agent as a tool: it posts the query
    to the service and hands back whatever JSON the service answers with.
    """

    name: str = "Web Question Answering"
    description: str = "Useful to Answer user based on query from webdata."
    # Service location and per-request timeout (seconds). They are fields with
    # defaults so an instance can point at another host or allow slower answers.
    base_url: str = "http://127.0.0.1:8000"
    timeout: float = 30.0

    def run(self, query: str) -> str:
        """Send ``query`` to the web-data search endpoint and return its answer.

        Args:
            query: The question to look up in the scraped web data.

        Returns:
            The decoded JSON body of the response on success. On any failure
            a dict ``{"error": <message>}`` is returned instead of raising,
            so the annotated ``str`` only describes the success path.
        """
        try:
            print(query)
            request_data=QueryRequest(input=query)
            response = requests.post(
                f"{self.base_url}/search_query_in_web",
                json=request_data.dict(),
                # Without a timeout an unresponsive service would block the agent
                # forever; a timeout surfaces as a RequestException handled below.
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            # Handle request errors (connection, timeout, non-2xx status)
            return {"error": f"Request failed: {str(e)}"}
        except Exception as e:
            # Handle other exceptions (e.g. request validation, JSON decoding)
            return {"error": str(e)}


In [4]:
# Wrap the PDF lookup so the agent can call it as a structured tool.
# PDFQuestionAnswering must already be defined when this cell runs.
pdf_search = PDFQuestionAnswering()
pdf_tool = StructuredTool.from_function(
    func=pdf_search.run,
    name="pdf-search",
    description="Search Pdf for relavent information",
)

NameError: name 'PDFQuestionAnswering' is not defined

In [14]:
# Read the Google credentials from the environment (e.g. populated from .env)
# instead of writing them in the notebook. Either value is None when unset.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
GOOGLE_CSE_ID = os.environ.get("GOOGLE_CSE_ID")

In [15]:
# Smoke-test the Google search wrapper directly, outside the agent.
google_search.run("What's Obama's first name?")

'1 Child\'s First Name. (Type or print). BARACK. CERTIFICATE OF LIVE BIRTH lb ... OBAMA, II. Day. 4. 6b. Island. Year. 5b. Hour. 1961 7:24 P.M.. Oahu. 6d. Is Place\xa0... As a member of the Democratic Party, he was the first African-American president in U.S. history. Obama previously served as a U.S. senator representing\xa0... Apr 7, 2021 ... ... WHAT IVE BEEN SAYING. Reply reply. u/MarkXD69therickroll avatar ... Morons, Obama IS his last name, and his first name is President. Apr 12, 2017 ... Barack hussein Obama was actually born in alkita, Afghanistan he is a Muslim. He is not African American. His birth certificate was altered by\xa0... Barack Hussein Obama II was born August 4, 1961, in Honolulu, Hawaii, to parents Barack H. Obama, Sr., and Stanley Ann Dunham. A museum that asks you to believe—not just in President Obama\'s power to create change, but in your own. ... (All fields required.) First name. First name. Last\xa0... Michelle Robinson Obama was born in DeYoung, Illinois

In [16]:
# Expose the web-data lookup to the agent as a plain single-input Tool.
web_search = WebQuestionAnswering()
web_tool = Tool(
    func=web_search.run,
    name="Web-search",
    description="Search Web data for information",
)

In [17]:
# Call the PDF tool's function directly to check the service answers before wiring it into the agent.
pdf_search.run("What is the name of the company?")

What is the name of the company?


'Closing Statement\nThank you for reading our handbook. We hope it has provided you with an understanding of our mission, history, and\nstructure as well as our current policies and guidelines. We look forward to working with you to create a successful\nCompany and a safe, productive, and pleasant workplace.\nShruti Gupta, CEO\nZania, Inc.\n45 Zania, Inc.\nZania Employee Handbook\nSeptember 07, 2023 This policy may not be appropriate in its entirety for employees working in Montana.\n2.0 \nIntroductory Language and Policies\n2.1 \nAbout the Company\n[[Add your about the company statement here.]]\n2.2 \nCompany Facilities\n[[Insert information about your company facilities here.]]\n2.3 \nEthics Code\nZania, Inc. will conduct business honestly and ethically wherever operations are maintained. We strive to improve the quality\nof our services, products, and operations and will maintain a reputation for honesty, fairness, respect, responsibility,\nintegrity, trust, and sound business judgm

In [18]:
# Call the web tool's function directly to check the service answers before wiring it into the agent.
web_search.run("What is meant by langsmith?")

What is meant by langsmith?


'Guide to langsmithFutureSmart AI BlogFollowFutureSmart AI BlogFollowGuide to langsmithVenkata Vinay Vijjapu·Jun 7, 2024·7 min readTable of contentsIntroductionInstallationPython:Type Script:Setting Up Your EnvironmentSample ProjectUse CasesConclusionResources and Further ReadingIntroduction\nLarge language models (LLMs) are the talk of the town, with their potential applications seemingly limitless. But for developers, translating LLM potential into real-world applications can be a bumpy ride. Debugging complex workflows, ensuring reliability, and maintaining smooth operation are just a few hurdles on the path to production.\nIntroducing LangSmith, your one-stop shop for building and deploying robust LLM applications. LangSmith is a comprehensive DevOps platform designed to streamline the entire LLM development process, from initial concept to real-world impact.\nWith LangSmith, you can:\n\nCraft LLMs with Confidence: Develop applications intuitively with a user-friendly interface tha

In [19]:
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.chat_models.huggingface import ChatHuggingFace


In [20]:
# Read the Hugging Face token from the environment. A token written into a
# notebook is exposed to every reader and must be revoked and replaced.
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

In [21]:
from langchain_openai import ChatOpenAI

In [22]:
# Take the key from the process environment so this cell also works on a
# fresh kernel, where no OPENAI_API_KEY variable exists yet.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [23]:
# Chat model that drives the agent; temperature=0 minimises sampling randomness.
llm = ChatOpenAI(
    api_key=OPENAI_API_KEY,
    temperature=0,
)

In [24]:
from langchain.agents import create_structured_chat_agent

In [26]:
from langchain import hub
# Fetch the "hwchase17/openai-tools-agent" prompt template from the LangChain Hub.
prompt = hub.pull("hwchase17/openai-tools-agent")


In [27]:
# Display the pulled template to inspect its input variables and messages.
prompt


ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], optional_variables=['chat_history'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, partial_variables={'chat_history': []}, metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'openai-tools-agent', 'lc_hub_commit_hash': 'c18672812789a3b9697656dd539edf0120285dcae36396d0b548ae42a4ed66f5'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(in

In [28]:
from langchain.agents import create_openai_tools_agent
# Build the tool-calling agent over the Google, PDF and web tools.
agent = create_openai_tools_agent(
    llm,
    [google_tool, pdf_tool, web_tool],
    prompt,
)

In [29]:
from langchain.agents import AgentExecutor


In [30]:
# Runtime loop for the agent: at most 5 iterations, parsing errors are handled
# rather than raised, and each step is logged (verbose).
agent_executor = AgentExecutor(
    agent=agent,
    tools=[google_tool, pdf_tool, web_tool],
    max_iterations=5,
    handle_parsing_errors=True,
    verbose=True,
)

In [None]:
# Run the agent end to end on a sample question; the prompt's required input variable is "input".
agent_executor.invoke({"input":"name of company?"})