In [1]:
import os
os.chdir("../")

In [2]:
%pwd

'd:\\Projects\\RAG Model'

In [3]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [4]:
#Extract Data From the PDF File
def load_pdf_file(data):
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()

    return documents

In [5]:
extracted_data=load_pdf_file(data='data/')

In [6]:
print(extracted_data)

[Document(metadata={'source': 'data\\sample-data.pdf', 'page': 0}, page_content="Introduction\nKnowledge base at Plumb5 is a learning platform that offers exposure to various aspects of digitalmarketing in businesses, right from its objectives to its need to its advancements. By and large it propelsbetween the most important activities i.e, acquisition and retention of audience in any business.\nOver the years, there were quite some advancements digitally in the ﬁeld of marketing and sales but don'tyou worry, we've got it all covered up for you in this course. All the concepts have been thoroughly touchedupon and simpliﬁed as you move ahead.\nNow to get into the insights of digital marketing and sales automation, we ought to know what is marketingand sales in its entirety!\nAs you read through, you will ﬁnd the journey of automation systems running from fundamentals to itsadvanced levels.\nDo you ever wonder how a business or a brand could rightly target who might potentially be their 

In [7]:
#Split the Data into Text Chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

In [8]:
text_chunks = text_split(extracted_data)
print("Length of Text Chunks", len(text_chunks))

Length of Text Chunks 30


In [9]:
#text_chunks

In [10]:
from langchain.embeddings import HuggingFaceEmbeddings

In [11]:
#Download the Embeddings from Hugging Face
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings (model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

In [12]:
embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings (model_name='sentence-transformers/all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm


In [13]:
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [14]:
#query_result

In [18]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os

pc = Pinecone(api_key="")

index_name = "testbot"

'''
pc.create_index(
name=index_name,
dimension=384,
metric="cosine",
spec=ServerlessSpec(
cloud="aws",
region="us-east-1")
)
'''

In [19]:
import os
os.environ["PINECONE_API_KEY"] = ""
os.environ["GOOGLE_API_KEY"] = ""

In [20]:
# Embed each chunk and upsert the embeddings into your Pinecone index.
from langchain_pinecone import PineconeVectorStore
docsearch = PineconeVectorStore.from_documents(
documents=text_chunks,
index_name=index_name,
embedding=embeddings,
)

In [21]:
# Load Existing index

from langchain_pinecone import PineconeVectorStore

# Embed each chunk and upsert the embeddings into your Pinecone index
docsearch = PineconeVectorStore.from_existing_index(
index_name=index_name,
embedding=embeddings
)

In [22]:
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1e970aafc20>

In [23]:
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [24]:
retrieved_docs = retriever.invoke("What Knowledge Base at Plumb5?")

In [25]:
retrieved_docs

[]

In [26]:
#from langchain_openai import OpenAI
#llm = OpenAI(temperature=0.4, max_tokens=500)

In [27]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.4, max_tokens=500)

In [28]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
'''You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, say that you don't know. 
Use three sentences maximum and keep the answer concise.
\n\n
{context}
'''
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}")
    ]
)

In [29]:
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [30]:
response = rag_chain.invoke({"input":"What is Knowledge Base at Plumb5?"})
print(response["answer"])

Plumb5 uses a comprehensive knowledge base encompassing plumbing codes, product specifications, and best practices.  This allows their platform to offer real-time insights and guidance to plumbers in the field.  It also facilitates accurate estimates and efficient job completion.
