### Chat with PDFs (GEN-AI) Project

In [1]:
import langchain
from langchain_openai import OpenAI,OpenAIEmbeddings
from PyPDF2 import PdfReader
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.vectorstores.faiss import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader

from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import GooglePalm

from dotenv import load_dotenv
import os 

In [2]:
# Load the PDF; the loader returns a list of documents that the code below
# treats as one entry per page.
loader = PyPDFLoader("attention model.pdf")
pdf_doc = loader.load()

# Merge every page into one string for the text splitter. Pages are joined
# with a newline so the last line of one page does not fuse with the first
# line of the next page.
text = "\n".join(page.page_content for page in pdf_doc)

In [3]:
# Character-based splitter: 2000-character chunks (measured with len) that
# overlap by 200 characters, breaking on newlines.
text_splitter = CharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200,
    separator="\n",
    length_function=len,
)

In [4]:
# Cut the full document text into chunks that will be embedded individually.
chunks = text_splitter.split_text(text)

In [11]:
# Embeddings from Hugging Face (Instructor model): build a FAISS index over
# the chunks and save it to the local "faiss_index" path.
vectordb_file_path = "faiss_index"
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large"
)
vectorstore = FAISS.from_texts(chunks, instructor_embeddings)
vectorstore.save_local(vectordb_file_path)
# To reload the saved index later instead of rebuilding it:
# vectordb = FAISS.load_local(vectordb_file_path, instructor_embeddings)

load INSTRUCTOR_Transformer
max_seq_length  512


In [14]:
# Embeddings using OpenAI.
# Load variables from a local .env file first so the OpenAI API key is
# available in the environment; load_dotenv() does not override variables
# that are already set, so calling it more than once is harmless.
load_dotenv()
embedding = OpenAIEmbeddings()
# NOTE: this rebinds `vectorstore`, so later cells use the OpenAI-based index
# rather than the Instructor-based one created earlier in the notebook.
vectorstore = FAISS.from_texts(texts=chunks, embedding=embedding)

In [15]:
# Configure the Google PaLM API.
# Load variables from a local .env file before reading the key; load_dotenv()
# does not override variables that are already set in the environment.
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    # Fail early with an actionable message instead of a validation error
    # raised from inside the LLM constructor.
    raise ValueError(
        "GOOGLE_API_KEY is not set; export it or add it to a .env file."
    )
llm = GooglePalm(google_api_key=api_key, temperature=0.2)

In [16]:
# Quick sanity check that the LLM responds. Uses .invoke(), matching the
# chain call later in the notebook, instead of the deprecated direct call
# on the LLM object.
llm.invoke('who is the prime minister of india')

'Narendra Modi'

In [17]:
# Conversation buffer memory: history is kept under the "chat_history" key
# and returned as message objects (return_messages=True).
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

In [18]:
# Conversational retrieval chain wiring together the LLM, a retriever over the
# FAISS vector store, and the chat memory.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=memory,
    retriever=vectorstore.as_retriever(),
)

In [19]:
# Ask a question about the PDF; the chain returns a dict of outputs.
result = conversation_chain.invoke(
    {"question": "what is attention?"}
)

In [21]:
# Show only the generated answer text from the chain output.
result["answer"]

'Attention is a mechanism that allows the decoder to selectively retrieve information from the annotations.'