In [1]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.embeddings import TensorflowHubEmbeddings
from langchain_cohere.embeddings import CohereEmbeddings
from langchain.vectorstores import FAISS
# from langchain.llms import GPT4All
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA

### Loading and Extracting PDF Content

In [2]:
# Location of the source PDF, relative to the directory the notebook runs from.
PDF_PATH = "../sample_documents/Kidney-Stones-Patient-Guide.pdf"

# Build the loader, then read the PDF. `pages` is an indexable collection whose
# items expose `.page_content` (the first item is inspected in the next cell).
pdf_loader = PyPDFLoader(file_path=PDF_PATH)
pages = pdf_loader.load()

In [3]:
# Sanity check: display the text extracted from the first loaded page.
first_page = pages[0]
first_page.page_content

'KIDNEY STONES\nKidney Stones Patient Guide'

### Creating Chunks from Page Content

In [3]:
# Splitter settings, named so they are easy to tune in one place.
CHUNK_SIZE = 800     # upper bound on the size of each chunk
CHUNK_OVERLAP = 50   # amount of content shared between neighbouring chunks

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Break the loaded pages into smaller documents for embedding and retrieval.
chunks = text_splitter.split_documents(pages)

### Initializing Embedding Model

In [4]:
# Embedding model used to vectorise the chunks.
# Alternative tried earlier (local model, import is commented out at the top):
#   TensorflowHubEmbeddings(model_url="../models/universal-sentence-encoder_4")
# NOTE(review): no API key is passed here, so the Cohere client presumably
# reads it from the environment - confirm before running on a fresh machine.
EMBEDDING_MODEL = "embed-english-light-v3.0"
embeddings = CohereEmbeddings(model=EMBEDDING_MODEL)

### Storing Embeddings in a Vector DB (FAISS)

In [5]:
# Embed every chunk with `embeddings` and build the FAISS vector store from them.
db = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)

### Initializing LLM

In [6]:
# Chat model that writes the answers.
# Alternative tried earlier (local model, import is commented out at the top):
#   GPT4All(model="../models/Llama-3.2-1B-Instruct-Q4_0.gguf", device="cpu")
# NOTE(review): no API key is passed here, so the Groq client presumably
# reads it from the environment - confirm before running on a fresh machine.
LLM_MODEL = "mistral-saba-24b"
LLM_TEMPERATURE = 0  # zero temperature was chosen by the original author

llm = ChatGroq(model=LLM_MODEL, temperature=LLM_TEMPERATURE)

### Initializing QA Chain

In [7]:
# Expose the FAISS store as a retriever (default settings), then wire it to
# the chat model in a RetrievalQA chain using the default chain type.
retriever = db.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

### Asking Questions About the Document

In [None]:
# Question 1: a single-turn question answered from the indexed PDF.
query = "What type of stone is formed due to high volume of uric acid in urine?"

# `Chain.run()` is deprecated in LangChain. `invoke()` takes the chain's input
# dict (RetrievalQA's input key is "query") and returns an output dict; the
# answer text is stored under "result", so `answer` is still a plain string.
answer = qa_chain.invoke({"query": query})["result"]

In [None]:
# Show the answer with print() so line breaks in the text render as real
# newlines instead of the escaped repr a bare expression would display.
print(answer)

In [10]:
# Question 2.
# NOTE(review): the chain is built without any memory argument, so "them" is
# presumably not resolved against the previous question; the retriever sees
# only this sentence. Consider spelling out "kidney stones" - confirm intent.
query = "How can we prevent forming them?"

# `Chain.run()` is deprecated in LangChain. `invoke()` takes the chain's input
# dict (RetrievalQA's input key is "query") and returns an output dict; the
# answer text is stored under "result", so `answer` is still a plain string.
answer = qa_chain.invoke({"query": query})["result"]

In [None]:
# Show the answer with print() so line breaks in the text render as real
# newlines instead of the escaped repr a bare expression would display.
print(answer)

In [None]:
# NOTE(review): this cell previously contained `5/0`, which always raised
# ZeroDivisionError and aborted "Restart & Run All" before the final question
# below could execute. The deliberate crash has been removed so the notebook
# runs top to bottom; to stop early, use the notebook UI's interrupt instead.

In [23]:
# Question 3: ask for an enumeration of the stone types covered by the PDF.
query = "List down all types of kidney stones mentioned in the document."

# `Chain.run()` is deprecated in LangChain. `invoke()` takes the chain's input
# dict (RetrievalQA's input key is "query") and returns an output dict; the
# answer text is stored under "result", so `answer` is still a plain string.
answer = qa_chain.invoke({"query": query})["result"]

In [None]:
# Show the answer with print() so line breaks in the text render as real
# newlines instead of the escaped repr a bare expression would display.
print(answer)