In [1]:
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_cohere.embeddings import CohereEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory

In [2]:
# Load environment variables from a local `.env` file into the process
# environment, because ChatGroq expects `GROQ_API_KEY` to be set there.
# NOTE(review): CohereEmbeddings (used further down) presumably needs
# `COHERE_API_KEY` as well — confirm it lives in the same `.env`.
load_dotenv()

True

In [3]:
# Read the source PDF; the loaded result is indexed per page (`pages[0]`)
# in the next cell.
PDF_PATH = "../sample_documents/Kidney-Stones-Patient-Guide.pdf"
pdf_loader = PyPDFLoader(PDF_PATH)
pages = pdf_loader.load()

In [5]:
# Sanity check: peek at the text extracted from the first page.
pages[0].page_content

'KIDNEY STONES\nKidney Stones Patient Guide'

In [4]:
# Break the loaded pages into smaller, slightly overlapping chunks so each
# one can be embedded and retrieved independently.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,    # upper bound on the size of a single chunk
    chunk_overlap=50,  # amount of text shared between neighbouring chunks
)
chunks = text_splitter.split_documents(pages)

In [5]:
# Embedding backend used to vectorise the chunks for the FAISS store.
# Alternative backend tried earlier: `HuggingFaceEmbeddings()` (still imported).
embeddings = CohereEmbeddings(model="embed-english-light-v3.0")

In [6]:
# Build the FAISS vector store from the chunks, using `embeddings` to
# vectorise them. The retriever for the chain is derived from `db` later.
db = FAISS.from_documents(chunks, embeddings)

In [7]:
# Chat model served through Groq (needs `GROQ_API_KEY` in the environment).
# temperature=0 asks for the least random output the model can give.
llm = ChatGroq(model="mistral-saba-24b", temperature=0)

In [None]:
# Conversation memory shared across turns of the retrieval chain.
# `ConversationBufferMemory` stores the raw history and never calls a model,
# so it takes no `llm` argument (only the summarising memory classes do);
# the previously passed `llm=llm` was ignored and has been dropped.
memory = ConversationBufferMemory(
    memory_key="chat_history",  # name under which the history is handed to the chain
    output_key="answer",        # chain output that is recorded as the AI turn
    return_messages=True,       # keep history as message objects, not one string
)

In [9]:
# Wire the LLM, the FAISS-backed retriever and the conversation memory into a
# single conversational question-answering chain. verbose=True makes the chain
# log its intermediate steps while it runs.
retriever = db.as_retriever()
conv_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    verbose=True,
)

In [None]:
# The pipeline is fully assembled at this point. The cells below send
# questions to the Groq-hosted model through `conv_chain` (API calls).

In [None]:
# First turn of the conversation. `invoke` is the supported entry point;
# calling the chain object directly (`conv_chain({...})`) is deprecated in
# LangChain >= 0.1. The result is a dict; the answer is under "answer".
response = conv_chain.invoke(
    {"question": "What type of stone is formed due to high volume of uric acid in urine?"}
)

In [None]:
# print() renders the answer's line breaks instead of showing the escaped repr.
print(response["answer"])

In [None]:
# Follow-up turn: "them" is only resolvable through the chat history kept in
# the chain's memory, so this exercises the conversational part of the chain.
# `invoke` replaces the deprecated direct call `conv_chain({...})`.
response = conv_chain.invoke({"question": "How can we prevent forming them?"})

In [None]:
# Show the follow-up answer as plain text (line breaks rendered by print()).
print(response["answer"])