In [18]:
from langchain_ollama import OllamaLLM
#from langchain_ollama import OllamaEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from sys import argv
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain


In [19]:
# 1. Instantiate the two Ollama-backed models used by this notebook:
#    - `llm` generates the final answer text,
#    - `embeddings` turns text into vectors for the similarity search.
llm = OllamaLLM(
    model="llama3.2",
)
embeddings = OllamaEmbeddings(
    model="znbang/bge:small-en-v1.5-f32",
)


In [20]:
# 2. Load every PDF in a folder, split the text into chunks, index the chunks
#    in an in-memory vector store and expose that store as a retriever.
pdf_folder_path = "<replace This with your own file directory which contains pdf>"
if not os.path.isdir(pdf_folder_path):
    # Fail early with an actionable message (this also catches the unedited
    # placeholder above) instead of a bare error from os.listdir.
    raise FileNotFoundError(
        f"pdf_folder_path is not an existing directory: {pdf_folder_path!r}. "
        "Set it to the folder that contains your PDF files."
    )

documents = []
# os.listdir returns names in arbitrary order; sorting makes the order in which
# documents are loaded the same on every run.
for file in sorted(os.listdir(pdf_folder_path)):
    # Compare case-insensitively so files such as "QUOTE.PDF" are not skipped.
    if file.lower().endswith('.pdf'):
        pdf_path = os.path.join(pdf_folder_path, file)
        loader = PyPDFLoader(pdf_path)
        documents.extend(loader.load())

if not documents:
    # Without any loaded content the prompt's context would always be empty,
    # so stop here rather than building an index with nothing in it.
    raise ValueError(f"No PDF content could be loaded from {pdf_folder_path!r}.")

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
# Despite its name, `pages` holds the splitter's output chunks, not PDF pages.
pages = text_splitter.split_documents(documents)
store = DocArrayInMemorySearch.from_documents(pages, embedding=embeddings)
retriever = store.as_retriever()


In [21]:
# 3. Prompt template. It has two placeholders, {context} and {question}, which
#    are filled in when the chain runs. The text is assembled from adjacent
#    string literals, one per line of the rendered prompt.
template = (
    "\n"
    "Answer the question based only on the context provided.\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
)

prompt = PromptTemplate.from_template(template)

def format_docs(docs):
  """Merge documents into a single context string.

  Args:
    docs: Iterable of objects that expose a ``page_content`` attribute.

  Returns:
    The ``page_content`` values joined with a blank line between
    consecutive documents; an empty string when ``docs`` is empty.
  """
  texts = [doc.page_content for doc in docs]
  separator = "\n\n"
  return separator.join(texts)

# 4. Build the chain of operations
def _extract_question(chain_input):
  """Return the question text from the chain's input.

  Args:
    chain_input: Either the question itself, or a dict that carries the
      question under the 'question' key.

  Returns:
    The value stored under 'question' when a dict is given, otherwise the
    input unchanged.

  Raises:
    KeyError: If a dict without a 'question' key is given.
  """
  if isinstance(chain_input, dict):
    return chain_input['question']
  return chain_input

# Both branches first reduce the input to the bare question. Without this, an
# input of the form {'question': "..."} would be handed as a whole dict to the
# retriever (as its search query) and to the prompt's {question} slot.
# Plain functions are turned into runnables when composed with `|` or placed in
# the dict, the same mechanism `retriever | format_docs` relies on.
chain = (
  {
    'context': _extract_question | retriever | format_docs,
    'question': _extract_question,
  }
  | prompt
  | llm
  | StrOutputParser()
)


In [22]:
# Ask about xxx.pdf. The chain's input is what gets sent to the retriever as
# the search query and substituted into the prompt's {question} slot, so the
# question is passed as a plain string rather than wrapped in a dict.
resp = chain.invoke("can you list the total and project balance of xxx.pdf and who did the quote and date")

In [None]:
# Display the answer text returned by the chain.invoke call in the previous cell.
print(resp)

In [24]:
# Ask about yyy.pdf. The chain's input is what gets sent to the retriever as
# the search query and substituted into the prompt's {question} slot, so the
# question is passed as a plain string rather than wrapped in a dict.
resp = chain.invoke("can you list the final price details of yyy.pdf and who did the quote and date")

In [None]:
# Display the answer text returned by the chain.invoke call in the previous cell.
print(resp)