In [2]:
# %pip install -qU langchain_community

In [12]:
# %pip install pypdf

In [4]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Directory that holds the source PDFs. The loader reads every PDF it finds
# under this folder, so a single directory string is all that is needed
# (the sample output below reports 'data_source/book.pdf' as a source).
directory_path = "data_source/"

# Use the named path rather than repeating the literal so the location of
# the corpus is configured in exactly one place.
loader = PyPDFDirectoryLoader(directory_path)

### Load Data

In [6]:
# Run the directory loader and keep the resulting list of documents.
data = loader.load()
# Display the first loaded document as a quick sanity check of the extraction.
data[0]

Document(metadata={'source': 'data_source/book.pdf', 'page': 0}, page_content="NetworkSecurity:PrivateCommunicationinaNon-PrivateWorld\nChapter1:Introduction- Securecommunication: Focusonhowtocommunicatesecurelyoverinsecuremediums.- Disclaimer: Authors' opinionsmaynot reflect theiremployers. Mentionsof commercialproductsareforinformationonly.- Designinsights: Offersinsightsthat gobeyondbasicspecifications.\nChapter2:IntroductiontoCryptography- Purpose: Cryptographyensures:\n- Confidentiality(keepingcommunicationprivate).- Integrity(ensuringmessagesareunaltered).- Authentication(verifyingidentity).\n- TypesofCryptographicFunctions:\n- Hashfunctions: Producefixed-lengthoutputs, hardtoreverse.- Secretkeyfunctions: Samekeyforencryptionanddecryption.- Publickeyfunctions: Usesseparatekeysforencryption(public)anddecryption(private); alsosupportsdigital signatures.\n- CryptographicAttacks:\n- Ciphertext-onlyattack.- Known-plaintextattack.- Chosen-plaintextattack.\nChapter3:SecretKeyCryptograph

### Split Data

In [7]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=500 and no overlap between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)

# Break every loaded document into chunks for embedding.
all_splits = text_splitter.split_documents(data)

In [9]:
# %pip install langchain_chroma langchain_ollama

### Generate embeddings and store in vector database

In [11]:
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings

# Embedding model used to vectorise each chunk.
local_embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Embed all chunks and index them in a Chroma vector store.
# NOTE(review): no collection_name or persist_directory is passed, so the
# store relies on Chroma's defaults -- confirm whether re-running this cell
# in the same kernel adds the same chunks a second time.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=local_embeddings,
)

### Define Model

In [None]:
from langchain_ollama import ChatOllama

# Chat model that generates the final answer in the Q&A chain.
model = ChatOllama(model="llama3.1:8b")

## Q&A Chatbot

In [None]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate


# Prompt text for the Q&A chain. It has two placeholders: {context} for the
# retrieved text and {question} for the user's question.
RAG_TEMPLATE = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.

<context>
{context}
</context>

Answer the following question:

{question}"""

# Build the chat prompt object from the template string above.
rag_prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

# Expose the vector store as a retriever (no arguments, so default settings).
retriever = vectorstore.as_retriever()

def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined with a blank line between each
        pair; an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Q&A pipeline: the incoming question is fed to the retriever (whose results
# are flattened by format_docs) and is also passed through unchanged; both
# values fill the prompt, which goes to the model, and the model output is
# parsed into a plain string.
qa_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | rag_prompt
    | model
    | StrOutputParser()
)


# Ask something the indexed PDFs can actually answer: the sample page shown
# after loading covers the types of cryptographic functions, so this question
# exercises retrieval instead of falling back to "I don't know".
question = "What are the types of cryptographic functions?"

qa_chain.invoke(question)