In [None]:
!pip install langchain
!pip install langchain-community
!pip install langchain-classic
!pip install sentence-transformers
!pip install faiss-cpu
!pip install langchain-core


In [None]:
!pip install transformers==4.46.3
!pip install huggingface_hub==0.36.2
!pip install langchain-huggingface


In [None]:
import os
from getpass import getpass

import numpy as np
from langchain_classic.chains import RetrievalQA
from langchain_classic.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, OpenAIEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters.character import RecursiveCharacterTextSplitter


In [None]:
# Make sure the ./chapters folder exists (no error if it is already there);
# the PDF directory loader created in the next cell reads from it.
os.makedirs("chapters", exist_ok=True)

In [None]:
# PDF directory loader rooted at ./chapters/ (the folder created above).
loader = PyPDFDirectoryLoader("./chapters/")

In [None]:
!pip install pypdf

In [None]:
# Read the documents through the directory loader; each returned item exposes
# its text as `.page_content` (inspected in the next cell).
docs_before_split = loader.load()


In [None]:
# Length of the text of the second loaded document (index 1), shown as the cell output.
# NOTE(review): assumes at least two documents were loaded from ./chapters/ — confirm.
len(docs_before_split[1].page_content)

In [None]:
# Break every loaded document into overlapping chunks: chunk_size=700 with
# chunk_overlap=50, both measured by the splitter's length function.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=700)

# The chunked documents are what gets embedded and indexed further down.
docs_after_split = text_splitter.split_documents(docs_before_split)

In [None]:
# Length of the text of the second chunk (index 1) after splitting, shown as the cell output.
# NOTE(review): assumes the splitter produced at least two chunks — confirm.
len(docs_after_split[1].page_content)

In [None]:
# all-MiniLM-L6-v2 is a general sentence-transformers model, so it is loaded
# through the generic HuggingFaceEmbeddings wrapper. The BGE-specific wrapper
# prepends a BGE retrieval instruction to every query before encoding it; with a
# non-BGE model that prefix only skews query vectors relative to the document
# vectors, which are embedded without it.
huggingface_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    # Run the encoder on CPU.
    model_kwargs={"device": "cpu"},
    # Ask the encoder for normalized embedding vectors.
    encode_kwargs={"normalize_embeddings": True},
)


In [None]:
# Embed every chunk with the embedding model and build a FAISS vector store from them.
vectorstore = FAISS.from_documents(
    docs_after_split,
    huggingface_embeddings,
)

In [None]:
# Expose the vector store as a retriever: similarity search with k=3.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [None]:
# Never store the Hugging Face token in the notebook itself. Take it from the
# environment when available; otherwise ask for it interactively without
# echoing it into the cell output.
access_token = (
    os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    or os.environ.get("HF_TOKEN")
    or getpass("Hugging Face API token: ")
)

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Hugging Face endpoint for the zephyr-7b-beta repo, authenticated with the
# token from the previous cell. Low temperature, generation capped at 500 new tokens.
llm_model = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    max_new_tokens=500,
    temperature=0.1,
    huggingfacehub_api_token=access_token,
)

# Chat wrapper around the endpoint; this is the object passed to the QA chain as `llm`.
chat = ChatHuggingFace(llm=llm_model)



In [None]:
# Instruction text for the QA chain. It has two placeholders: {context} and {question}.
prompt_template = """Use the following pieces of context to answer the question at the end. Please follow the following rules:
1. If you don't know the answer, don't try to make up an answer. Just say "I can't find the final answer but you may want to check the following links".
2. If you find the answer, write the answer in a concise way with five sentences maximum.

{context}

Question: {question}

Helpful Answer:
"""

# Wrap the raw text in a PromptTemplate, declaring the two variables it expects.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)


In [None]:
# Assemble the retrieval QA chain from the chat model and the retriever, using
# the custom PROMPT. The chain is configured to also return the source documents.
retrievalQA = RetrievalQA.from_chain_type(
    llm=chat,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True,
)

In [None]:
# First question; it is sent to the QA chain in the next cell.
query = "What is the main titles in chapter 1?"

In [None]:
# Send the first question through the chain (the chain's input key is "query")
# and print the raw response object.
result = retrievalQA.invoke({"query": query})
print(result)

In [None]:
# Second question, sent through the same chain.
query2 = "what questions should asked When looking for use cases?"

# `result` is reused here, so the response to the first question is overwritten.
result = retrievalQA.invoke({"query": query2})
print(result)