In [1]:
import os
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEndpointEmbeddings, HuggingFaceEndpoint
from langchain_huggingface import ChatHuggingFace  
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableMap, RunnablePassthrough

In [None]:
# HF_TOKEN
HF_TOKEN = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN

In [3]:

# Load PDFs
loader = PyPDFDirectoryLoader("./pdf")
documents = loader.load()

In [5]:
# Embeddings 
embedder = HuggingFaceEndpointEmbeddings(
    model="sentence-transformers/all-MiniLM-L6-v2",
    huggingfacehub_api_token=HF_TOKEN
)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
#  LLM - ChatHuggingFace wrapper
base_llm = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    huggingfacehub_api_token=HF_TOKEN,
    max_new_tokens=256,
    temperature=0.2,
)
llm = ChatHuggingFace(llm=base_llm) 

In [7]:
# Split and index
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = splitter.split_documents(documents)
vectordb = FAISS.from_documents(chunks, embedder)
retriever = vectordb.as_retriever(search_kwargs={"k": 4})

In [8]:
# Prompt
prompt = PromptTemplate.from_template("""
You are a helpful AI assistant. Use ONLY the following context to answer the question. 
If answer is not in the context, say 'I do not know'.
                                      Context: {context}
Question: {question}
Answer:""")

In [None]:
# LCEL RAG CHAIN 
rag_chain = (
    RunnableMap({
        "context": retriever,
        "question": RunnablePassthrough()
    })
    | prompt
    | llm
)

In [None]:
response = rag_chain.invoke("give me margin of errror of Metropolitan area of Washington-Arlington-Alexandria, DC-VA-MD-WV")
print(response)

content=" The margin of error for household income in Washington-Arlington-Alexandria, DC-VA-MD-WV in 2022 is -0.8 at the 90 percent confidence level, as stated in the document with id 'a7ba5e8e-cdde-421b-bf6a-f42a61b03fda' on page 13. The margin of error for poverty in Charlotte-Concord-Gastonia, NC-SC is -1.5 at the 90 percent confidence level, as stated in the document with id '65c6931f-9b64-857d-2f196b0bc54." additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 162, 'prompt_tokens': 1364, 'total_tokens': 1526}, 'model_name': 'HuggingFaceH4/zephyr-7b-beta', 'system_fingerprint': '', 'finish_reason': 'stop', 'logprobs': None} id='lc_run--85bfb7b1-5a00-4ec1-9fc6-c9830832db95-0' usage_metadata={'input_tokens': 1364, 'output_tokens': 162, 'total_tokens': 1526}
