In [24]:
!pip install -q --upgrade langchain langchain-community langchain-core langchain-openai
!pip install -q faiss-cpu pypdf sentence-transformers
!pip install -q langchain-text-splitters
!pip install -q transformers accelerate torch

In [13]:
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_community.llms import HuggingFacePipeline

In [25]:
from google.colab import files
files.upload()

Saving RAG_Architecture_Internal_Engineering_Document.pdf to RAG_Architecture_Internal_Engineering_Document (2).pdf


{'RAG_Architecture_Internal_Engineering_Document (2).pdf': b'%PDF-1.7\r\n%\xb5\xb5\xb5\xb5\r\n1 0 obj\r\n<</Type/Catalog/Pages 2 0 R/Lang(te) /StructTreeRoot 36 0 R/MarkInfo<</Marked true>>/OutputIntents[<</Type/OutputIntent/S/GTS_PDFA1/OutputConditionIdentifier(sRGB) /RegistryName(http://www.color.org) /Info(Creator: HP     Manufacturer:IEC    Model:sRGB) /DestOutputProfile 716 0 R>>] /Metadata 717 0 R/ViewerPreferences 718 0 R>>\r\nendobj\r\n2 0 obj\r\n<</Type/Pages/Count 6/Kids[ 3 0 R 25 0 R 27 0 R 29 0 R 31 0 R 33 0 R] >>\r\nendobj\r\n3 0 obj\r\n<</Type/Page/Parent 2 0 R/Resources<</Font<</F1 5 0 R/F2 9 0 R/F3 11 0 R/F4 13 0 R/F5 18 0 R/F6 20 0 R>>/ExtGState<</GS7 7 0 R/GS8 8 0 R>>/ProcSet[/PDF/Text/ImageB/ImageC/ImageI] >>/MediaBox[ 0 0 612 792] /Contents 4 0 R/Group<</Type/Group/S/Transparency/CS/DeviceRGB>>/StructParents 0>>\r\nendobj\r\n4 0 obj\r\n<</Filter/FlateDecode/Length 4071>>\r\nstream\r\nx\x9c\xb5\\\xdds\xdb6\x12\x7f\xf7\x8c\xff\x07<\xdd\x88w\x11K\x10\x04?2\x19\xdd\xb8N

In [26]:
pdf_path = "RAG_Architecture_Internal_Engineering_Document.pdf"
loader = PyPDFLoader(pdf_path)
documents = loader.load()
print(f"Total pages loaded: {len(documents)}")

Total pages loaded: 6


In [27]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=150
)
docs = text_splitter.split_documents(documents)
print(f"Total chunks created: {len(docs)}")

Total chunks created: 12


In [28]:
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

In [29]:
vectorstore = FAISS.from_documents(docs, embedding_model)
vectorstore.save_local("rag_faiss_index")
print("FAISS index created and saved")

FAISS index created and saved


In [30]:
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
hf_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300
)
llm = HuggingFacePipeline(pipeline=hf_pipeline)

Device set to use cuda:0


In [31]:
prompt = ChatPromptTemplate.from_template(
    """
    You are an AI assistant for engineers.
    Answer the question using ONLY the context below.
    If the answer is not in the context, say "I don't know".

    Context:
    {context}

    Question:
    {question}
    """
)
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
)

In [33]:
query = "Why does RAG reduce hallucinations?"
response = rag_chain.invoke(query)
print(response)

Injecting retrieved, relevant knowledge into the LLM prompt at query time
