## PDF loader

## Load pdf document

In [1]:
#Load pdf document
from langchain_community.document_loaders import PyPDFLoader

file_path = "/Users/vaishnavirohitkulange/Documents/GENAI_Calssses/RAG_DEMO/Rainbow-Bazaar-Return-Refund-&-Cancellation-Policy.pdf"
pdf_loader = PyPDFLoader(file_path)
pdf_docs = pdf_loader.load()

  from .autonotebook import tqdm as notebook_tqdm


## Chunking

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter, CharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

In [3]:
pdf_chunks = text_splitter.split_documents(pdf_docs)

In [4]:
print(len(pdf_chunks))

9


In [5]:
print(pdf_chunks[0].page_content[:1000])

Return, Refund and Cancellation Policy 
1.1. Refunds, Cancellations and Returns of Goods 
Please note that you can submit complaints with the Order and product sold via 
rainbowbazaar.shop and we shall process the refund to you within 30 days from date of 
receipt of complaint. Once we issue your refund, it takes additional time for your financial 
institution to make funds available in your account, which can vary from 2-10 days from 
the date of refund processing. All orders are manually processed on the Website and sent 
for shipment as soon as they are placed. During this process we incur some irreversible 
fees. Therefore, while we understand that orders might need to be changed sometimes, we 
are unable to do it free of charge after a certain point. We strictly adhere to the following 
cancellation policy: 
• If you cancel your order BEFORE it has been shipped, you will not be charged any cancellation 
fee;


## Embeddding

In [6]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
import os
import sentence_transformers

In [7]:
embedding_model = HuggingFaceBgeEmbeddings(model_name="sentence-transformers/all-miniLM-L6-v2")

  embedding_model = HuggingFaceBgeEmbeddings(model_name="sentence-transformers/all-miniLM-L6-v2")


#### Creating a vector Store

In [8]:
from langchain_community.vectorstores import Chroma
vector_store = Chroma.from_documents(documents=pdf_chunks, 
                                     embedding=embedding_model,
                                     persist_directory="/Users/vaishnavirohitkulange/Documents/GENAI_Calssses/RAG_DEMO/chroma_db")
vector_store.persist()

  vector_store.persist()


In [9]:
print("Total Vectors in Store: ", vector_store._collection.count())

Total Vectors in Store:  18


## Querying 

#### Create retriver

In [10]:
retriver = vector_store.as_retriever(search_kwargs={"k":3})

#### Create an LLM 

In [12]:
from langchain_groq import ChatGroq
import os

# Set your actual GROQ API key here
os.environ["GROQ_API_KEY"] = "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"

print(os.getenv("GROQ_API_KEY"))  # should print gsk_...

llm = ChatGroq(model="openai/gpt-oss-120b", temperature=0)

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


### Build RAG chain

In [13]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from typing import List
from langchain_core.documents import Document


In [14]:
## helper function to format document into a string 

def format_docs(docs:List[Document])-> str:
    return"\n\n".join([doc.page_content for doc in docs])

In [15]:
prompt = ChatPromptTemplate.from_messages([
    ("system","You are a helpful assistant that helps people find information based on the provided pdf only. "),
    ("human","Context:{context}\n\nQuestion: {question}\n\nAnswer in a concise manner.")
])

In [16]:
qa_chain = (
    {
        "context": retriver | format_docs,
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [17]:
query = "What are the cancellation charges?"

In [18]:
response = qa_chain.invoke(query)
print("Response: ", response)

Response:  - **Before the order is shipped:** No cancellation fee.  
- **After the order is shipped:** Cancellation is handled as a return and you will incur the normal return fees.  
- **If you refuse delivery:** A cancellation fee of **100 % of the order value** is applied (unless you can prove the product was tampered with, faulty, etc.).  
- **If you cancel > 7 days but < 15 days after placing the order:** A charge equal to **the number of days %** of the order total is deducted (e.g., cancel on day 9 → 9 % fee).


In [19]:
query = ["Where we can raise refund request?", "What is the time limit to raise a cancellation request?", "What are the cancellation charges?"]

In [22]:
response = qa_chain.batch(query)
print("Response: ", response)

Response:  ['You can raise a refund request by contacting the seller:\n\n- **For product orders** – submit a complaint through the\u202frainbowbazaar.shop\u202fwebsite (using the email address listed on the seller’s profile).  \n- **For service orders** – email the request to\u202f**rb@thepridecircle.com**\u202fwith your name, order number, details and reason for cancellation.', 'You can request a cancellation without any fee **up to\u202f7\u202fdays after placing the order (and before it is shipped).** After the 7‑day mark, a charge applies.', '- **Before the order is shipped:**\u202fNo cancellation fee.  \n- **After the order is shipped:**\u202fCancellation is handled as a return and the normal return fees apply.  \n- **If you cancel\u202f7–15\u202fdays after placing the order (but before it ships):**\u202fA charge is applied equal to **the number of days\u202f%** of the order total (e.g., cancel on day\u202f9 →\u202f9\u202f% charge).  \n- **If the order is refused at delivery:**\u20