In [1]:
# Standard-library helpers for file handling and text processing
import glob
import os
import pathlib
import textwrap

# Document loaders for URLs, plain-text files, and PDFs
from langchain_community.document_loaders import UnstructuredURLLoader, TextLoader, PyPDFLoader

# Splits long texts into smaller chunks for embedding.
# Imported from langchain_text_splitters, the same package the splitting cell uses.
from langchain_text_splitters import RecursiveCharacterTextSplitter

# FAISS vector store for storing and retrieving embeddings.
# Imported from langchain_community (not the legacy `langchain.vectorstores` path)
# so this cell agrees with the vector-store cell further down.
from langchain_community.vectorstores import FAISS

# Embedding wrappers (OpenAI / Hugging Face / sentence-transformers),
# again from langchain_community instead of the legacy `langchain.embeddings` path.
from langchain_community.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings, SentenceTransformerEmbeddings

# Local LLMs served through Ollama (legacy path was `langchain.llms`)
from langchain_community.llms import Ollama

# Retrieval chain that combines a retriever, a prompt, and an LLM
from langchain.chains import ConversationalRetrievalChain

# Prompt templates for the RAG system
from langchain.prompts import PromptTemplate

print("‚úÖ Libraries imported! You're good to go!")

  from .autonotebook import tqdm as notebook_tqdm


‚úÖ Libraries imported! You're good to go!


In [20]:
import glob
from langchain_community.document_loaders import PyPDFLoader, UnstructuredURLLoader

# glob.glob returns matches in arbitrary (filesystem-dependent) order; sorting keeps
# the document order, and therefore the chunk / index order, stable between runs.
pdf_paths = sorted(glob.glob("data/Everstorm_*.pdf"))
raw_docs = []

if not pdf_paths:
    # An empty match usually means the notebook was started from the wrong directory.
    print("No files matched data/Everstorm_*.pdf; check the working directory.")

# --- Load PDFs (each page is a Document) ---
for path in pdf_paths:
    raw_docs.extend(PyPDFLoader(path).load())

print(f"Loaded {len(raw_docs)} PDF pages from {len(pdf_paths)} files.")

URLS = [
    "https://developer.bigcommerce.com/docs/store-operations/shipping",
    "https://developer.bigcommerce.com/docs/store-operations/orders/refunds",
]

# --- Load web pages (best effort: the PDFs above remain available if this fails) ---
try:
    loader = UnstructuredURLLoader(urls=URLS)
    web_raw_docs = loader.load()
    print(f"Fetched {len(web_raw_docs)} documents from the web.")

    # By default the loader logs and skips URLs it cannot fetch rather than raising,
    # so a partial failure never reaches the except branch; surface it explicitly.
    if len(web_raw_docs) < len(URLS):
        print(f"Only {len(web_raw_docs)} of {len(URLS)} URLs could be loaded.")

    # load() returns a list of Documents, so extend (not append) keeps raw_docs flat.
    raw_docs.extend(web_raw_docs)

except Exception as e:
    # NOTE: no additional offline copies are loaded here; only the PDF pages
    # collected above stay in raw_docs when the web fetch raises.
    print("‚ö†Ô∏è  Web fetch failed, using offline copies:", e)

print(f"Loaded {len(raw_docs)} documents total (pdf + web).")


Ignoring wrong pointing object 81 0 (offset 0)
Ignoring wrong pointing object 76 0 (offset 0)
Ignoring wrong pointing object 80 0 (offset 0)


Loaded 8 PDF pages from 4 files.
Fetched 2 documents from the web.
Loaded 10 documents total (pdf + web).


In [21]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings gathered in one place so they are easy to tune.
splitter_options = {"chunk_size": 300, "chunk_overlap": 30}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break every loaded Document (PDF pages + web pages) into overlapping chunks.
chunks = text_splitter.split_documents(raw_docs)

chunk_count = len(chunks)
print(f"‚úÖ {chunk_count} chunks ready for embedding")

‚úÖ 113 chunks ready for embedding


In [23]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import SentenceTransformerEmbeddings

# Embedding model used for the chunks.
embeddings = SentenceTransformerEmbeddings(model_name="thenlper/gte-small")

# Embed all chunks and index the resulting vectors in FAISS.
vectordb = FAISS.from_documents(chunks, embeddings)

# Retriever over the index; k is the number of chunks requested per query.
retrieval_kwargs = {"k": 8}
retriever = vectordb.as_retriever(search_kwargs=retrieval_kwargs)

# Persist the index to the local "faiss_index" directory.
vectordb.save_local("faiss_index")

print("‚úÖ Vector store with", vectordb.index.ntotal, "embeddings")

‚úÖ Vector store with 113 embeddings


In [24]:
from langchain_community.llms import Ollama

# LLM served through Ollama, configured with a low sampling temperature.
llm = Ollama(model="gemma3:1b", temperature=0.1)

# Smoke test: a single prompt round-trip confirms the model responds.
smoke_reply = llm.invoke("Explain RAG in one sentence.")
print(smoke_reply)

  llm = Ollama(model="gemma3:1b", temperature=0.1)


RAG (Retrieval-Augmented Generation) is a technique that combines a large language model with an external knowledge base to improve its responses by retrieving relevant information before generating a response.


In [41]:
from langchain.prompts import PromptTemplate
from langchain_community.llms import Ollama
from langchain.chains import ConversationalRetrievalChain

# Answering prompt: restricts the model to the retrieved <context> and fixes the
# exact fallback sentence to use when the context does not contain the answer.
SYSTEM_TEMPLATE = """
You are a Customer Support Chatbot.

You MUST answer using ONLY the information in <context>.
If the answer is not in <context>, reply exactly: "I'm not sure from the docs."

Rules:
1) If the answer IS in <context>, quote the exact sentence(s) that support it.
2) If the answer is NOT in <context>, reply exactly: "I'm not sure from the docs."
3) Keep the answer to 1-3 sentences.
4) Add citations like [source: ...] when available.

<context>
{context}
</context>

Question: {question}
Answer:
"""


prompt = PromptTemplate(template=SYSTEM_TEMPLATE, input_variables=["context", "question"])

# Per-document template used when the retrieved chunks are stuffed into {context}.
# Without it only page_content is inserted, so rule 4 (citations) can never be
# satisfied because the model never sees where a chunk came from. The format
# mirrors the "[source: ...]" prefix built by hand in the manual retrieval cell.
# Assumes every Document carries a "source" metadata key (the PDF and URL loaders
# used in this notebook are expected to set it; verify if other loaders are added).
DOCUMENT_PROMPT = PromptTemplate(
    template="[source: {source}] {page_content}",
    input_variables=["page_content", "source"],
)

llm = Ollama(model="gemma3:1b", temperature=0.1)

# return_source_documents=True exposes the retrieved chunks under
# result["source_documents"] alongside result["answer"].
chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever,
    combine_docs_chain_kwargs={"prompt": prompt, "document_prompt": DOCUMENT_PROMPT},
    return_source_documents=True,
)


In [43]:
# Run a few sample questions through the chain, carrying the conversation
# history forward so each question is asked in the context of the earlier turns.
chat_history = []
test_questions = ["who is john?", "what if No tracking movement for 7 days?", "what is the contact?"]

# Set to True to also print the retrieved chunks behind each answer (debugging aid).
SHOW_SOURCES = False

for q in test_questions:
    # .invoke() is the supported entry point; calling the chain object directly
    # is deprecated in recent LangChain releases.
    res = chain.invoke({"question": q, "chat_history": chat_history})
    answer = res["answer"]
    chat_history.append((q, answer))

    print("===========================")
    print("QUESTION:", q)
    print("ANSWER:", answer)

    if SHOW_SOURCES:
        print("SOURCES:", len(res["source_documents"]))
        for i, d in enumerate(res["source_documents"][:5]):
            print("\n--- source doc", i, "---", d.metadata.get("source"))
            print(d.page_content[:500])

QUESTION: who is john?
ANSWER: I'm not sure from the docs.
QUESTION: what if No tracking movement for 7 days?
ANSWER: If no tracking movement for 7 days, we open a carrier trace.
QUESTION: what is the contact?
ANSWER: I'm not sure from the docs.


In [42]:
# Manual retrieval + generation for one question, bypassing the conversational
# chain, to inspect exactly what context the model receives.
q = "what if No tracking movement for 7 days?"

# .invoke() replaces the deprecated get_relevant_documents() on retrievers.
docs = retriever.invoke(q)

# Prefix each chunk with its source so the prompt's citation rule can be followed.
context = "\n\n".join(
    f"[source: {d.metadata.get('source','')}] {d.page_content}" for d in docs
)

final_prompt = prompt.format(context=context, question=q)
# print(final_prompt[:2000])   # uncomment to inspect the first ~2000 chars

print(llm.invoke(final_prompt))


We open a carrier trace.


In [44]:
# NOTE: this is Streamlit application code. It only works when launched with
# `streamlit run <script>.py`; executed inside a notebook kernel, Streamlit warns
# that session state does not function. It also relies on `chain`, which is built
# in an earlier cell, so a standalone script has to construct the chain itself.
import streamlit as st

st.set_page_config(page_title="RAG Demo", page_icon="üí¨")

st.title("üìÑ RAG Chat Demo")
st.caption("Ask questions based on the indexed documents")

# --- session state: keeps the (question, answer) pairs across reruns ---
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# --- input box ---
question = st.text_input("Ask a question")

if question:
    with st.spinner("Thinking..."):
        # .invoke() is the supported entry point; calling the chain object
        # directly is deprecated in recent LangChain releases.
        result = chain.invoke({
            "question": question,
            "chat_history": st.session_state.chat_history
        })

        answer = result["answer"]
        st.session_state.chat_history.append((question, answer))

# --- display chat, newest exchange first ---
for q, a in reversed(st.session_state.chat_history):
    st.markdown(f"**You:** {q}")
    st.markdown(f"**Assistant:** {a}")
    st.markdown("---")


2025-12-19 23:22:06.742 
  command:

    streamlit run /Users/thanhnguyen/anaconda3/envs/rag-chatbot/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]
2025-12-19 23:22:06.744 Session state does not function when running a script without `streamlit run`
