## Rag-Chatbot

link =https://en.wikipedia.org/w/index.php?title=Ethiopia&oldid=1306479864   

In [5]:
%pip install -qU langchain langchain_community langchain[google-genai]

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import os
from dotenv import load_dotenv
load_dotenv()
# Load the API key from the .env file
API_KEY = os.getenv("GOOGLE_API_KEY")
if API_KEY is None:
    raise ValueError("API key not found. Please set the OPENAI_API_KEY environment variable.")

## Loading Document

In [31]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings


loader = WebBaseLoader("https://www.govinfo.gov/content/pkg/CDOC-110hdoc50/html/CDOC-110hdoc50.htm")
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", " ", ""])
chunks = text_splitter.split_documents(documents)
print("Number of chunks: ", len(chunks))

# embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

Number of chunks:  430


## Embedding Model Google Gemini

In [9]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from tenacity import retry, wait_random_exponential, stop_after_attempt

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def embed_with_retry(texts):
 return embeddings.embed_documents(texts)

# Now, use this function to embed your documents
try:
 vectors = embed_with_retry(chunks)
except Exception as e:
 print(f"Failed to embed documents after multiple retries: {e}")

Failed to embed documents after multiple retries: RetryError[<Future at 0x21b304a6990 state=finished raised TypeError>]


In [32]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings_model = HuggingFaceEmbeddings(model_name="NovaSearch/stella_en_1.5B_v5")

In [6]:
from langchain_community.vectorstores import InMemoryVectorStore
store = InMemoryVectorStore.from_documents(chunks, embeddings_model)


In [26]:
retriever = store.as_retriever()

In [27]:
from langchain.chat_models import init_chat_model
from langchain_core.prompts import ChatPromptTemplate

llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")

prompt = ChatPromptTemplate.from_messages([
    ("system", "Your a helpful assistant that can answer questions about the USA Constitution. Use the provided context to answer the question."),
    ("user", "Question: {question}\nContext: {context}")
])

chain = prompt | llm

In [30]:
query = "Can you summarize the context? and list some interesting things you see"

#  RAG

# Retrieval
docs = retriever.invoke(query)
docs_context = "\n\n".join([doc.page_content for doc in docs])
# Augmented + Generation
response = chain.invoke({
    "question": query,
    "context": docs_context
})


response.content

'The context you provided, "H Q D O", does not appear to be text from the USA Constitution or any meaningful document. It\'s just a sequence of letters.\n\nTherefore, I cannot summarize it or list interesting things related to the USA Constitution from it.\n\nPlease provide the actual text or context you\'d like me to analyze!'

In [2]:
%pip install -qU "langchain[google-genai]"

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:

from langchain.chat_models import init_chat_model

llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")
llm.invoke("hello how are you")

AIMessage(content="Hello! As an AI, I don't have feelings or a physical state, but I'm functioning perfectly and ready to assist you.\n\nHow can I help you today?", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--8a35191b-9346-44c2-ac70-3128b4fb5d7c-0', usage_metadata={'input_tokens': 5, 'output_tokens': 759, 'total_tokens': 764, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 722}})

## Prompt Templates