In [None]:
import os
from dotenv import load_dotenv
import langchain.vectorstores 
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
import faiss
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain_huggingface.embeddings import HuggingFaceEmbeddings 
from langchain_openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq 
from crewai_tools import SerperDevTool
from crewai import Agent, Task, Crew, LLM
from crewai_tools import ScrapeWebsiteTool
from langchain.docstore.document import Document
import crewai
from tqdm import tqdm
import time
import random
import hashlib



In [2]:
load_dotenv() 

True

In [3]:
GROQ_API_KEY = os.getenv("GROQ_API_KEY") 
SERPER_API_KEY = os.getenv("SERPER_API_KEY")
GEMINI=os.getenv("GEMINI")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


In [4]:
# llm = ChatOpenAI( 
#     model="gpt-4o",
#     temperature=0,
#     max_tokens=500,
#     timeout=None,
#     max_retries=10,
# )

In [None]:
# Initialize LLM
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0,
    max_tokens=500,
    timeout=None,
    max_retries=12,
)

In [6]:
crew_llm_search = crewai.LLM(
    model="gemini/gemini-1.5-flash",
    api_key=GEMINI,
    max_tokens=500,
    temperature=0.7
)

crew_llm_scraper = crewai.LLM(
    model="gemini/gemini-1.5-flash",
    api_key=GEMINI,
    max_tokens=500,
    temperature=0.7
)


In [7]:
def retry_llm_invoke(llm, messages, max_retries=5):
    for attempt in range(max_retries):
        try:
            return llm.invoke(messages)
        except Exception as e:
            print(f"[Retry {attempt + 1}] LLM call failed: {e}")
            if attempt == max_retries - 1:
                raise
            wait = 2 ** attempt + random.random()
            print(f"Waiting {wait:.2f} seconds before retry...")
            time.sleep(wait)


In [8]:
def check_local_knowledge(query, context):
    """Router function to determine if we can answer from local knowledge"""
    prompt = '''Role: Question-Answering Assistant
Task: Determine whether the system can answer the user's question based on the provided text.
Instructions:
    - Analyze the text and identify if it contains the necessary information to answer the user's question.
    - Provide a clear and concise response indicating whether the system can answer the question or not.
    - Your response should include only a single word. Nothing else, no other text, information, header/footer. 
Output Format:
    - Answer: Yes/No
Study the below examples and based on that, respond to the last question. 
Examples:
    Input: 
        Text: The capital of France is Paris.
        User Question: What is the capital of France?
    Expected Output:
        Answer: Yes
    Input: 
        Text: The population of the United States is over 330 million.
        User Question: What is the population of China?
    Expected Output:
        Answer: No
    Input:
        User Question: {query}
        Text: {text}
'''
    formatted_prompt = prompt.format(text=context, query=query)
    response = retry_llm_invoke(llm, formatted_prompt)
    return response.content.strip().lower() == "yes"


In [9]:
def setup_web_scraping_agent():
    """Setup the web scraping agent and related components"""
    search_tool = SerperDevTool()  # Tool for performing web searches
    scrape_website = ScrapeWebsiteTool()  # Tool for extracting data from websites
    
    # Define the web search agent
    web_search_agent = Agent(
        role="Expert Web Search Agent",
        goal="Identify and retrieve relevant web data for user queries",
        backstory="An expert in identifying valuable web sources for the user's needs",
        allow_delegation=False,
        verbose=False,
        llm=crew_llm_search
    )
    
    # Define the web scraping agent
    web_scraper_agent = Agent(
        role="Expert Web Scraper Agent",
        goal="Extract and analyze content from specific web pages identified by the search agent",
        backstory="A highly skilled web scraper, capable of analyzing and summarizing website content accurately",
        allow_delegation=False,
        verbose=False,
        llm=crew_llm_scraper
    )
    
    # Define the web search task
    search_task = Task(
        description=(
            "Identify the most relevant web page or article for the topic: '{topic}'. "
            "Use all available tools to search for and provide a link to a web page "
            "that contains valuable information about the topic. Keep your response concise."
        ),
        expected_output=(
            "A concise summary of the most relevant web page or article for '{topic}', "
            "including the link to the source and key points from the content."
        ),
        tools=[search_tool],
        agent=web_search_agent,
    )
    
    # Define the web scraping task
    scraping_task = Task(
        description=(
            "Extract and analyze data from the given web page or website. Focus on the key sections "
            "that provide insights into the topic: '{topic}'. Use all available tools to retrieve the content, "
            "and summarize the key findings in a concise manner."
        ),
        expected_output=(
            "A detailed summary of the content from the given web page or website, highlighting the key insights "
            "and explaining their relevance to the topic: '{topic}'. Ensure clarity and conciseness."
        ),
        tools=[scrape_website],
        agent=web_scraper_agent,
    )
    
    # Define the crew to manage agents and tasks
    crew = Crew(
        agents=[web_search_agent, web_scraper_agent],
        tasks=[search_task, scraping_task],
        verbose=0,
        memory=False,
    )
    return crew

def get_web_content(query):
    """Get content from web scraping"""
    crew = setup_web_scraping_agent()
    result = crew.kickoff(inputs={"topic": query})
    return result.raw

In [10]:
def setup_vector_db_multi(pdf_paths):
    """Setup vector DB with progress tracking for loading, splitting, embedding"""
    all_chunks = []

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50
    )

    print("Loading and splitting PDFs...")
    for pdf_path in tqdm(pdf_paths, desc="Processing PDFs"):
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        chunks = text_splitter.split_documents(documents)
        all_chunks.extend(chunks)

    print(f"Total chunks: {len(all_chunks)}")

    # Step 1: Generate embeddings with progress
    print("Generating embeddings...")
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    
    texts = [doc.page_content for doc in all_chunks]

    # tqdm wrapper to visualize embedding progress
    embedded_vectors = []
    for text in tqdm(texts, desc="Embedding chunks"):
        embedded_vectors.append(embeddings.embed_query(text))

    # Step 2: Create FAISS index manually
    print("Building FAISS vector DB...")
    text_embedding_pairs = list(zip(texts, embedded_vectors))
    vector_db = FAISS.from_embeddings(text_embedding_pairs, embeddings)

    print("Vector database setup complete.")
    return vector_db

def get_local_content(vector_db, query):
    """Get content from vector database"""
    docs = vector_db.similarity_search(query, k=5)
    return " ".join([doc.page_content for doc in docs])

In [11]:
def generate_final_answer(context, query, conversation_history):
    """Generate final answer using LLM with memory"""
    # Append the new context + query to the ongoing chat
    conversation_history.append(("system", f"Context: {context}"))
    conversation_history.append(("human", query))

    response = llm.invoke(conversation_history)

    # Append model reply to history
    conversation_history.append(("assistant", response.content))

    return response.content, conversation_history

In [12]:
def hash_chunk(text):
    """Returns a consistent hash for a given text chunk."""
    return hashlib.md5(text.encode("utf-8")).hexdigest()

def load_existing_hashes(path="chunk_hashes.txt"):
    """Loads hashes from disk if file exists."""
    if os.path.exists(path):
        with open(path, "r") as f:
            return set(line.strip() for line in f)
    return set()

def save_hashes_to_disk(existing_hashes, path="chunk_hashes.txt"):
    """Saves hash set to disk."""
    with open(path, "w") as f:
        for h in existing_hashes:
            f.write(h + "\n")


In [None]:
def process_query(query, vector_db, local_context, conversation_history, existing_hashes):
    """Main function to process user query with memory + web cache with deduplication"""
    print(f"\nProcessing query: {query}")

    # Step 1: Check if we can answer from local knowledge
    can_answer_locally = check_local_knowledge(query, local_context)
    print(f"Can answer locally: {can_answer_locally}")

    # Step 2: Get context from vector DB or web
    if can_answer_locally:
        context = get_local_content(vector_db, query)
        print("Retrieved context from local documents")
    else:
        context = get_web_content(query)
        print("Retrieved context from web scraping")

        # Optional: Cache this web content into the vector DB (with deduplication)
        if context.strip():
            print("Caching web content into vector DB (with deduplication)...")

            web_doc = Document(page_content=context, metadata={"source": "web"})
            splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
            new_chunks = splitter.split_documents([web_doc])

            unique_chunks = []
            for chunk in new_chunks:
                chunk_hash = hash_chunk(chunk.page_content)
                if chunk_hash not in existing_hashes:
                    unique_chunks.append(chunk)
                    existing_hashes.add(chunk_hash)

            print(f"{len(unique_chunks)} new unique chunks will be added to the vector DB.")
            if unique_chunks:
                vector_db.add_documents(unique_chunks)
                print("Unique web content added to vector DB.")
            else:
                print("No unique content found. Skipping vector DB update.")

    # Step 3: Generate final answer
    answer, updated_history = generate_final_answer(context, query, conversation_history)
    return answer, updated_history, existing_hashes


In [14]:
def main():
    pdf_paths = [
        r'E:\Desktop\Data Science\Gen AI\Agentic_Rag\genai-principles.pdf',
        r'E:\Desktop\Data Science\Gen AI\Agentic_Rag\Agentic_ai.pdf',
        r'E:\Desktop\Data Science\Gen AI\Agentic_Rag\rag.pdf'
    ]

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

    # Load or build vector DB
    if os.path.exists("faiss_index") and os.path.exists("faiss_index/index.faiss"):
        print("Loading existing vector database from disk...")
        vector_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    else:
        print("Building new vector database...")
        vector_db = setup_vector_db_multi(pdf_paths)
        vector_db.save_local("faiss_index")

    # ✅ Load hash memory from disk
    existing_hashes = load_existing_hashes("chunk_hashes.txt")

    conversation_history = [
        ("system", "You are a helpful assistant. Use the provided context to answer the query accurately.")
    ]
    query_count = 0

    print("Chat is ready. Type your query (type 'exit' to quit, 'reset' or 'new' to start a new conversation):")
    while True:
        query = input("You: ").strip()

        if query.lower() == "exit":
            break
        elif query.lower() in ("reset", "new"):
            print("🆕 Starting a new conversation...")
            conversation_history = [
                ("system", "You are a helpful assistant. Use the provided context to answer the query accurately.")
            ]
            continue

        local_context = get_local_content(vector_db, query)
        result, conversation_history, existing_hashes = process_query(query, vector_db, local_context, conversation_history, existing_hashes)
        print("\nAI:", result)

        query_count += 1
        if query_count % 5 == 0:
            print("📝 Saving updated vector DB and hash cache to disk...")
            vector_db.save_local("faiss_index")
            save_hashes_to_disk(existing_hashes, "chunk_hashes.txt")


In [15]:
if __name__ == "__main__":
    main()

Loading existing vector database from disk...
Chat is ready. Type your query (type 'exit' to quit, 'reset' or 'new' to start a new conversation):

Processing query: whats is Rag
Can answer locally: True
Retrieved context from local documents

AI: RAG (Retrieve, Augment, Generate) is a research paradigm in the field of natural language processing (NLP) and artificial intelligence (AI). It's a framework used to improve the performance of language models, particularly in tasks such as question answering, text generation, and conversational dialogue.

The RAG process typically involves three main steps:

1. **Indexing**: This step involves processing and storing a large corpus of text data in a way that allows for efficient retrieval of relevant information.
2. **Retrieval**: When a question or prompt is given, the RAG model retrieves a set of relevant documents or chunks of text from the indexed corpus.
3. **Generation**: The retrieved text is then used as input to a language model, which

2025-06-14 14:24:12,663 - 16840 - serper_dev_tool.py-serper_dev_tool:179 - ERROR: Error making request to Serper API: 400 Client Error: Bad Request for url: https://google.serper.dev/search
Response content: b'{"message":"Missing query parameter","statusCode":400}'
2025-06-14 14:24:13,224 - 16840 - serper_dev_tool.py-serper_dev_tool:179 - ERROR: Error making request to Serper API: 400 Client Error: Bad Request for url: https://google.serper.dev/search
Response content: b'{"message":"Missing query parameter","statusCode":400}'
2025-06-14 14:24:13,790 - 16840 - serper_dev_tool.py-serper_dev_tool:179 - ERROR: Error making request to Serper API: 400 Client Error: Bad Request for url: https://google.serper.dev/search
Response content: b'{"message":"Missing query parameter","statusCode":400}'
2025-06-14 14:24:13,930 - 16840 - serper_dev_tool.py-serper_dev_tool:179 - ERROR: Error making request to Serper API: 400 Client Error: Bad Request for url: https://google.serper.dev/search
Response con

Retrieved context from web scraping
Caching web content into vector DB (with deduplication)...
1 new unique chunks will be added to the vector DB.
Unique web content added to vector DB.

AI: It seems like you didn't ask a question. Please feel free to ask me anything about RAG or any other topic, and I'll do my best to help.

Processing query: 
Can answer locally: False


2025-06-14 14:25:37,166 - 16840 - serper_dev_tool.py-serper_dev_tool:179 - ERROR: Error making request to Serper API: 400 Client Error: Bad Request for url: https://google.serper.dev/search
Response content: b'{"message":"Missing query parameter","statusCode":400}'
2025-06-14 14:25:37,732 - 16840 - serper_dev_tool.py-serper_dev_tool:179 - ERROR: Error making request to Serper API: 400 Client Error: Bad Request for url: https://google.serper.dev/search
Response content: b'{"message":"Missing query parameter","statusCode":400}'
2025-06-14 14:25:38,106 - 16840 - serper_dev_tool.py-serper_dev_tool:179 - ERROR: Error making request to Serper API: 400 Client Error: Bad Request for url: https://google.serper.dev/search
Response content: b'{"message":"Missing query parameter","statusCode":400}'
2025-06-14 14:25:38,698 - 16840 - serper_dev_tool.py-serper_dev_tool:179 - ERROR: Error making request to Serper API: 400 Client Error: Bad Request for url: https://google.serper.dev/search
Response con

Retrieved context from web scraping
Caching web content into vector DB (with deduplication)...
1 new unique chunks will be added to the vector DB.
Unique web content added to vector DB.

AI: It seems like you didn't ask a question. Please feel free to ask me anything about RAG or any other topic, and I'll do my best to help. 

If you're interested in learning more about RAG, I can provide some additional information. RAG is a powerful tool for natural language processing and has many potential applications. It can be used to improve the performance of language models, generate text, and answer questions.

Some of the key benefits of RAG include:

* Improved accuracy: RAG can help language models to better understand the context and nuances of language, leading to more accurate responses.
* Increased efficiency: RAG can reduce the amount of time and computational resources required to process and generate text.
* Enhanced creativity: RAG can be used to generate novel and creative text, 

KeyboardInterrupt: 