In [2]:
import nest_asyncio

In [3]:
# Patch asyncio so nested event loops are allowed — presumably needed because
# the notebook kernel already runs a loop and later async-backed calls would
# otherwise fail; confirm before removing.
nest_asyncio.apply()

In [None]:
!pip install llama-index qdrant_client torch transformers
!pip install llama-index-embeddings-huggingface

!pip install llama-index-llms-ollama

!pip install llama-index-vector-stores-qdrant



In [6]:
import qdrant_client

# Collection name handed to QdrantVectorStore when the index is built.
collection_name = "demo2"

# Client for the Qdrant server listening on localhost:6333.
client = qdrant_client.QdrantClient(host="localhost", port=6333)

In [None]:
from llama_index.core import SimpleDirectoryReader

# Directory that is searched (recursively) for source documents.
input_dir_path = './docs'

# Only files with a .pdf extension are picked up.
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    recursive=True,
    required_exts=[".pdf"],
)

docs = loader.load_data()

In [70]:
# Sanity check: how many items loader.load_data() returned.
len(docs)

2

In [71]:
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.text_splitter import SentenceSplitter

# Chunking configuration shared by every index built in this notebook.
node_parser = SimpleNodeParser.from_defaults(
    chunk_size=512,
    chunk_overlap=128
)

def create_index(documents):
    """Chunk ``documents`` with ``node_parser`` and index them in Qdrant.

    Args:
        documents: Documents to split, embed and store (e.g. the output of
            ``SimpleDirectoryReader.load_data()``).

    Returns:
        A ``VectorStoreIndex`` backed by the Qdrant collection
        ``collection_name`` on the module-level ``client``.

    Note:
        A later cell redefines ``create_index`` relying on global ``Settings``;
        this version passes the parser explicitly instead.
    """
    # Imported locally so the function works on a fresh kernel, before the
    # cell that performs these imports at module level has been run.
    from llama_index.core import StorageContext, VectorStoreIndex
    from llama_index.vector_stores.qdrant import QdrantVectorStore

    vector_store = QdrantVectorStore(client=client, collection_name=collection_name)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    # `transformations` replaces the deprecated ServiceContext(node_parser=...)
    # mechanism for choosing how documents are split into nodes.
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        transformations=[node_parser],
    )
    return index

In [72]:
from llama_index.core import Settings, VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Initialize embedding model.
# trust_remote_code is deliberately NOT enabled: it lets code shipped in the
# model repository execute on load, and this checkpoint does not need it.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-large-en-v1.5"
)

# Update settings so index construction picks up the embedder and chunker.
Settings.embed_model = embed_model
Settings.node_parser = node_parser

def create_index(documents):
    """Embed ``documents`` and store them in the Qdrant collection.

    Uses the module-level ``client`` and ``collection_name`` for storage and
    the global ``Settings`` (embedding model, node parser) for processing.

    Args:
        documents: Documents to index.

    Returns:
        The resulting ``VectorStoreIndex``.
    """
    vector_store = QdrantVectorStore(
        client=client,
        collection_name=collection_name
    )
    storage_context = StorageContext.from_defaults(
        vector_store=vector_store
    )
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context
    )
    return index

# Create the index.
# NOTE(review): every run of this cell inserts the documents again into the
# same collection — confirm whether the collection should be reset first.
index = create_index(docs)

In [73]:
import os
from getpass import getpass

from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

# Never hardcode the key in the notebook. Respect a key that is already set in
# the environment and only prompt (without echo) when it is missing, instead of
# overwriting it with a placeholder string.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Initialize LLM
llm = OpenAI(
    model="gpt-3.5-turbo",  # OpenAI model
    temperature=0.7,
    max_tokens=512
)

# Update the global settings
Settings.llm = llm

# Alternative: use a local Ollama model instead of OpenAI.
# from llama_index.llms.ollama import Ollama
# llm = Ollama(model="llama3.2:1b", request_timeout=120.0)
# Settings.llm = llm

In [74]:
import os

# PromptTemplate and SentenceTransformerRerank are imported here because this
# cell uses them; previously they were only available if a later cell had
# already been run (or not at all), which broke Restart & Run All.
from llama_index.core import PromptTemplate, Settings
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.llms.openai import OpenAI

# Initialize LLM
llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0.7,
    max_tokens=512
)

# Update the global settings
Settings.llm = llm

# Create the reranker: keeps the 3 best of the retrieved candidates
rerank = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-2-v2", 
    top_n=3
)

# Create the query engine: fetch 10 candidates, then rerank them
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[rerank]
)

# Define the prompt template
template = """Context information is below:
              ---------------------
              {context_str}
              ---------------------
              Based on the context above, analyze the query and provide the response in the following format:
              
              Scenario: [Describe the situation from matching context]
              Remediation: [Provide specific prevention/remediation steps]
              Points of contact: [List relevant contact information/helplines]
              
              If no relevant information is found in the context, respond with "No matching scenario found."
              
              Query: {query_str}
              
              Response:"""

qa_prompt_tmpl = PromptTemplate(template)

# Update query engine with new template
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
)

In [75]:
from llama_index.core.postprocessor import SentenceTransformerRerank

# Cross-encoder reranker that keeps the three highest-scoring nodes.
rerank = SentenceTransformerRerank(top_n=3, model="cross-encoder/ms-marco-MiniLM-L-2-v2")

In [44]:
# from llama_index.core import get_response_synthesizer

# query_engine = index.as_query_engine(
#     similarity_top_k=10,
#     node_postprocessors=[rerank]
# )

# def print_source_nodes(response):
#     source_nodes = response.source_nodes
#     print("\nRetrieved chunks:")
#     for i, node in enumerate(source_nodes):
#         print(f"\nChunk {i+1}:")
#         print(node.text)

# response = query_engine.query("Got message about cryptocurrency investment with guaranteed returns. What are the Points of Contact?")
# # print_source_nodes(response)

In [78]:
import time
from llama_index.core import get_response_synthesizer
from IPython.display import Markdown, display

class RAGDebugger:
    """Run one query through a retriever-based query engine and print diagnostics.

    The pipeline is executed stage by stage (retrieve -> node postprocessors ->
    response synthesis) so that each stage runs exactly once, the chunks that
    are printed are the ones actually handed to the LLM, and the LLM timing
    does not silently include a second retrieval.

    Attributes:
        query_engine: Engine exposing ``_retriever``, optional
            ``_node_postprocessors``, ``synthesize`` and ``query``.
        timings: Seconds spent per stage, keyed by ``'vector_search'``,
            ``'postprocess'``, ``'llm_response'`` and ``'total'``.
    """

    def __init__(self, query_engine):
        self.query_engine = query_engine
        self.timings = {}

    @staticmethod
    def _to_query_bundle(query):
        """Wrap a plain query string in a ``QueryBundle`` (bundles pass through)."""
        # Local import keeps the class usable regardless of which import
        # cells have been executed.
        from llama_index.core import QueryBundle
        return query if isinstance(query, QueryBundle) else QueryBundle(query)

    def _time_vector_search(self, query):
        """Time the raw retriever call and return the nodes it produced."""
        start = time.perf_counter()
        # Access the internal retriever to get timing for vector search only
        retrieved_nodes = self.query_engine._retriever.retrieve(query)
        self.timings['vector_search'] = time.perf_counter() - start
        return retrieved_nodes

    def _apply_postprocessors(self, nodes, query):
        """Run the engine's node postprocessors (e.g. a reranker) over ``nodes``.

        Returns the nodes unchanged when the engine has no postprocessors.
        """
        query_bundle = self._to_query_bundle(query)
        postprocessors = getattr(self.query_engine, "_node_postprocessors", None) or []
        start = time.perf_counter()
        for postprocessor in postprocessors:
            nodes = postprocessor.postprocess_nodes(nodes, query_bundle=query_bundle)
        self.timings['postprocess'] = time.perf_counter() - start
        return nodes

    def _time_llm_response(self, query, nodes=None):
        """Time the LLM (OpenAI) response.

        Args:
            query: The user query.
            nodes: Context nodes to synthesize from. When omitted, the full
                ``query_engine.query`` pipeline is run (and timed) instead,
                which includes its own retrieval.
        """
        if nodes is None:
            start = time.perf_counter()
            response = self.query_engine.query(query)
        else:
            query_bundle = self._to_query_bundle(query)
            start = time.perf_counter()
            response = self.query_engine.synthesize(query_bundle, nodes)
        self.timings['llm_response'] = time.perf_counter() - start
        return response

    def print_debug_info(self, user_query):
        """
        Print debug information including timings for each step

        Returns the response object produced for ``user_query``.
        """
        print("\n" + "="*50)
        print("RAG DEBUG INFORMATION")
        print("="*50)

        # Time the entire process (monotonic clock, unaffected by clock changes)
        total_start = time.perf_counter()

        print("\nUser Query:")
        print("-"*50)
        print(user_query)

        # Time vector search
        print("\nRetrieving chunks from Vector DB...")
        retrieved_nodes = self._time_vector_search(user_query)
        print(f"Time taken for Vector Search: {self.timings['vector_search']:.3f} seconds")

        # Apply reranking/filtering so the listing below matches what the LLM sees
        context_nodes = self._apply_postprocessors(retrieved_nodes, user_query)

        print("\nContext being sent to OpenAI:")
        print("-"*50)
        for i, node in enumerate(context_nodes):
            print(f"\nChunk {i+1}:")
            print(f"{node.node.text.strip()}")
            print("\n" + "-"*30)

        # Time LLM response (synthesis only; retrieval is not repeated)
        print("\nGetting response from OpenAI...")
        response = self._time_llm_response(user_query, nodes=context_nodes)
        print(f"Time taken for OpenAI Response: {self.timings['llm_response']:.3f} seconds")

        # Calculate total time
        total_time = time.perf_counter() - total_start
        self.timings['total'] = total_time

        print("\nFinal Response:")
        print("-"*50)
        print(str(response))

        print("\nTiming Summary:")
        print("-"*50)
        print(f"Vector Search Time: {self.timings['vector_search']:.3f} seconds")
        print(f"Postprocessing Time: {self.timings['postprocess']:.3f} seconds")
        print(f"OpenAI Response Time: {self.timings['llm_response']:.3f} seconds")
        print(f"Total Time: {self.timings['total']:.3f} seconds")

        return response

# Example usage:
user_query = (
    "Got an email regarding an investment scheme promising "
    "50% returns in 3 months via WhatsApp"
)
# Wrap the configured query engine and run the sample query with diagnostics.
debugger = RAGDebugger(query_engine)
response = debugger.print_debug_info(user_query)


RAG DEBUG INFORMATION

User Query:
--------------------------------------------------
Got an email regarding an investment scheme promising 50% returns in 3 months via WhatsApp

Retrieving chunks from Vector DB...
Time taken for Vector Search: 5.590 seconds

Context being sent to OpenAI:
--------------------------------------------------

Chunk 1:
Scenario  1:  I  received  an  email  stating  that  I  won  a  lottery.  I  am  being  asked  to  provide  
documents.
 Remediation:  Do  not  respond  to  the  email  or  share  any  documents.  This  is  a  classic  lottery  
scam
 
attempting
 
to
 
steal
 
your
 
information.
 Points  of  contact:  Cyber  Crime  Portal  (cybercrime.gov.in)  or  call  National  Cybercrime  Helpline  
1930
  Scenario  2:  Someone  called  claiming  to  be  from  my  bank  requesting  my  OTP  to  update  KYC.  Remediation:  Banks  never  ask  for  OTP  over  phone.  Never  share  OTP/PIN/CVV  with  anyone.  Points  of  contact:  File  complaint  with  loc