In [15]:
from flask import Flask, request, jsonify
from llama_index.core import Settings, Document, VectorStoreIndex,SimpleDirectoryReader
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core.tools import QueryEngineTool
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.query_engine import RouterQueryEngine,MultiStepQueryEngine
from llama_index.core.selectors import LLMSingleSelector, LLMMultiSelector
import pandas as pd
import faiss
import fitz
import os
from flask_cors import CORS
from llama_index.core.indices.query.query_transform.base import (
    StepDecomposeQueryTransform,
)
from llama_index.core import (
    Settings, Document, VectorStoreIndex, StorageContext, load_index_from_storage
)
from llama_index.core.agent import ReActAgent
from datetime import datetime

In [2]:
# Credentials must never be committed to a notebook. The key is read from the
# environment, and only prompted for (without echo) when it is not already set.
# NOTE(review): a literal key was previously stored in this cell; treat it as
# compromised and revoke/rotate it in the Google Cloud console.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("GOOGLE_API_KEY: ")

In [3]:
# Gemini chat model, registered on the global Settings object so that
# LlamaIndex components pick it up as the default LLM.
llm = GoogleGenAI(
    model="gemini-2.0-flash",
    # Adjacent literals are concatenated by Python into one prompt string.
    system_prompt=(
        "You are an AI assistant for Indian law enforcement and citizens, designed to provide accurate legal information while dynamically adapting communication to the user's role. "
        "When detecting the user is a police officer, deliver technical, precise operational details using professional terminology. "
        "When engaging with citizens, use clear, compassionate language explaining rights and procedures. "
        "Your core principles include maintaining neutrality, protecting privacy, preventing information misuse, and directing users to official resources when necessary. "
        "Always ground responses in verified legal documents, cross-reference official sources, and aim to enhance understanding of legal processes while serving both institutional and public interests with empathy and accuracy."
    ),
    temperature=0.5,
)
Settings.llm = llm

In [4]:
# Embedding model used for both indexing and querying; registered on the
# global Settings object as the default embedder.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)
Settings.embed_model = embed_model

In [7]:
# Load the Police Act PDF into LlamaIndex Document objects.
documents = SimpleDirectoryReader(
    input_files=['./police_act_1861.pdf'],
).load_data()

In [8]:
# Number of Document objects returned by the reader.
print(len(documents))

19


In [9]:
# Inspect the metadata the reader attached to the first document.
print(f"Document Metadata: {documents[0].metadata}")

Document Metadata: {'page_label': '1', 'file_name': 'police_act_1861.pdf', 'file_path': 'police_act_1861.pdf', 'file_type': 'application/pdf', 'file_size': 58624, 'creation_date': '2025-04-10', 'last_modified_date': '2025-03-21'}


In [10]:
from llama_index.core.schema import MetadataMode

# Show the first document as rendered for MetadataMode.EMBED.
# To see the MetadataMode.LLM rendering instead (what the LLM sees), use:
# print(documents[0].get_content(metadata_mode=MetadataMode.LLM))
print(documents[0].get_content(metadata_mode=MetadataMode.EMBED))

page_label: 1
file_path: police_act_1861.pdf

 1 
   
1THE POLICE ACT, 1861 
 
(5 of 1861) 
                                                    [ 22nd March, 1861] 
An Act for the Regulation of Police 
 
 Preamble: - WHEREAS it is expedient to re-organise the police and to make it a 
more efficient instrument for the prevention and detection of crime; It is enacted as 
follows: - 
1. Interpretation clause: - The following words and expressions in this Act shall   
have the meaning assigned to them, unless there be something in the subject of context 
repugnant to such construction , that is to say- 
the words “Magistrate of the district” shall mean the chief officer charged with 
the executive administration of a district and exercising the powers of a Magistrate, by 
whatever designation the chief officer charged with such executive administration is styled; 
 the word “Magistrate” shall include all persons within the general police district, 
exercising all or and of the powers of a 

In [11]:
for doc in documents:
    # Render each document as a metadata section followed by a content section.
    doc.text_template = "Metadata:\n{metadata_str}\n---\nContent:\n{content}"

    # Keep the page label out of the text that gets embedded. The membership
    # check makes the cell safe to re-run without appending duplicates.
    excluded_keys = doc.excluded_embed_metadata_keys
    if "page_label" in excluded_keys:
        continue
    excluded_keys.append("page_label")

In [12]:
# Re-check the EMBED-mode rendering of the first document after the template
# and metadata-exclusion changes made in the previous cell.
print(documents[0].get_content(metadata_mode=MetadataMode.EMBED))

Metadata:
file_path: police_act_1861.pdf
---
Content:
 1 
   
1THE POLICE ACT, 1861 
 
(5 of 1861) 
                                                    [ 22nd March, 1861] 
An Act for the Regulation of Police 
 
 Preamble: - WHEREAS it is expedient to re-organise the police and to make it a 
more efficient instrument for the prevention and detection of crime; It is enacted as 
follows: - 
1. Interpretation clause: - The following words and expressions in this Act shall   
have the meaning assigned to them, unless there be something in the subject of context 
repugnant to such construction , that is to say- 
the words “Magistrate of the district” shall mean the chief officer charged with 
the executive administration of a district and exercising the powers of a Magistrate, by 
whatever designation the chief officer charged with such executive administration is styled; 
 the word “Magistrate” shall include all persons within the general police district, 
exercising all or and of the powe

In [13]:
# Display the raw metadata dict of the first document.
documents[0].metadata

{'page_label': '1',
 'file_name': 'police_act_1861.pdf',
 'file_path': 'police_act_1861.pdf',
 'file_type': 'application/pdf',
 'file_size': 58624,
 'creation_date': '2025-04-10',
 'last_modified_date': '2025-03-21'}

In [16]:
# Sentence-based splitter configured with chunk_size=1024 and chunk_overlap=100.
splitter = SentenceSplitter(
    chunk_size=1024,
    chunk_overlap=100,
)

In [17]:
# The notebook kernel already runs an event loop; nest_asyncio patches asyncio
# so that nested event-loop use is permitted inside it.
import nest_asyncio

nest_asyncio.apply()

In [18]:
# Split the loaded documents into nodes and inspect the result.
splitter = SentenceSplitter(
    chunk_size=1024,
    chunk_overlap=100,
)
nodes = splitter.get_nodes_from_documents(documents)

print(f"Length of nodes : {len(nodes)}")
# Render the first node with all of its metadata included.
first_node_text = nodes[0].get_content(metadata_mode='all')
print(f"get the content for node 0 :{first_node_text}")

Length of nodes : 19
get the content for node 0 :Metadata:
page_label: 1
file_name: police_act_1861.pdf
file_path: police_act_1861.pdf
file_type: application/pdf
file_size: 58624
creation_date: 2025-04-10
last_modified_date: 2025-03-21
---
Content:
1 
   
1THE POLICE ACT, 1861 
 
(5 of 1861) 
                                                    [ 22nd March, 1861] 
An Act for the Regulation of Police 
 
 Preamble: - WHEREAS it is expedient to re-organise the police and to make it a 
more efficient instrument for the prevention and detection of crime; It is enacted as 
follows: - 
1. Interpretation clause: - The following words and expressions in this Act shall   
have the meaning assigned to them, unless there be something in the subject of context 
repugnant to such construction , that is to say- 
the words “Magistrate of the district” shall mean the chief officer charged with 
the executive administration of a district and exercising the powers of a Magistrate, by 
whatever designatio

In [19]:
# Embed a throwaway string and take the vector's length, so the embedding
# dimension is discovered from the model rather than hardcoded.
dummy = embed_model.get_text_embedding("test")
EMBED_DIMENSION = len(dummy)


In [20]:
# Display the detected embedding dimension.
EMBED_DIMENSION

384

In [21]:
# FAISS flat L2 index constructed with the embedding dimension detected earlier.
pdf_index = faiss.IndexFlatL2(EMBED_DIMENSION)

In [23]:
# Wrap the raw FAISS index in LlamaIndex's FaissVectorStore.
vector_store = FaissVectorStore(faiss_index=pdf_index)

In [25]:
 # Build the vector index on top of the FAISS store.
 # `vector_store` is not a named parameter of VectorStoreIndex, so passing it
 # directly is accepted via **kwargs but not used: the index would fall back to
 # the default in-memory store and the FAISS index would stay empty. The store
 # has to be supplied through a StorageContext instead.
 index = VectorStoreIndex(
     nodes,
     storage_context=StorageContext.from_defaults(vector_store=vector_store),
 )

In [28]:
from llama_index.core.vector_stores import MetadataFilters,FilterCondition

In [30]:
# Query engine over the index, configured with similarity_top_k=2.
query_engine = index.as_query_engine(similarity_top_k=2)

In [31]:
# Smoke-test the query engine with a sample question.
response = query_engine.query("can a police have another job?")

In [33]:
# Display the metadata attached to the response; the recorded output shows one
# entry per source chunk, keyed by what appear to be node ids.
response.metadata

{'7936c0b3-330a-4c07-814b-f2043c2c3223': {'page_label': '5',
  'file_name': 'police_act_1861.pdf',
  'file_path': 'police_act_1861.pdf',
  'file_type': 'application/pdf',
  'file_size': 58624,
  'creation_date': '2025-04-10',
  'last_modified_date': '2025-03-21'},
 'f76bd5aa-1f01-4ecc-8ce3-8766bcf07d51': {'page_label': '6',
  'file_name': 'police_act_1861.pdf',
  'file_path': 'police_act_1861.pdf',
  'file_type': 'application/pdf',
  'file_size': 58624,
  'creation_date': '2025-04-10',
  'last_modified_date': '2025-03-21'}}

In [1]:
# Registry of the core reference documents: short name -> file path plus a
# natural-language description of the document's contents.
# Long descriptions are written as adjacent literals, which Python joins into
# a single string, so the stored values are unchanged.
CORE_FILES = {
    "police_act": {
        "path": "./police_act_1861.pdf",
        "description": (
            "The Police Act, 1861 is an Act of the Parliament of India that regulates the police force in India. "
            "It was enacted on 22 March 1861 and is one of the oldest laws in India. "
            "The Act provides for the establishment and regulation of police forces in the country, "
            "including their powers, duties, and responsibilities."
        ),
    },
    "standing_order": {
        "path": "./pso.pdf",
        "description": (
            "The Police Standing Orders are a set of guidelines and instructions issued by the police department to its officers. "
            "These orders cover various aspects of police work, including procedures, protocols, and conduct expected from police personnel."
        ),
    },
    "ipc_codes": {
        "path": "./IPC_codes.pdf",
        "type": "pdf",
        "description": (
            "The Indian Penal Code (IPC) is the main criminal code of India. "
            "It is a comprehensive code intended to cover all substantive aspects of criminal law. "
            "The IPC was enacted in 1860 and came into force in 1862."
        ),
    },
    "emergency_numbers": {
        "path": "./emergency_numbers.pdf",
        "description": (
            "Emergency contact numbers are crucial for public safety and security. "
            "These numbers provide immediate access to emergency services such as police, fire, and medical assistance. "
            "providing citizens with the necessary information to contact these services in times of crisis."
        ),
    },
}


In [2]:
# Peek at the (name, config) pairs held in the registry.
CORE_FILES.items()

dict_items([('police_act', {'path': './police_act_1861.pdf', 'description': 'The Police Act, 1861 is an Act of the Parliament of India that regulates the police force in India. It was enacted on 22 March 1861 and is one of the oldest laws in India. The Act provides for the establishment and regulation of police forces in the country, including their powers, duties, and responsibilities.'}), ('standing_order', {'path': './pso.pdf', 'description': 'The Police Standing Orders are a set of guidelines and instructions issued by the police department to its officers. These orders cover various aspects of police work, including procedures, protocols, and conduct expected from police personnel.'}), ('ipc_codes', {'path': './IPC_codes.pdf', 'type': 'pdf', 'description': 'The Indian Penal Code (IPC) is the main criminal code of India. It is a comprehensive code intended to cover all substantive aspects of criminal law. The IPC was enacted in 1860 and came into force in 1862.'}), ('emergency_numb

In [6]:
# Print each registry entry's config dict, one per line, in insertion order.
for doc_type in CORE_FILES:
    info = CORE_FILES[doc_type]
    print(info)

{'path': './police_act_1861.pdf', 'description': 'The Police Act, 1861 is an Act of the Parliament of India that regulates the police force in India. It was enacted on 22 March 1861 and is one of the oldest laws in India. The Act provides for the establishment and regulation of police forces in the country, including their powers, duties, and responsibilities.'}
{'path': './pso.pdf', 'description': 'The Police Standing Orders are a set of guidelines and instructions issued by the police department to its officers. These orders cover various aspects of police work, including procedures, protocols, and conduct expected from police personnel.'}
{'path': './IPC_codes.pdf', 'type': 'pdf', 'description': 'The Indian Penal Code (IPC) is the main criminal code of India. It is a comprehensive code intended to cover all substantive aspects of criminal law. The IPC was enacted in 1860 and came into force in 1862.'}
{'path': './emergency_numbers.pdf', 'description': 'Emergency contact numbers are 