 DOCUMENT PARSER


In [8]:
from pypdf import PdfReader
from langchain_text_splitters import RecursiveCharacterTextSplitter
def parseText(pdf_path):
    """Parse PDF and split into chunks"""
    reader = PdfReader(pdf_path)
    policy_text = ""

    for page_num, page in enumerate(reader.pages, start=1):
        text = page.extract_text()
        if text:
            policy_text += f"\n\n[Page {page_num}]\n{text}"

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=400,
        chunk_overlap=50
    )
    return splitter.split_text(policy_text)

SEMANTIC SEARCH


In [9]:
from sentence_transformers import SentenceTransformer
import faiss
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

def build_index(chunks):
    """Build FAISS index for semantic search"""
    embeddings = embed_model.encode(chunks)
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(np.array(embeddings))
    return index

def searchText(question, chunks, index, top_k=3):
    """Retrieve relevant chunks from the policy"""
    q_embedding = embed_model.encode([question])
    _, indices = index.search(q_embedding, top_k)
    return "\n".join([chunks[i] for i in indices[0]])

POLICY DATABASE

In [10]:
class PolicyDatabase:
    """Stores policy chunks and FAISS index"""
    def __init__(self, chunks):
        self.chunks = chunks
        self.index = build_index(chunks)

    def query(self, question):
        return searchText(question, self.chunks, self.index)


In [11]:
def classifyText(question):
    """Simple keyword-based intent classification"""
    question_lower = question.lower()
    if "rent" in question_lower:
        return "room_rent"
    elif "claim" in question_lower:
        return "claims"
    return "general"

In [12]:
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
model_name = "Qwen/Qwen2-1.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
)    

TOOL CALLING CONTROLLER

In [13]:
def tool_router(question):
    """Decide which tool to use based on question"""
    # Here we can integrate classifyText for simplicity
    intent = classifyText(question)
    if intent == "room_rent" or intent == "claims" or intent == "general":
        return "PolicyDatabase.query"
    return "PolicyDatabase.query"

In [17]:
def summarizeText(context, question):
    """Generate a simple, clear answer using the context"""
    prompt = f"""
You are an insurance policy assistant.


STRICT RULES:
- Use ONLY the policy context below
- If numbers or limits are present, you MUST quote them exactly
- If the answer exists, DO NOT say "not specified"
- If the answer does NOT exist, say:
  "The policy does not explicitly mention this information."

Explain in simple language.
Policy Context:
{context}

Question:
{question}

Answer:
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=200)
    return tokenizer.decode(output[0], skip_special_tokens=True)


In [15]:
def displayInformation(answer):
    print("\nAnswer:\n")
    print(answer)


In [None]:
if __name__ == "__main__":
    chunks = parseText("policy.pdf")

    policy_db = PolicyDatabase(chunks)

    question = "How much ambulance expense is covered?"

    tool_used = tool_router(question)
    print("Tool Selected by LLM:", tool_used)

    if tool_used == "PolicyDatabase.query":
        context = policy_db.query(question)
    else:
        context = "No relevant tool found."
    answer = summarizeText(context, question)
    displayInformation(answer)


    

Tool Selected by LLM: PolicyDatabase.query

Answer:


You are an insurance policy assistant.


STRICT RULES:
- Use ONLY the policy context below
- If numbers or limits are present, you MUST quote them exactly
- If the answer exists, DO NOT say "not specified"
- If the answer does NOT exist, say:
  "The policy does not explicitly mention this information."

Explain in simple language.
Policy Context:
8. Ambulance Expenses:  1% of Sum Insured per Policy period up to a maximum of INR 1500 will be reimbursed 
to Insured  for the cost of ambulance transportation.  Ambulance services used should be of a licensed 
ambulance operator. 
 
9. Ayurvedic Medicine: Ayurvedic Treatment covered up to maximum 15% of Sum Insured per Policy Period
restricted maximum up to 25% of the Sum Insured per illness/injury. 
 
2. Medical Practitioner , Surgeon, Anesthetist, Consultants, and Specialists Fees - All admissible claims under 
this section during the policy period restricted maximum up to 40% of the Su