In [5]:
import chromadb
import pandas as pd
import os

# 1. Configuration
# Absolute location of the on-disk Chroma store that this cell inspects.
db_path = os.path.abspath(r"D:\pY\InsuranceRAG\local_db")
print(f"üìÇ Looking for database at: {db_path}")

# PersistentClient creates a fresh, empty store when the directory is missing,
# which would make a mistyped path look like a valid database holding zero
# collections. Fail loudly instead of inspecting (and creating) the wrong one.
if not os.path.isdir(db_path):
    raise FileNotFoundError(f"No Chroma database directory at: {db_path}")

# Open the store and cache both the collection handles and their names; the
# rest of the cell reads `client`, `collections` and `collection_names`.
client = chromadb.PersistentClient(path=db_path)
collections = client.list_collections()
collection_names = [c.name for c in collections]
print(f"üìö Found {len(collections)} collections: {collection_names}")

# 2. Function to inspect a specific collection
def inspect_collection(name):
    """Build a one-row-per-record overview of a single Chroma collection.

    Reads the module-level ``client`` and ``collection_names``.

    Args:
        name: Name of the collection to inspect.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Collection``, ``Category``,
        ``Client ID``, ``Sub. Date``, ``Source File`` and ``Content Preview``,
        or ``None`` (after printing a message) when ``name`` is not in
        ``collection_names`` or the collection holds no records.
    """
    if name not in collection_names:
        print(f"‚ùå Collection '{name}' does not exist.")
        return None

    collection = client.get_collection(name=name)
    results = collection.get(include=["documents", "metadatas"])

    ids = results['ids']
    if not ids:
        print(f"‚ö†Ô∏è Collection '{name}' exists but is empty.")
        return None

    # Records stored without a document or without metadata can come back as
    # None (per entry, or for the whole list); substitute empty values so one
    # sparse record does not abort the whole inspection.
    documents = results['documents'] or [None] * len(ids)
    metadatas = results['metadatas'] or [None] * len(ids)

    records = []
    for document, metadata in zip(documents, metadatas):
        meta = metadata or {}
        text = document or ""
        records.append({
            "Collection": name,
            "Category": meta.get("source_type", "N/A"),
            "Client ID": meta.get("client_id", "Company"),
            "Sub. Date": meta.get("submission_date", "N/A"),
            # Only the file name is shown; the stored value is a full path.
            "Source File": os.path.basename(meta.get("source", "Unknown")),
            # First 100 characters on a single line, always followed by "...".
            "Content Preview": text[:100].replace('\n', ' ') + "...",
        })

    return pd.DataFrame(records)

# 3. Fetch and Display Both Collections
# Master policies: rendered in order of their originating file name.
print("\n--- üìú MASTER POLICIES ---")
df_policies = inspect_collection("policy_master_collection")
has_policies = df_policies is not None
if has_policies:
    display(df_policies.sort_values(by="Source File"))

# Client claims: grouped per client, then ordered by submission date.
print("\n--- üìë CLIENT CLAIMS ---")
df_claims = inspect_collection("claims_collection")
has_claims = df_claims is not None
if has_claims:
    display(df_claims.sort_values(by=["Client ID", "Sub. Date"]))

# 4. Summary Table
# Chunk counts for every collection in the store, printed only when at least
# one of the two inspected collections produced a table.
# NOTE(review): the summary is skipped when both tables are missing, even if
# other collections hold data - confirm that gating is intended.
if has_policies or has_claims:
    print("\nüìä Database Summary:")
    for coll in collections:
        print(f" - {coll.name}: {coll.count()} chunks")

üìÇ Looking for database at: D:\pY\InsuranceRAG\local_db
üìö Found 3 collections: ['claims_collection', 'policy_master_collection', 'insurance_docs']

--- üìú MASTER POLICIES ---


Unnamed: 0,Collection,Category,Client ID,Sub. Date,Source File,Content Preview
139,policy_master_collection,Policy,Company,,Membership Handbook.pdf.ingesting,request (Only original receipted invoices can ...
137,policy_master_collection,Policy,Company,,Membership Handbook.pdf.ingesting,of psychiatric illness up to the level shown ...
138,policy_master_collection,Policy,Company,,Membership Handbook.pdf.ingesting,member you should be given a claim form. If no...
140,policy_master_collection,Policy,Company,,Membership Handbook.pdf.ingesting,gig-gulf.com/uae/en/group-healthcare to obtain...
141,policy_master_collection,Policy,Company,,Membership Handbook.pdf.ingesting,Policy Handbook Health Insurance 12 13 6...
...,...,...,...,...,...,...
224,policy_master_collection,Policy,Company,,TRAVEL_TC_HSBCUAE_EN.pdf.ingesting,Following Death of Close Relative Chapter G:...
223,policy_master_collection,Policy,Company,,TRAVEL_TC_HSBCUAE_EN.pdf.ingesting,3.\t You are travelling against the advice of...
222,policy_master_collection,Policy,Company,,TRAVEL_TC_HSBCUAE_EN.pdf.ingesting,defined as within a five miles limit of a co...
229,policy_master_collection,Policy,Company,,TRAVEL_TC_HSBCUAE_EN.pdf.ingesting,Chapter N ¬É\t Personal Baggage and Personal M...



--- üìë CLIENT CLAIMS ---
‚ö†Ô∏è Collection 'claims_collection' exists but is empty.

üìä Database Summary:
 - claims_collection: 0 chunks
 - policy_master_collection: 279 chunks
 - insurance_docs: 559 chunks
