In [None]:
import os
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_milvus import Milvus

# 1. Load the data
# We assume parking_policy.md is in the same directory
loader = TextLoader("../../data/parking_policy.md", encoding="utf-8")
documents = loader.load()

# 2. Split the text
# Chunk size 500 is good for precise policy details. 
# Overlap ensures context isn't lost at the edges of cuts.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
    separators=["\n\n", "\n", " ", ""]
)
docs = text_splitter.split_documents(documents)

print(f"Loaded {len(documents)} document(s) and split into {len(docs)} chunks.")

# 3. Define the Embedding Model
# We use a standard, free, local model (no API key needed)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# 4. Initialize Milvus Lite and Store Data
# Setting the URI to a local file path automatically triggers 'Milvus Lite' mode.
URI = "../../data/parking.db"

print("Creating Milvus vector store...")
vector_store = Milvus.from_documents(
    documents=docs,
    embedding=embeddings,
    connection_args={"uri": URI},
    collection_name="parking_policy_collection",
    drop_old=True  # Drops the collection if it exists (good for testing/re-running)
)

print(f"Successfully created vector store at {URI}")


Loaded 1 document(s) and split into 8 chunks.
Creating Milvus vector store...


  from pkg_resources import DistributionNotFound, get_distribution


Successfully created vector store at ../../data/parking.db

--- Test Query Result ---
Query: What is the hourly rate for parking?
Retrieved: ## 2. Operating Hours & Access

- **Standard Hours:** The facility is open 24 hours a day, 7 days a week.
- **Staffed Hours:** On-site customer support is available from 8:00 AM to 8:00 PM daily.
- **After-Hours Access:** Pedestrian access to the facility after 10:00 PM requires scanning a valid active reservation QR code or a monthly pass at the pedestrian doors.

## 3. Pricing Policy (Base Rates)...


I0000 00:00:1771852801.839168   59686 chttp2_transport.cc:1353] unix:/tmp/tmp3itqrkeh_parking.db.sock: Got goaway [11] err=UNAVAILABLE:GOAWAY received; Error code: 11; Debug Text: too_many_pings {grpc_status:14, http2_error:11}
E0000 00:00:1771852801.839271   59686 chttp2_transport.cc:1385] unix:/tmp/tmp3itqrkeh_parking.db.sock: Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings". Current keepalive time (before throttling): 10000ms


In [1]:
from langchain_milvus import Milvus

from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
URI = "../../data/parking.db"
vector_store = Milvus(
    embedding_function=embeddings,
    connection_args={"uri": URI},
    collection_name="parking_policy_collection",
    drop_old=False  # Set to False to ensure you are loading existing data
)

  from pkg_resources import DistributionNotFound, get_distribution


In [None]:
from langchain_classic.retrievers import ContextualCompressionRetriever
from langchain_classic.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

# ... [Your existing code: Loader, Splitter, Embeddings, Milvus setup] ...

print("\n--- Setting up Reranker ---")

# 1. Define the Base Retriever
# We fetch a larger initial pool of documents (k=10) from Milvus
base_retriever = vector_store.as_retriever(search_kwargs={"k": 10})

# 2. Define the Cross-Encoder Model
# BAAI/bge-reranker-base is an excellent, free, open-source reranker
reranker_model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")

# 3. Create the Reranker (Compressor)
# We tell it to analyze the top 10 from Milvus, and only keep the Top 3
compressor = CrossEncoderReranker(model=reranker_model, top_n=3)

# 4. Combine them into the final Retrieval Pipeline
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, 
    base_retriever=base_retriever
)

print("Reranker setup complete!")

# --- Test the Two-Stage Retrieval ---
query = "Are heavy commercial trucks allowed?"
print(f"\nTesting Query: '{query}'\n")

# Use the compression_retriever.invoke() instead of vector_store.similarity_search()
reranked_results = compression_retriever.invoke(query)

for i, doc in enumerate(reranked_results):
    print(f"--- Rank {i+1} ---")
    print(doc.page_content)
    print("")


--- Setting up Reranker ---


config.json:   0%|          | 0.00/799 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

I0000 00:00:1771937930.471718   69258 chttp2_transport.cc:1353] unix:/tmp/tmp5__wkfjw_parking.db.sock: Got goaway [11] err=UNAVAILABLE:GOAWAY received; Error code: 11; Debug Text: too_many_pings {grpc_status:14, http2_error:11}
E0000 00:00:1771937930.471928   69258 chttp2_transport.cc:1385] unix:/tmp/tmp5__wkfjw_parking.db.sock: Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings". Current keepalive time (before throttling): 10000ms


tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

Reranker setup complete!

Testing Query: 'Are heavy commercial trucks allowed?'

--- Rank 1 ---
- **Address:** 101 Innovation Blvd, Tech District, Metro City, 54321.
- **Entrance:** Main entrance is located on the North side of Innovation Blvd, accessible via the right-hand lane.
- **Height Clearance:** Maximum vehicle height is 2.1 meters (6.8 feet).
- **Vehicle Types:** We accept sedans, SUVs, and motorcycles. Heavy commercial trucks, trailers, and RVs are strictly prohibited.

## 2. Operating Hours & Access

--- Rank 2 ---
## 6. Rules of Conduct

- **Speed Limit:** 10 km/h (6 mph) inside the facility.
- **Parking Discipline:** Vehicles must be parked within the marked lines. Taking up two spots may result in towing at the owner's expense.
- **Prohibited Items:** Storage of flammable materials or leaving unattended baggage is strictly prohibited.

--- Rank 3 ---
## 5. Amenities & Services

- **EV Charging:** Level 2 Electric Vehicle chargers are available on **Level 2, Row A**. Stand

I0000 00:00:1771938125.529173   69258 chttp2_transport.cc:1353] unix:/tmp/tmp5__wkfjw_parking.db.sock: Got goaway [11] err=UNAVAILABLE:GOAWAY received; Error code: 11; Debug Text: too_many_pings {http2_error:11, grpc_status:14}
E0000 00:00:1771938125.529394   69258 chttp2_transport.cc:1385] unix:/tmp/tmp5__wkfjw_parking.db.sock: Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings". Current keepalive time (before throttling): 20000ms


In [5]:

# Optional: Quick verification query
query = "What is the hourly rate for parking?"
results = client.similarity_search(query, k=3)

print("\n--- Test Query Result ---")
print(f"Query: {query}")
print(f"Retrieved: {results}...")

AttributeError: 'MilvusClient' object has no attribute 'similarity_search'

In [2]:
import sqlite3

def create_database():
    # Connect to (or create) the database file
    conn = sqlite3.connect("../../data/parking_db.sqlite")
    cursor = conn.cursor()

    # ---------------------------------------------------------
    # 1. Create Tables
    # ---------------------------------------------------------
    
    # Table for Physical Parking Spots
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS parking_spots (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        spot_number TEXT NOT NULL UNIQUE,
        spot_type TEXT NOT NULL,  -- 'Standard', 'EV', 'Accessible'
        floor TEXT NOT NULL,      -- 'Ground', 'Level 1', 'Level 2'
        status TEXT DEFAULT 'available', -- 'available', 'occupied'
        price_per_hour REAL DEFAULT 5.00
    )
    """)

    # Table for Reservations (aligns with Stage 2 requirements)
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS reservations (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        spot_id INTEGER,
        user_name TEXT NOT NULL,
        car_number TEXT NOT NULL,
        reservation_time TEXT DEFAULT CURRENT_TIMESTAMP, -- When the booking was made
        start_time TEXT NOT NULL,
        end_time TEXT NOT NULL,
        status TEXT DEFAULT 'pending', -- 'pending', 'confirmed', 'cancelled'
        FOREIGN KEY (spot_id) REFERENCES parking_spots (id)
    )
    """)

    # ---------------------------------------------------------
    # 2. Seed Initial Data (Based on your Policy)
    # ---------------------------------------------------------
    
    # Check if spots already exist to avoid duplicates on re-run
    cursor.execute("SELECT count(*) FROM parking_spots")
    if cursor.fetchone()[0] == 0:
        print("Seeding database with initial spots based on Policy...")
        
        spots_data = [
            # -- Ground Floor (Policy: Accessible spots here) --
            ("G-01", "Accessible", "Ground", "available"),
            ("G-02", "Accessible", "Ground", "available"),
            ("G-03", "Standard", "Ground", "available"),
            
            # -- Level 1 (Standard Parking) --
            ("L1-A1", "Standard", "Level 1", "available"),
            ("L1-A2", "Standard", "Level 1", "available"),
            ("L1-A3", "Standard", "Level 1", "maintenance"), # Simulating maintenance
            
            # -- Level 2 (Policy: EV Charging on Row A) --
            ("L2-A1", "EV", "Level 2", "available"),
            ("L2-A2", "EV", "Level 2", "available"),
            ("L2-B1", "Standard", "Level 2", "available"),
        ]
        
        cursor.executemany("""
            INSERT INTO parking_spots (spot_number, spot_type, floor, status)
            VALUES (?, ?, ?, ?)
        """, spots_data)
        
        print(f"Added {len(spots_data)} parking spots.")
    else:
        print("Database already contains data. Skipping seed.")

    # ---------------------------------------------------------
    # 3. Verify and Close
    # ---------------------------------------------------------
    conn.commit()
    conn.close()
    print("Database 'parking_db.sqlite' created successfully.")

if __name__ == "__main__":
    create_database()

Seeding database with initial spots based on Policy...
Added 9 parking spots.
Database 'parking_db.sqlite' created successfully.
