In [None]:
import os
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_milvus import Milvus

# 1. Load the data
# The policy document is read from the shared data folder via a relative path
# ("../../data"), so it is resolved against the kernel's working directory.
loader = TextLoader("../../data/parking_policy.md", encoding="utf-8")
documents = loader.load()

# 2. Split the text
# Chunks of 500 characters with a 100-character overlap, so text that straddles
# a cut also appears in the neighbouring chunk. The separators are tried in
# order: paragraph break, line break, space, then individual characters.
splitter_options = {
    "chunk_size": 500,
    "chunk_overlap": 100,
    "separators": ["\n\n", "\n", " ", ""],
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
docs = text_splitter.split_documents(documents)

print(f"Loaded {len(documents)} document(s) and split into {len(docs)} chunks.")

# 3. Define the embedding model
# A sentence-transformers model loaded through HuggingFaceEmbeddings
# (no API key is passed anywhere in this cell).
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# 4. Initialize Milvus Lite and store the chunks
# The URI is a local file path rather than a server address, which is what
# selects 'Milvus Lite' mode.
URI = "../../data/parking.db"
COLLECTION_NAME = "parking_policy_collection"

print("Creating Milvus vector store...")
vector_store = Milvus.from_documents(
    documents=docs,
    embedding=embeddings,
    collection_name=COLLECTION_NAME,
    connection_args={"uri": URI},
    # Drop any existing collection with this name so the cell can be re-run
    # from scratch (convenient for testing; destructive for real data).
    drop_old=True,
)

print(f"Successfully created vector store at {URI}")


Loaded 1 document(s) and split into 8 chunks.
Creating Milvus vector store...


  from pkg_resources import DistributionNotFound, get_distribution


Successfully created vector store at ../../data/parking.db

--- Test Query Result ---
Query: What is the hourly rate for parking?
Retrieved: ## 2. Operating Hours & Access

- **Standard Hours:** The facility is open 24 hours a day, 7 days a week.
- **Staffed Hours:** On-site customer support is available from 8:00 AM to 8:00 PM daily.
- **After-Hours Access:** Pedestrian access to the facility after 10:00 PM requires scanning a valid active reservation QR code or a monthly pass at the pedestrian doors.

## 3. Pricing Policy (Base Rates)...


I0000 00:00:1771852801.839168   59686 chttp2_transport.cc:1353] unix:/tmp/tmp3itqrkeh_parking.db.sock: Got goaway [11] err=UNAVAILABLE:GOAWAY received; Error code: 11; Debug Text: too_many_pings {grpc_status:14, http2_error:11}
E0000 00:00:1771852801.839271   59686 chttp2_transport.cc:1385] unix:/tmp/tmp3itqrkeh_parking.db.sock: Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings". Current keepalive time (before throttling): 10000ms


In [None]:

# Optional: quick verification query.
# Runs a similarity search against `vector_store` (built in the previous cell)
# and prints a short preview of each hit. Printing the raw result list would
# dump the full repr of every Document, and a trailing "..." after it would
# wrongly suggest the output had been truncated.
PREVIEW_CHARS = 300  # maximum characters of each chunk to show

query = "What is the hourly rate for parking?"
results = vector_store.similarity_search(query, k=3)

print("\n--- Test Query Result ---")
print(f"Query: {query}")

if not results:
    # Make an empty result visible instead of printing nothing.
    print("Retrieved: (no results)")

for rank, doc in enumerate(results, start=1):
    content = doc.page_content
    # Only add the ellipsis when the chunk really was cut short.
    suffix = "..." if len(content) > PREVIEW_CHARS else ""
    print(f"Retrieved [{rank}]: {content[:PREVIEW_CHARS]}{suffix}")


--- Test Query Result ---
Query: What is the hourly rate for parking?
Retrieved: [Document(metadata={'pk': 464480558951956482, 'source': '../../data/parking_policy.md'}, page_content='## 2. Operating Hours & Access\n\n- **Standard Hours:** The facility is open 24 hours a day, 7 days a week.\n- **Staffed Hours:** On-site customer support is available from 8:00 AM to 8:00 PM daily.\n- **After-Hours Access:** Pedestrian access to the facility after 10:00 PM requires scanning a valid active reservation QR code or a monthly pass at the pedestrian doors.\n\n## 3. Pricing Policy (Base Rates)'), Document(metadata={'pk': 464480558951956483, 'source': '../../data/parking_policy.md'}, page_content='## 3. Pricing Policy (Base Rates)\n\n_Note: Prices are subject to dynamic adjustments based on demand. Check real-time availability for exact quotes._\n\n- **Hourly Rate:** $5.00 per hour (or part thereof).\n- **Daily Max:** $35.00 for any 24-hour period.\n- **Overnight Flat Rate:** $15.00 (Entry afte

I0000 00:00:1771852993.904574   59686 chttp2_transport.cc:1353] unix:/tmp/tmp3itqrkeh_parking.db.sock: Got goaway [11] err=UNAVAILABLE:GOAWAY received; Error code: 11; Debug Text: too_many_pings {grpc_status:14, http2_error:11}
E0000 00:00:1771852993.904626   59686 chttp2_transport.cc:1385] unix:/tmp/tmp3itqrkeh_parking.db.sock: Received a GOAWAY with error code ENHANCE_YOUR_CALM and debug data equal to "too_many_pings". Current keepalive time (before throttling): 20000ms
