In [None]:
import os
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_milvus import Milvus

# Build a local Milvus Lite vector store from parking_policy.md and run a
# single similarity search against it as a smoke test.

# 1. Load the data
# parking_policy.md is expected to be in the current working directory.
loader = TextLoader("./parking_policy.md", encoding="utf-8")
documents = loader.load()

# 2. Split the text
# A chunk size of 500 keeps individual policy details together, and the
# overlap of 100 preserves context that straddles a chunk boundary.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
    separators=["\n\n", "\n", " ", ""],
)
docs = text_splitter.split_documents(documents)

print(f"Loaded {len(documents)} document(s) and split into {len(docs)} chunks.")

# Fail early on an empty policy file. Because the store below is created with
# drop_old=True, continuing with no chunks would drop any existing collection
# and leave nothing to search.
if not docs:
    raise ValueError(
        "./parking_policy.md produced no text chunks; nothing to index."
    )

# 3. Define the Embedding Model
# A standard, free, local model (no API key needed).
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# 4. Initialize Milvus Lite and Store Data
# Setting the URI to a local file path automatically triggers 'Milvus Lite' mode.
URI = "./parking.db"

print("Creating Milvus vector store...")
vector_store = Milvus.from_documents(
    documents=docs,
    embedding=embeddings,
    connection_args={"uri": URI},
    collection_name="parking_policy_collection",
    drop_old=True,  # Drops the collection if it exists (good for testing/re-running)
)

print(f"Successfully created vector store at {URI}")

# Optional: Quick verification query
query = "What is the hourly rate for parking?"
results = vector_store.similarity_search(query, k=1)

print("\n--- Test Query Result ---")
print(f"Query: {query}")
# similarity_search returns a list that may be empty, so guard the index
# instead of letting results[0] raise IndexError.
if results:
    print(f"Retrieved: {results[0].page_content}...")
else:
    print("Retrieved: (no matching chunks found)")