# HousingMind: Semantic Search Preview

This notebook demonstrates how to use embeddings to search across housing policy documents or instructions.

In [None]:
import json
from pathlib import Path

import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util

# Load example dataset: a JSON Lines file (one JSON object per line) read
# into a DataFrame with one row per record.
instruction_path = Path("../instruction_data/housingmind_instructions.jsonl")
# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# default text encoding.
with instruction_path.open("r", encoding="utf-8") as f:
    # Skip whitespace-only lines (e.g. a trailing blank line at end of file),
    # which would otherwise make json.loads raise JSONDecodeError.
    records = [json.loads(line) for line in f if line.strip()]
df = pd.DataFrame(records)

In [None]:
# Instantiate the sentence-embedding model by name.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every value of the 'prompt' column in one call; the result is
# requested as a tensor (convert_to_tensor=True) rather than the default.
prompt_texts = df['prompt'].tolist()
embeddings = model.encode(prompt_texts, convert_to_tensor=True)

In [None]:
# Free-text question to look up among the encoded prompts.
query = "How do PHAs calculate HAP for PBV units?"

# Encode the query with the same model used for the corpus, again as a tensor.
query_embedding = model.encode(
    query,
    convert_to_tensor=True,
)

In [None]:
# Ask for the 5 best-scoring corpus entries for the query embedding.
hits = util.semantic_search(query_embedding, embeddings, top_k=5)

# Only one query was submitted, so its matches are the first result list.
query_matches = hits[0]
for hit in query_matches:
    # 'corpus_id' is used as the positional row index into df.
    idx = hit['corpus_id']
    # Print the score, the matched prompt, and the first 300 characters of
    # its response.
    print(f"Score: {hit['score']:.4f}\nPrompt: {df.iloc[idx]['prompt']}\nResponse: {df.iloc[idx]['response'][:300]}\n")

## Next Steps
- Expand with metadata filtering (program type, document source)
- Use embeddings in retrieval-augmented generation (RAG)