In [8]:
from pathlib import Path
import json

chunks_path = Path("normalized/chunks.jsonl")

# Read the JSONL file one physical line at a time. str.splitlines() would also
# split on characters such as U+0085, U+2028 and U+2029, which may appear
# unescaped inside JSON strings and would cut a record in half.
with chunks_path.open(encoding="utf-8") as fh:
    # Blank / whitespace-only lines are skipped; every other line is one JSON record.
    chunks = [json.loads(line) for line in fh if line.strip()]

# Display the number of chunks and the ID of the first one as a quick check.
len(chunks), chunks[0]["chunk_id"]


(82, 'antibiotic_stewardship::0')

In [2]:
# Now setup Qdrant client (after stopping FastAPI server)
# NOTE(review): presumably the server keeps the same "qdrant_local" storage open,
# so only one of them can use it at a time — confirm.
from qdrant_client import QdrantClient, models as qm

# Name of the collection that the later cells create, fill and query.
COLLECTION = "toy_agent_docs"

# Create Qdrant client - database should be free now
# The client is pointed at the local "qdrant_local" path rather than a server URL.
qdrant = QdrantClient(path="qdrant_local")
print("✅ Qdrant client connected successfully!")

✅ Qdrant client connected successfully!


In [1]:
# OpenAI client setup - run this cell first
import os
from dotenv import load_dotenv

# Pull environment variables in via python-dotenv before the key is read below.
load_dotenv()

from openai import OpenAI

# Embedding model name passed to every embeddings request.
EMBED_MODEL = "text-embedding-3-small"

# The API key is taken from the environment, never hard-coded.
oai = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [3]:
def embed_texts(texts):
    """Return one embedding per input text, in the same order as the input.

    Args:
        texts: list of strings to embed with ``EMBED_MODEL``.

    Returns:
        A list of embedding vectors where ``result[k]`` belongs to ``texts[k]``.
        An empty input yields an empty list without calling the API.
    """
    if not texts:
        # Nothing to embed, so no request is needed.
        return []

    resp = oai.embeddings.create(model=EMBED_MODEL, input=texts)
    # IMPORTANT: Keep order same as input. Each returned item carries the
    # position of its input in `index`, so order by that explicitly instead of
    # relying on the order of the response list.
    ordered = sorted(resp.data, key=lambda d: d.index)
    return [d.embedding for d in ordered]


In [None]:
# Sanity check: show the container type and how many chunks were loaded.
type(chunks), len(chunks)


(list, 82)

In [9]:
# Number of chunks embedded and upserted per iteration of the ingestion loop.
BATCH = 16

def to_point(idx, chunk, vector):
    """Build the Qdrant point for one chunk and its embedding.

    Args:
        idx: integer used as the point ID.
        chunk: mapping that provides the "chunk_id", "doc_id", "title",
            "text" and "metadata" entries.
        vector: embedding stored as the point's vector.

    Returns:
        A ``qm.PointStruct`` whose payload holds the five chunk fields.
    """
    # The original string ID is kept in the payload under "chunk_id",
    # because the point ID itself is the integer `idx`.
    payload_keys = ("chunk_id", "doc_id", "title", "text", "metadata")
    payload = {key: chunk[key] for key in payload_keys}
    return qm.PointStruct(id=idx, vector=vector, payload=payload)

# Embed the chunks in slices of BATCH and upsert each slice into COLLECTION.
total = 0
for i in range(0, len(chunks), BATCH):
    batch = chunks[i:i+BATCH]
    vectors = embed_texts([c["text"] for c in batch])

    # zip() below stops at the shorter sequence, so a short embedding response
    # would silently drop chunks. Fail loudly instead.
    if len(vectors) != len(batch):
        raise RuntimeError(
            f"Embedding count mismatch for batch starting at {i}: "
            f"expected {len(batch)}, got {len(vectors)}"
        )

    # create integer IDs based on position (stable as long as chunks list order is stable)
    points = [to_point(i + j, c, v) for j, (c, v) in enumerate(zip(batch, vectors))]

    qdrant.upsert(collection_name=COLLECTION, points=points)
    total += len(points)
    print("Upserted:", total)


Upserted: 16
Upserted: 32
Upserted: 48
Upserted: 64
Upserted: 80
Upserted: 82


In [4]:
# Embed a throwaway string and measure the vector to learn the embedding size.
probe_vector = embed_texts(["dimension check"])[0]
dim = len(probe_vector)
dim


1536

In [5]:
# Names of the collections currently known to the Qdrant client.
existing = [
    collection.name
    for collection in qdrant.get_collections().collections
]
existing


['toy_agent_docs']

In [6]:
# Ask Qdrant directly whether the collection exists instead of reading a list
# captured by another cell: that snapshot goes stale, and re-running this cell
# after it has created the collection would try to create it a second time.
if qdrant.collection_exists(COLLECTION):
    print("ℹ️ Exists:", COLLECTION)
else:
    # Vector size comes from the probe embedding; similarity is cosine distance.
    qdrant.create_collection(
        collection_name=COLLECTION,
        vectors_config=qm.VectorParams(size=dim, distance=qm.Distance.COSINE),
    )
    print("✅ Created:", COLLECTION)


ℹ️ Exists: toy_agent_docs


In [11]:
# Test the vector search functionality
def test_search(query, limit=3):
    """Embed ``query``, search ``COLLECTION`` and print the best matches.

    Args:
        query: text to search for.
        limit: maximum number of results to request (defaults to 3).

    Returns:
        None. For each hit, the score, title and a text preview of up to
        150 characters are printed.
    """
    query_vector = embed_texts([query])[0]

    # Use the newer API method
    results = qdrant.query_points(
        collection_name=COLLECTION,
        query=query_vector,
        limit=limit,
        with_payload=True,
    )

    print(f"Query: '{query}'")
    print(f"Found {len(results.points)} results:")
    for i, result in enumerate(results.points, 1):
        text = result.payload['text']
        # Only add the ellipsis when the preview really is shortened.
        snippet = text[:150] + ("..." if len(text) > 150 else "")
        print(f"\n{i}. Score: {result.score:.3f}")
        print(f"   Title: {result.payload['title']}")
        print(f"   Text: {snippet}")
        
# Smoke-test retrieval with a sample medical question
sample_query = "What are symptoms of asthma?"
test_search(sample_query)

Query: 'What are symptoms of asthma?'
Found 3 results:

1. Score: 0.456
   Title: Asthma Action Plan
   Text: Asthma Action Plan (Educational) 2025-12-29 demo, rag, medical, asthma Synthetic (generated for demo) generated Summary This is a fictional educationa...

2. Score: 0.446
   Title: Asthma Action Plan
   Text: Asthma Action Plan (Educational)

Summary
-------
This is a fictional educational document about Asthma Action Plan (Educational). It is for software ...

3. Score: 0.446
   Title: Asthma Action Plan
   Text: Asthma Action Plan (Educational)

Summary
-------
This is a fictional educational document about Asthma Action Plan (Educational). It is for software ...


In [None]:
# Close the Qdrant client to free up the database for other processes
try:
    _client = qdrant
except NameError:
    # The setup cell has not been run in this kernel, so there is nothing to close.
    _client = None

if _client is None:
    # Covers both "never created" and "already closed and cleared by this cell".
    print("Note: no open Qdrant client to close")
else:
    try:
        _client.close()
        print("✅ Qdrant client connection closed successfully!")
    except Exception as e:
        # Best effort: report a failed close but still clear the variable below.
        print(f"Note: {e}")

# Clear the variable as well
qdrant = None

Note: name 'qdrant' is not defined


: 