Ollama: LLM inference server running in the background
Qdrant: vector database server running in the background

In [2]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
from sentence_transformers import SentenceTransformer
import ollama

  from .autonotebook import tqdm as notebook_tqdm


Qdrant Setup

In [None]:
# Connect to the Qdrant server; this assumes a Qdrant Docker container is
# already running and reachable at localhost:6333.
qdrant = QdrantClient(url="http://localhost:6333")

In [20]:
COLLECTION = "docs"
# Vector size the collection is created with. It has to match the length of
# the vectors upserted into it (the sentence-transformers embeddings).
EMBEDDING_DIM = 384

# Create the collection only when it is missing, so re-running this cell is safe.
# collection_exists() asks the server about this one collection instead of
# listing every collection and scanning the names client-side.
if not qdrant.collection_exists(collection_name=COLLECTION):
    qdrant.create_collection(
        collection_name=COLLECTION,
        vectors_config=VectorParams(size=EMBEDDING_DIM, distance=Distance.COSINE),
    )

Indexing

In [4]:
# Sentence-embedding model used to encode both the documents and the query.
# NOTE(review): the "docs" collection is created with a vector size of 384, so
# this model's output dimension must be 384 — confirm before swapping models.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBED_MODEL)

In [None]:
# ---- Store Sample Docs ----
# Tiny in-memory corpus; each string is embedded and stored as one point.
docs = [
    "Qdrant is a vector database used for semantic search.",
    "Ollama allows running local LLMs like Llama3 or Mistral.",
    "Vector search enables retrieval augmented generation (RAG)."
]

# Embed every document in one call and convert the result to plain Python
# lists via .tolist() before it is handed to the Qdrant client.
vectors = embedder.encode(docs).tolist()

In [21]:
# One point per document: the list position doubles as the point id, the
# embedding is the vector, and the original text is kept in the payload so it
# can be read back from search hits.
points = [
    PointStruct(id=doc_id, vector=vectors[doc_id], payload={"text": text})
    for doc_id, text in enumerate(docs)
]

In [22]:
# Write the points into the collection; the returned UpdateResult is the cell output.
qdrant.upsert(collection_name=COLLECTION, points=points)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

---- Query + Retrieval ----

In [24]:
# The user question, embedded with the same model that encoded the documents
# so that query and document vectors are comparable.
query = "What is Qdrant used for?"
query_vec = embedder.encode(query).tolist()

In [25]:
# Retrieve the 2 stored points closest to the query vector.
# query_points() replaces the deprecated QdrantClient.search(); it returns a
# response object whose .points attribute is the list of ScoredPoint hits, so
# search_results keeps the same shape the following cells rely on.
search_results = qdrant.query_points(
    collection_name=COLLECTION,
    query=query_vec,
    limit=2,
).points


  search_results = qdrant.search(


In [26]:
# Inspect the raw hits (id, score and payload of each match).
search_results

[ScoredPoint(id=0, version=0, score=0.5962877, payload={'text': 'Qdrant is a vector database used for semantic search.'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=2, version=0, score=0.153411, payload={'text': 'Vector search enables retrieval augmented generation (RAG).'}, vector=None, shard_key=None, order_value=None)]

In [27]:
# Stitch the stored text of every hit into one newline-separated context string.
context = "\n".join(
    hit.payload["text"] for hit in search_results
)

In [28]:
# Show the assembled context that will be placed in the prompt.
context

'Qdrant is a vector database used for semantic search.\nVector search enables retrieval augmented generation (RAG).'

 ---- Ask Ollama with Context ----

In [30]:
# ---- Ask Ollama with Context ----
# Build a prompt that puts the retrieved context ahead of the question,
# then ask the "llama3" model through the Ollama client to complete it.
prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
response = ollama.generate(model="llama3", prompt=prompt)

In [32]:
# The generated answer text is read from the "response" key of the result.
print(response["response"])

According to the context, Qdrant is a vector database used for **semantic search**, which enables **Retrieval Augmented Generation (RAG)**. In other words, Qdrant is used for searching and generating content based on semantic similarity between vectors.
