# Installation

In [1]:
! pip install -U sentence-transformers

Collecting sentence-transformers
  Using cached sentence_transformers-2.3.1-py3-none-any.whl (132 kB)
Collecting huggingface-hub>=0.15.1
  Using cached huggingface_hub-0.20.3-py3-none-any.whl (330 kB)
Collecting transformers<5.0.0,>=4.32.0
  Using cached transformers-4.37.2-py3-none-any.whl (8.4 MB)
Collecting scipy
  Using cached scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.4 MB)
Collecting scikit-learn
  Downloading scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.1/12.1 MB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting nltk
  Using cached nltk-3.8.1-py3-none-any.whl (1.5 MB)
Collecting Pillow
  Using cached pillow-10.2.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)
Collecting torch>=1.11.0
  Using cached torch-2.2.0-cp310-cp310-manylinux1_x86_64.whl (755.5 MB)
Collecting sentencepiece
  Using cached sentencepiece-0.

## Import the models

In [2]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

In [4]:
# Sentence-embedding model used for both the book descriptions and the search
# queries; it is explicitly placed on the CPU.
encoder = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2", device="cpu")

In [5]:
# Toy corpus of thirteen science-fiction books. Every entry is a dict with the
# keys "name", "description", "author" and "year". Later cells embed the
# "description" text and store the whole dict as the point payload, so "year"
# is available for payload filtering.
documents = [
    {
        "name": "The Time Machine",
        "description": "A man travels through time and witnesses the evolution of humanity.",
        "author": "H.G. Wells",
        "year": 1895,
    },
    {
        "name": "Ender's Game",
        "description": "A young boy is trained to become a military leader in a war against an alien race.",
        "author": "Orson Scott Card",
        "year": 1985,
    },
    {
        "name": "Brave New World",
        "description": "A dystopian society where people are genetically engineered and conditioned to conform to a strict social hierarchy.",
        "author": "Aldous Huxley",
        "year": 1932,
    },
    {
        "name": "The Hitchhiker's Guide to the Galaxy",
        "description": "A comedic science fiction series following the misadventures of an unwitting human and his alien friend.",
        "author": "Douglas Adams",
        "year": 1979,
    },
    {
        "name": "Dune",
        "description": "A desert planet is the site of political intrigue and power struggles.",
        "author": "Frank Herbert",
        "year": 1965,
    },
    {
        "name": "Foundation",
        "description": "A mathematician develops a science to predict the future of humanity and works to save civilization from collapse.",
        "author": "Isaac Asimov",
        "year": 1951,
    },
    {
        "name": "Snow Crash",
        "description": "A futuristic world where the internet has evolved into a virtual reality metaverse.",
        "author": "Neal Stephenson",
        "year": 1992,
    },
    {
        "name": "Neuromancer",
        "description": "A hacker is hired to pull off a near-impossible hack and gets pulled into a web of intrigue.",
        "author": "William Gibson",
        "year": 1984,
    },
    {
        "name": "The War of the Worlds",
        "description": "A Martian invasion of Earth throws humanity into chaos.",
        "author": "H.G. Wells",
        "year": 1898,
    },
    {
        "name": "The Hunger Games",
        "description": "A dystopian society where teenagers are forced to fight to the death in a televised spectacle.",
        "author": "Suzanne Collins",
        "year": 2008,
    },
    {
        "name": "The Andromeda Strain",
        "description": "A deadly virus from outer space threatens to wipe out humanity.",
        "author": "Michael Crichton",
        "year": 1969,
    },
    {
        "name": "The Left Hand of Darkness",
        "description": "A human ambassador is sent to a planet where the inhabitants are genderless and can change gender at will.",
        "author": "Ursula K. Le Guin",
        "year": 1969,
    },
    {
        "name": "The Three-Body Problem",
        "description": "Humans encounter an alien civilization that lives in a dying system.",
        "author": "Liu Cixin",
        "year": 2008,
    },
]

In [None]:
# Connect to a Qdrant server at localhost:6333 (QdrantClient is already
# imported in the imports cell at the top of the notebook).
# No server running? Swap in the embedded in-memory mode instead:
#   client = QdrantClient(":memory:")
client = QdrantClient("localhost", port=6333)

In [None]:
# Start from an empty "my_books" collection so this cell can be re-run safely.
# `recreate_collection` is deprecated in recent qdrant-client releases; an
# explicit delete followed by create is the equivalent sequence.
# WARNING: this drops any points already stored under this collection name.
client.delete_collection(collection_name="my_books")
client.create_collection(
    collection_name="my_books",
    vectors_config=models.VectorParams(
        # The vector size must equal the embedding dimension of the encoder.
        size=encoder.get_sentence_embedding_dimension(),
        distance=models.Distance.COSINE,
    ),
)

In [8]:
# Display the encoder's embedding dimension, i.e. the value passed as the
# collection's vector `size`.
encoder.get_sentence_embedding_dimension()

384

## Upload data to collection

In [10]:
# Embed every book description and upload it as one point per document:
# the list index is the point id and the full document dict is the payload.
# `upload_points` / `PointStruct` replace the deprecated
# `upload_records` / `Record` API.
client.upload_points(
    collection_name="my_books",
    points=[
        models.PointStruct(
            id=idx,
            vector=encoder.encode(doc["description"]).tolist(),
            payload=doc,
        )
        for idx, doc in enumerate(documents)
    ],
)

  client.upload_records(


## Ask the engine a question

In [18]:
# Embed the free-text query with the same encoder used for the documents,
# then ask Qdrant for the three closest points in "my_books".
query_embedding = encoder.encode("virus").tolist()
hits = client.search(
    collection_name="my_books",
    query_vector=query_embedding,
    limit=3,
)

# Print each match's stored payload next to its score.
for hit in hits:
    print(hit.payload, "score:", hit.score)

{'author': 'Michael Crichton', 'description': 'A deadly virus from outer space threatens to wipe out humanity.', 'name': 'The Andromeda Strain', 'year': 1969} score: 0.49328357
{'author': 'William Gibson', 'description': 'A hacker is hired to pull off a near-impossible hack and gets pulled into a web of intrigue.', 'name': 'Neuromancer', 'year': 1984} score: 0.33094203
{'author': 'H.G. Wells', 'description': 'A Martian invasion of Earth throws humanity into chaos.', 'name': 'The War of the Worlds', 'year': 1898} score: 0.1828531


## Narrow down the query

In [19]:
# Payload filter: only points whose "year" field is >= 2000 are eligible.
published_since_2000 = models.Filter(
    must=[models.FieldCondition(key="year", range=models.Range(gte=2000))]
)

# Vector search for "alien invasion", restricted by the filter above and
# limited to the single best match.
hits = client.search(
    collection_name="my_books",
    query_vector=encoder.encode("alien invasion").tolist(),
    query_filter=published_since_2000,
    limit=1,
)

for hit in hits:
    print(hit.payload, "score:", hit.score)

{'author': 'Liu Cixin', 'description': 'Humans encounter an alien civilization that lives in a dying system.', 'name': 'The Three-Body Problem', 'year': 2008} score: 0.45902938
