In [1]:
from qdrant_client import QdrantClient
from qdrant_client.http import models 
import numpy as np
from faker import Faker

## Создаем клиент Qdrant

In [2]:
client = QdrantClient(host='localhost', port=6333)
client

<qdrant_client.qdrant_client.QdrantClient at 0x771f74fb2cf0>

## Создаем коллекцию

In [3]:
my_collection = 'FastAPI manual'
client.create_collection(
    collection_name= my_collection,
    vectors_config=models.VectorParams(size=100, distance=models.Distance.COSINE)

)

UnexpectedResponse: Unexpected Response: 409 (Conflict)
Raw response content:
b'{"status":{"error":"Wrong input: Collection `FastAPI manual` already exists!"},"time":0.003462575}'

### Синтетические данные  
Вектора песен размером 100

In [4]:
data = np.random.uniform(low=-1.0, high=1.0, size=(1000, 100))
index = list(range(len(data)))

In [5]:
data.shape

(1000, 100)

### Заполняем коллекцию

In [6]:
client.upsert(
    collection_name=my_collection,
    points=models.Batch(
        ids=index,
        vectors=data.tolist()
    )
)

UpdateResult(operation_id=5, status=<UpdateStatus.COMPLETED: 'completed'>)

### Проверяем векторы

In [7]:
client.retrieve(
    collection_name=my_collection,
    ids=[1],
    with_vectors=True
)

[Record(id=1, payload={}, vector=[-0.15930259, 0.10289711, -0.16272765, 0.1394354, 0.025902126, 0.042671364, 0.018339517, -0.028610857, -0.10022684, -0.08878664, 0.14337938, 0.063238405, -0.14067414, 0.16549942, -0.05889081, -0.10155912, 0.045757696, 0.059758082, 0.019764012, 0.1478785, -0.16048503, 0.010520236, 0.03688653, 0.08574183, -0.026521832, -0.15105157, 0.035636883, 0.11714151, 0.07650535, 0.08014891, 0.16760416, 0.01914519, 0.08798494, -0.1464455, -0.12080377, -0.0012280128, 0.1266234, -0.08986996, 0.1442304, -0.16243611, 0.12745532, 0.098596126, 0.08816437, 0.033275243, 0.06031153, 0.04070129, 0.16146295, -0.1560904, 0.13504452, -0.14397888, -0.0602666, -0.12946494, -0.14791214, 0.028582962, 0.07567946, 0.04903095, 0.14600877, -0.15192963, 0.1308647, -0.10750951, 0.034584373, 0.003185723, -0.12998098, 0.06959774, 0.06828325, 0.03311941, -0.0017054784, 0.030262448, 0.15832235, 0.13379596, -0.06726853, 0.066290826, 0.1002043, 0.009044697, -0.15295537, 0.072625816, -0.006710512

### Генерация фейковых метаданных

In [8]:
fake_something = Faker()
fake_something.name(), fake_something.address()

('Theresa Chung', '060 Christopher Estates Suite 427\nSimston, VA 15444')

In [9]:
payload = []

for i in range(1_000):
    payload.append(
        {
            'artist': fake_something.name(),
            'song': ' '.join(fake_something.words(np.random.randint(1, 4))),
            'url_song': fake_something.url(),
            'year': int(fake_something.year()),
            'country': fake_something.country()
        }
    )

In [10]:
payload[:2]

[{'artist': 'Sara Crawford',
  'song': 'business hotel',
  'url_song': 'http://davis-williams.net/',
  'year': 2014,
  'country': 'France'},
 {'artist': 'Blake Jackson',
  'song': 'budget economy',
  'url_song': 'http://barrett.com/',
  'year': 1999,
  'country': 'South Africa'}]

### Загружаем векторы с метаданными

In [11]:
client.upsert(
    collection_name=my_collection,
    points=models.Batch(
        ids=index,
        vectors=data.tolist(),
        payloads=payload,
    )
)

UpdateResult(operation_id=6, status=<UpdateStatus.COMPLETED: 'completed'>)

#### Песня, на которую будем искать похожие векторы

In [12]:
song_to_find = np.random.uniform(low=-1.0, high=1.0, size=100).tolist()

In [13]:
# Фильтр 
song_filter = models.Filter(
        must=[
            models.FieldCondition(key='country', match=models.MatchValue(value='Spain')),
            models.FieldCondition(key='year', range=models.Range(gte=2000))
        ]
    )

# ЗАпрос
client.query_points(
    collection_name=my_collection,
    query=song_to_find,
    query_filter=song_filter,
    limit=1
)

QueryResponse(points=[])

# Рекомендации
Рекомендация песен после 2000 на основе 5 близких song_to_find

In [14]:
near_songs = client.query_points(
    collection_name=my_collection,
    query=song_to_find,
    limit=5,
    with_payload=True
)

In [15]:
ids_near_songs = [i.id for i in near_songs.points]

In [16]:
song_filter = models.Filter(
    must=[
        models.FieldCondition(key='year', range=models.Range(gte=2000))
    ]
)
recommend = client.recommend(
    collection_name=my_collection,
    positive=ids_near_songs,
    query_filter=song_filter,
    limit=10,
    with_payload=True
)

  recommend = client.recommend(


In [17]:
[(i.payload['artist'], i.payload['song'], i.payload['year']) for i in recommend]

[('Thomas Williams', 'nice interest leader', 2022),
 ('Jennifer Lucas', 'point program', 2025),
 ('Justin Burns', 'maintain', 2020),
 ('Isabel Russell', 'recognize soon', 2015),
 ('Justin Allen', 'suffer role fire', 2016),
 ('Kimberly Mills', 'member end crime', 2010),
 ('Lori Johnson', 'hard', 2011),
 ('Chad Lewis', 'evening community', 2025),
 ('April Jenkins', 'evening wrong catch', 2025),
 ('Paul Jenkins', 'contain politics', 2009)]