In [107]:
from qdrant_client import QdrantClient
from qdrant_client.http import models 
import numpy as np
from faker import Faker

## Создаем клиент Qdrant

In [2]:
client = QdrantClient(host='localhost', port=6333)
client

<qdrant_client.qdrant_client.QdrantClient at 0x70fbd02b6060>

## Создаем коллекцию

In [3]:
my_collection = 'FastAPI manual'
client.create_collection(
    collection_name= my_collection,
    vectors_config=models.VectorParams(size=100, distance=models.Distance.COSINE)

)

UnexpectedResponse: Unexpected Response: 409 (Conflict)
Raw response content:
b'{"status":{"error":"Wrong input: Collection `FastAPI manual` already exists!"},"time":0.000043252}'

### Синтетические данные  
Вектора песен размером 100

In [10]:
data = np.random.uniform(low=-1.0, high=1.0, size=(1000, 100))
index = list(range(len(data)))

In [11]:
data.shape

(1000, 100)

### Заполняем коллекцию

In [15]:
client.upsert(
    collection_name=my_collection,
    points=models.Batch(
        ids=index,
        vectors=data.tolist()
    )
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

### Проверяем векторы

In [108]:
client.retrieve(
    collection_name=my_collection,
    ids=[1],
    with_vectors=True
)

[Record(id=1, payload={'artist': 'Lori Sutton', 'song': 'town nature subject', 'url_song': 'http://www.fox.com/', 'year': 1994, 'country': 'Lesotho'}, vector=[-0.14712138, 0.16016959, 0.037380517, -0.12598726, 0.14563747, -0.056666538, 0.04022164, -0.1629162, -0.039272938, -0.080843, -0.025742818, 0.16634284, 0.13909465, 0.059110276, -0.16890536, 0.134364, -0.029499292, -0.05601338, 0.08059835, 0.068124264, 0.028918931, -0.02787886, 0.10119449, -0.10631618, -0.0069067264, -0.009358144, 0.0044330503, -0.10453846, 0.102308065, -0.13766657, 0.03565489, -0.1631925, -0.08945773, 0.09778853, -0.14641955, 0.09974647, -0.08349091, 0.09900944, -0.013942976, 0.0075908694, -0.086344324, -0.11087018, 0.05931475, 0.11171853, 0.03428706, -0.077888206, 0.047316745, 0.14962414, -0.030742837, -0.03708222, 0.103453524, 0.14883964, 0.094391085, 0.17280675, -0.120905764, -0.17213657, -0.06498828, -0.014683873, -0.15991892, -0.06474649, 0.076984204, 0.06970776, 0.14273237, -0.04520562, 0.09000934, 0.095513

### Генерация фейковых метаданных

In [33]:
fake_something = Faker()
fake_something.name(), fake_something.address()

('Megan Johnson', '19971 Brown Circle\nSouth Matthew, AZ 67180')

In [94]:
payload = []

for i in range(1_000):
    payload.append(
        {
            'artist': fake_something.name(),
            'song': ' '.join(fake_something.words(np.random.randint(1, 4))),
            'url_song': fake_something.url(),
            'year': int(fake_something.year()),
            'country': fake_something.country()
        }
    )

In [95]:
payload[:2]

[{'artist': 'Bradley Williams',
  'song': 'machine seek defense',
  'url_song': 'http://www.henderson.com/',
  'year': 2020,
  'country': 'Azerbaijan'},
 {'artist': 'Lori Sutton',
  'song': 'town nature subject',
  'url_song': 'http://www.fox.com/',
  'year': 1994,
  'country': 'Lesotho'}]

### Загружаем векторы с метаданными

In [96]:
client.upsert(
    collection_name=my_collection,
    points=models.Batch(
        ids=index,
        vectors=data.tolist(),
        payloads=payload,
    )
)

UpdateResult(operation_id=4, status=<UpdateStatus.COMPLETED: 'completed'>)

#### Песня, на которую будем искать похожие векторы

In [97]:
song_to_find = np.random.uniform(low=-1.0, high=1.0, size=100).tolist()

In [109]:
# Фильтр 
song_filter = models.Filter(
        must=[
            models.FieldCondition(key='country', match=models.MatchValue(value='Spain')),
            models.FieldCondition(key='year', range=models.Range(gte=2000))
        ]
    )

# ЗАпрос
client.query_points(
    collection_name=my_collection,
    query=song_to_find,
    query_filter=song_filter,
    limit=1
)

QueryResponse(points=[ScoredPoint(id=529, version=4, score=0.12057702, payload={'artist': 'Robin Silva', 'song': 'project set work', 'url_song': 'https://www.jones.com/', 'year': 2015, 'country': 'Spain'}, vector=None, shard_key=None, order_value=None)])

# Рекомендации
Рекомендация песен после 2000 на основе 5 близких song_to_find

In [118]:
near_songs = client.query_points(
    collection_name=my_collection,
    query=song_to_find,
    limit=5,
    with_payload=True
)

In [126]:
ids_near_songs = [i.id for i in near_songs.points]

In [169]:
song_filter = models.Filter(
    must=[
        models.FieldCondition(key='year', range=models.Range(gte=2000))
    ]
)
recommend = client.recommend(
    collection_name=my_collection,
    positive=ids_near_songs,
    query_filter=song_filter,
    limit=10,
    with_payload=True
)

  recommend = client.recommend(


In [170]:
[(i.payload['artist'], i.payload['song'], i.payload['year']) for i in recommend]

[('Carmen Ayala', 'name again', 2022),
 ('Steven Turner', 'one', 2025),
 ('Anne Boyd', 'role Mrs', 2001),
 ('James Robinson MD', 'least somebody', 2015),
 ('Jill Fowler', 'unit sometimes authority', 2017),
 ('Steven Brandt', 'probably many maybe', 2016),
 ('Jeremy Singh', 'story', 2008),
 ('Michael Atkinson', 'cup order', 2005),
 ('Robert Young', 'magazine', 2014),
 ('Aaron Mccullough', 'science', 2013)]