In [2]:
import uuid
import logging
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct
from sentence_transformers import SentenceTransformer

# Параметры
COLL_NAME = 'documents'
ENCODER_NAME = 'intfloat/multilingual-e5-large'
VEC_SIZE = 1024  # Обновлено для соответствия размеру векторов энкодера
CHUNK_SIZE = 1430
CHUNK_OVERLAP = 138
FILES_DIR = 'files'

# Настройка логирования
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levellevelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Функция для чтения файла и разбиения его на чанки
def file_to_chunks(file_path, chunk_size, chunk_overlap):
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()
    chunks = []
    for i in range(0, len(text), chunk_size - chunk_overlap):
        chunks.append(text[i:i + chunk_size])
    return chunks

# Функция для преобразования текста в вектора
def str_to_vec(encoder, texts):
    return encoder.encode(texts)

# Создаем подключение к векторной БД
try:
    qdrant_client = QdrantClient('localhost', port=6333)
    logger.info("Successfully connected to Qdrant server.")
except Exception as e:
    logger.error(f"Failed to connect to Qdrant server: {e}")
    raise

# Функция для сохранения чанков в базу данных
def save_chunks(encoder, chunks, file_name):
    try:
        chunk_embeddings = str_to_vec(encoder, chunks)
        points = []
        for i in range(len(chunk_embeddings)):
            point = PointStruct(
                id=str(uuid.uuid4()),
                vector=chunk_embeddings[i],
                payload={'file': file_name, 'chunk': chunks[i], 'file_path': os.path.join(FILES_DIR, file_name)}
            )
            points.append(point)
        operation_info = qdrant_client.upsert(
            collection_name=COLL_NAME,
            wait=True,
            points=points
        )
        logger.info(f"Successfully saved chunks for file {file_name}.")
        return operation_info
    except Exception as e:
        logger.error(f"Failed to save chunks for file {file_name}: {e}")
        raise

# Функция для загрузки файлов в базу данных
def files_to_vecdb(files, encoder, vec_size, chunk_size, chunk_overlap):
    try:
        qdrant_client.delete_collection(collection_name=COLL_NAME)
        logger.info(f"Collection {COLL_NAME} deleted successfully.")
        qdrant_client.create_collection(
            collection_name=COLL_NAME,
            vectors_config=VectorParams(size=vec_size, distance=Distance.COSINE),
        )
        logger.info(f"Collection {COLL_NAME} created successfully.")
        for file_name in files:
            chunks = file_to_chunks(os.path.join(FILES_DIR, file_name), chunk_size, chunk_overlap)
            save_chunks(encoder, chunks, file_name)
    except Exception as e:
        logger.error(f"Failed to process files to vecdb: {e}")
        raise

# Инициализация энкодера
encoder_model = SentenceTransformer(ENCODER_NAME)

# Загрузка файлов в базу данных
import os
files = [f for f in os.listdir(FILES_DIR) if os.path.isfile(os.path.join(FILES_DIR, f))]
files_to_vecdb(files, encoder_model, VEC_SIZE, CHUNK_SIZE, CHUNK_OVERLAP)

2024-05-31 16:03:21,788 - __main__ - INFO - Successfully connected to Qdrant server.
2024-05-31 16:03:21,789 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: intfloat/multilingual-e5-large
2024-05-31 16:03:25,008 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: cuda
2024-05-31 16:03:26,020 - httpx - INFO - HTTP Request: DELETE http://localhost:6333/collections/documents "HTTP/1.1 200 OK"
2024-05-31 16:03:26,021 - __main__ - INFO - Collection documents deleted successfully.
2024-05-31 16:03:26,221 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents "HTTP/1.1 200 OK"
2024-05-31 16:03:26,222 - __main__ - INFO - Collection documents created successfully.


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-05-31 16:03:28,341 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:28,343 - __main__ - INFO - Successfully saved chunks for file 10669.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:29,811 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:29,813 - __main__ - INFO - Successfully saved chunks for file 106717.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:30,045 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:30,047 - __main__ - INFO - Successfully saved chunks for file 107723.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:30,412 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:30,414 - __main__ - INFO - Successfully saved chunks for file 11028.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:31,689 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:31,690 - __main__ - INFO - Successfully saved chunks for file 11314.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:33,084 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:33,085 - __main__ - INFO - Successfully saved chunks for file 132041.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:33,728 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:33,730 - __main__ - INFO - Successfully saved chunks for file 13439.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:33,943 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:33,945 - __main__ - INFO - Successfully saved chunks for file 1417133.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:34,218 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:34,219 - __main__ - INFO - Successfully saved chunks for file 146063.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:34,710 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:34,711 - __main__ - INFO - Successfully saved chunks for file 1472675.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:35,172 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:35,173 - __main__ - INFO - Successfully saved chunks for file 1474402.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:35,758 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:35,759 - __main__ - INFO - Successfully saved chunks for file 1475.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:36,437 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:36,438 - __main__ - INFO - Successfully saved chunks for file 1671.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:37,305 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:37,306 - __main__ - INFO - Successfully saved chunks for file 172300.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:37,471 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:37,472 - __main__ - INFO - Successfully saved chunks for file 1747996.txt.


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-05-31 16:03:41,672 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:41,673 - __main__ - INFO - Successfully saved chunks for file 17714.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:41,951 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:41,952 - __main__ - INFO - Successfully saved chunks for file 182333.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:42,243 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:42,244 - __main__ - INFO - Successfully saved chunks for file 1828524.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:42,423 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:42,424 - __main__ - INFO - Successfully saved chunks for file 1858295.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:42,607 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:42,609 - __main__ - INFO - Successfully saved chunks for file 187336.txt.


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-05-31 16:03:43,904 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:43,905 - __main__ - INFO - Successfully saved chunks for file 1921.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:44,621 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:44,622 - __main__ - INFO - Successfully saved chunks for file 19985.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:44,818 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:44,819 - __main__ - INFO - Successfully saved chunks for file 2138257.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:46,484 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:46,486 - __main__ - INFO - Successfully saved chunks for file 22419.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:46,965 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:46,966 - __main__ - INFO - Successfully saved chunks for file 23698.txt.


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

2024-05-31 16:03:50,483 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:50,484 - __main__ - INFO - Successfully saved chunks for file 25022.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:51,045 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:51,046 - __main__ - INFO - Successfully saved chunks for file 273921.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:52,329 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:52,330 - __main__ - INFO - Successfully saved chunks for file 2777.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:52,480 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:52,481 - __main__ - INFO - Successfully saved chunks for file 278922.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:53,508 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:53,509 - __main__ - INFO - Successfully saved chunks for file 2797.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:53,749 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:53,750 - __main__ - INFO - Successfully saved chunks for file 293530.txt.


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-05-31 16:03:56,642 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:56,643 - __main__ - INFO - Successfully saved chunks for file 2944.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:56,973 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:56,974 - __main__ - INFO - Successfully saved chunks for file 2948657.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:57,320 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:57,320 - __main__ - INFO - Successfully saved chunks for file 300138.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:57,424 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:57,426 - __main__ - INFO - Successfully saved chunks for file 3235084.txt.


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-05-31 16:03:59,205 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:59,206 - __main__ - INFO - Successfully saved chunks for file 34248.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:03:59,354 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:03:59,355 - __main__ - INFO - Successfully saved chunks for file 354896.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:00,734 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:00,736 - __main__ - INFO - Successfully saved chunks for file 35695.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:01,102 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:01,103 - __main__ - INFO - Successfully saved chunks for file 36300.txt.


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-05-31 16:04:03,052 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:03,053 - __main__ - INFO - Successfully saved chunks for file 382275.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:03,099 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:03,100 - __main__ - INFO - Successfully saved chunks for file 3911740.txt.


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-05-31 16:04:05,482 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:05,483 - __main__ - INFO - Successfully saved chunks for file 39240.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:06,935 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:06,937 - __main__ - INFO - Successfully saved chunks for file 4101846.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:06,997 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:06,999 - __main__ - INFO - Successfully saved chunks for file 41062.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:07,374 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:07,375 - __main__ - INFO - Successfully saved chunks for file 4169181.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:07,885 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:07,886 - __main__ - INFO - Successfully saved chunks for file 44497.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:09,185 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:09,186 - __main__ - INFO - Successfully saved chunks for file 469138.txt.


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-05-31 16:04:11,375 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:11,376 - __main__ - INFO - Successfully saved chunks for file 50.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:11,680 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:11,682 - __main__ - INFO - Successfully saved chunks for file 533540.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:11,784 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:11,786 - __main__ - INFO - Successfully saved chunks for file 554913.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:11,965 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:11,966 - __main__ - INFO - Successfully saved chunks for file 60388.txt.


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2024-05-31 16:04:14,566 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:14,568 - __main__ - INFO - Successfully saved chunks for file 60835.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:14,683 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:14,684 - __main__ - INFO - Successfully saved chunks for file 62140.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:16,234 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:16,235 - __main__ - INFO - Successfully saved chunks for file 67938.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:16,294 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:16,295 - __main__ - INFO - Successfully saved chunks for file 6826333.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:16,455 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:16,457 - __main__ - INFO - Successfully saved chunks for file 820423.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:17,631 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:17,632 - __main__ - INFO - Successfully saved chunks for file 86619.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:17,967 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:17,968 - __main__ - INFO - Successfully saved chunks for file 92937.txt.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-31 16:04:18,124 - httpx - INFO - HTTP Request: PUT http://localhost:6333/collections/documents/points?wait=true "HTTP/1.1 200 OK"
2024-05-31 16:04:18,125 - __main__ - INFO - Successfully saved chunks for file 984291.txt.


In [6]:
# Вывод количества уникальных файлов в базе данных
try:
    count_result = qdrant_client.count(collection_name=COLL_NAME)
    print(f"Number of points in collection: {count_result.count}")

    all_points = qdrant_client.scroll(collection_name=COLL_NAME, limit=count_result.count, with_payload=True)
    file_names = set()
    for point in all_points.points:
        file_names.add(point.payload['file'])
    print("Number of unique files in collection:", len(file_names))
except Exception as e:
    logger.error(f"Failed to retrieve file count from collection {COLL_NAME}: {e}")

2024-05-31 16:06:48,568 - httpx - INFO - HTTP Request: POST http://localhost:6333/collections/documents/points/count "HTTP/1.1 200 OK"
2024-05-31 16:06:48,599 - httpx - INFO - HTTP Request: POST http://localhost:6333/collections/documents/points/scroll "HTTP/1.1 200 OK"
2024-05-31 16:06:48,676 - __main__ - ERROR - Failed to retrieve file count from collection documents: 'tuple' object has no attribute 'points'


Number of points in collection: 1093
