From 100bf974c9adb987decb083cf8bfad87d9b3d7b6 Mon Sep 17 00:00:00 2001 From: filipecosta90 <filipecosta.90@gmail.com> Date: Mon, 4 Mar 2024 12:22:55 +0000 Subject: [PATCH 01/12] Enabled api key elastic connections --- engine/clients/elasticsearch/config.py | 43 ++++++++++++++++++++--- engine/clients/elasticsearch/configure.py | 18 ++-------- engine/clients/elasticsearch/search.py | 10 ++---- engine/clients/elasticsearch/upload.py | 18 ++-------- 4 files changed, 45 insertions(+), 44 deletions(-) diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py index 19b59d74..0658ed1e 100644 --- a/engine/clients/elasticsearch/config.py +++ b/engine/clients/elasticsearch/config.py @@ -1,4 +1,39 @@ -ELASTIC_PORT = 9200 -ELASTIC_INDEX = "bench" -ELASTIC_USER = "elastic" -ELASTIC_PASSWORD = "passwd" +import os +from elasticsearch import Elasticsearch + +ELASTIC_PORT = int(os.getenv("ELASTIC_PORT", 9200)) +ELASTIC_INDEX = os.getenv("ELASTIC_INDEX", "bench") +ELASTIC_USER = os.getenv("ELASTIC_USER", "elastic") +ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "passwd") +ELASTIC_API_KEY = os.getenv("ELASTIC_API_KEY", None) +ELASTIC_TIMEOUT = int(os.getenv("ELASTIC_TIMEOUT", 90)) + + +def get_es_client(host, connection_params): + client: Elasticsearch = None + init_params = { + **{ + "verify_certs": False, + "request_timeout": ELASTIC_TIMEOUT, + "retry_on_timeout": True, + }, + **connection_params, + } + if host.startswith("http"): + url = "" + else: + url = "http://" + url += f"{host}:{ELASTIC_PORT}" + if ELASTIC_API_KEY is None: + client = Elasticsearch( + url, + basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), + **init_params, + ) + else: + client = Elasticsearch( + url, + api_key=ELASTIC_API_KEY, + **init_params, + ) + return client diff --git a/engine/clients/elasticsearch/configure.py b/engine/clients/elasticsearch/configure.py index 76f64eb8..a4f08425 100644 --- a/engine/clients/elasticsearch/configure.py +++ b/engine/clients/elasticsearch/configure.py @@ -6,9 +6,7 @@ from engine.base_client.distances import Distance from engine.clients.elasticsearch.config import ( ELASTIC_INDEX, - ELASTIC_PASSWORD, - ELASTIC_PORT, - ELASTIC_USER, + get_es_client, ) @@ -25,19 +23,7 @@ class ElasticConfigurator(BaseConfigurator): def __init__(self, host, collection_params: dict, connection_params: dict): super().__init__(host, collection_params, connection_params) - init_params = { - **{ - "verify_certs": False, - "request_timeout": 90, - "retry_on_timeout": True, - }, - **connection_params, - } - self.client = Elasticsearch( - f"http://{host}:{ELASTIC_PORT}", - basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), - **init_params, - ) + self.client = get_es_client(host, connection_params) def clean(self): try: diff --git a/engine/clients/elasticsearch/search.py b/engine/clients/elasticsearch/search.py index 29d20ec5..ed15113c 100644 --- a/engine/clients/elasticsearch/search.py +++ b/engine/clients/elasticsearch/search.py @@ -7,9 +7,7 @@ from engine.base_client.search import BaseSearcher from engine.clients.elasticsearch.config import ( ELASTIC_INDEX, - ELASTIC_PASSWORD, - ELASTIC_PORT, - ELASTIC_USER, + get_es_client, ) from engine.clients.elasticsearch.parser import ElasticConditionParser @@ -38,11 +36,7 @@ def init_client(cls, host, distance, connection_params: dict, search_params: dic }, **connection_params, } - cls.client: Elasticsearch = Elasticsearch( - f"http://{host}:{ELASTIC_PORT}", - basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), - **init_params, - ) + cls.client = get_es_client(host, connection_params) cls.search_params = search_params @classmethod diff --git a/engine/clients/elasticsearch/upload.py b/engine/clients/elasticsearch/upload.py index 0d5c6f2b..78842a25 100644 --- a/engine/clients/elasticsearch/upload.py +++ b/engine/clients/elasticsearch/upload.py @@ -7,9 +7,7 @@ from engine.base_client.upload import BaseUploader from engine.clients.elasticsearch.config import ( ELASTIC_INDEX, - ELASTIC_PASSWORD, - ELASTIC_PORT, - ELASTIC_USER, + get_es_client, ) @@ -28,19 +26,7 @@ def get_mp_start_method(cls): @classmethod def init_client(cls, host, distance, connection_params, upload_params): - init_params = { - **{ - "verify_certs": False, - "request_timeout": 90, - "retry_on_timeout": True, - }, - **connection_params, - } - cls.client = Elasticsearch( - f"http://{host}:{ELASTIC_PORT}", - basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), - **init_params, - ) + cls.client = get_es_client(host, connection_params) cls.upload_params = upload_params @classmethod From 6549035af6246e912a785a33d876d42e7303c4e6 Mon Sep 17 00:00:00 2001 From: filipecosta90 <filipecosta.90@gmail.com> Date: Mon, 4 Mar 2024 12:47:06 +0000 Subject: [PATCH 02/12] Fixes per pre-commit hook --- engine/clients/elasticsearch/config.py | 1 + engine/clients/elasticsearch/configure.py | 5 +---- engine/clients/elasticsearch/search.py | 5 +---- engine/clients/elasticsearch/upload.py | 5 +---- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py index 0658ed1e..a988eee7 100644 --- a/engine/clients/elasticsearch/config.py +++ b/engine/clients/elasticsearch/config.py @@ -1,4 +1,5 @@ import os + from elasticsearch import Elasticsearch ELASTIC_PORT = int(os.getenv("ELASTIC_PORT", 9200)) diff --git a/engine/clients/elasticsearch/configure.py b/engine/clients/elasticsearch/configure.py index a4f08425..c07e5939 100644 --- a/engine/clients/elasticsearch/configure.py +++ b/engine/clients/elasticsearch/configure.py @@ -4,10 +4,7 @@ from engine.base_client import IncompatibilityError from engine.base_client.configure import BaseConfigurator from engine.base_client.distances import Distance -from engine.clients.elasticsearch.config import ( - ELASTIC_INDEX, - get_es_client, -) +from engine.clients.elasticsearch.config import ELASTIC_INDEX, get_es_client class ElasticConfigurator(BaseConfigurator): diff --git a/engine/clients/elasticsearch/search.py b/engine/clients/elasticsearch/search.py index ed15113c..a20b2121 100644 --- a/engine/clients/elasticsearch/search.py +++ b/engine/clients/elasticsearch/search.py @@ -5,10 +5,7 @@ from elasticsearch import Elasticsearch from engine.base_client.search import BaseSearcher -from engine.clients.elasticsearch.config import ( - ELASTIC_INDEX, - get_es_client, -) +from engine.clients.elasticsearch.config import ELASTIC_INDEX, get_es_client from engine.clients.elasticsearch.parser import ElasticConditionParser diff --git a/engine/clients/elasticsearch/upload.py b/engine/clients/elasticsearch/upload.py index 78842a25..9b24ca5d 100644 --- a/engine/clients/elasticsearch/upload.py +++ b/engine/clients/elasticsearch/upload.py @@ -5,10 +5,7 @@ from elasticsearch import Elasticsearch from engine.base_client.upload import BaseUploader -from engine.clients.elasticsearch.config import ( - ELASTIC_INDEX, - get_es_client, -) +from engine.clients.elasticsearch.config import ELASTIC_INDEX, get_es_client class ClosableElastic(Elasticsearch): From 3f314d82659cac25d0c983161147706506061879 Mon Sep 17 00:00:00 2001 From: filipecosta90 <filipecosta.90@gmail.com> Date: Mon, 4 Mar 2024 13:57:43 +0000 Subject: [PATCH 03/12] popping the parallel config from a deep copy of search_params --- engine/clients/elasticsearch/search.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/engine/clients/elasticsearch/search.py b/engine/clients/elasticsearch/search.py index a20b2121..b7b09e2f 100644 --- a/engine/clients/elasticsearch/search.py +++ b/engine/clients/elasticsearch/search.py @@ -1,3 +1,4 @@ +import copy import multiprocessing as mp import uuid from typing import List, Tuple @@ -34,7 +35,9 @@ def init_client(cls, host, distance, connection_params: dict, search_params: dic **connection_params, } cls.client = get_es_client(host, connection_params) - cls.search_params = search_params + cls.search_params = copy.deepcopy(search_params) + # pop parallel + cls.search_params.pop("parallel", "1") @classmethod def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]: From 6bd120fa70c8c7edca555a240a5d7b2d26e9595f Mon Sep 17 00:00:00 2001 From: filipe oliveira <filipecosta.90@gmail.com> Date: Fri, 22 Mar 2024 10:46:32 +0000 Subject: [PATCH 04/12] Update engine/clients/elasticsearch/config.py Co-authored-by: Kumar Shivendu <kshivendu1@gmail.com> --- engine/clients/elasticsearch/config.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py index a988eee7..2e8ffe4e 100644 --- a/engine/clients/elasticsearch/config.py +++ b/engine/clients/elasticsearch/config.py @@ -13,11 +13,9 @@ def get_es_client(host, connection_params): client: Elasticsearch = None init_params = { - **{ - "verify_certs": False, - "request_timeout": ELASTIC_TIMEOUT, - "retry_on_timeout": True, - }, + "verify_certs": False, + "request_timeout": ELASTIC_TIMEOUT, + "retry_on_timeout": True, **connection_params, } if host.startswith("http"): From d976cf6df2243036a65517d699e70d16451f5b4f Mon Sep 17 00:00:00 2001 From: filipe oliveira <filipecosta.90@gmail.com> Date: Fri, 22 Mar 2024 10:47:09 +0000 Subject: [PATCH 05/12] Update engine/clients/elasticsearch/config.py Co-authored-by: Kumar Shivendu <kshivendu1@gmail.com> --- engine/clients/elasticsearch/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py index 2e8ffe4e..37ea2682 100644 --- a/engine/clients/elasticsearch/config.py +++ b/engine/clients/elasticsearch/config.py @@ -11,7 +11,6 @@ def get_es_client(host, connection_params): - client: Elasticsearch = None init_params = { "verify_certs": False, "request_timeout": ELASTIC_TIMEOUT, From 06ac0f8e846ace388970546a86d365b392b13fc7 Mon Sep 17 00:00:00 2001 From: filipe oliveira <filipecosta.90@gmail.com> Date: Fri, 22 Mar 2024 10:51:33 +0000 Subject: [PATCH 06/12] Update engine/clients/elasticsearch/config.py Co-authored-by: Kumar Shivendu <kshivendu1@gmail.com> --- engine/clients/elasticsearch/config.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py index 37ea2682..0f31d0f6 100644 --- a/engine/clients/elasticsearch/config.py +++ b/engine/clients/elasticsearch/config.py @@ -23,15 +23,6 @@ def get_es_client(host, connection_params): url = "http://" url += f"{host}:{ELASTIC_PORT}" if ELASTIC_API_KEY is None: - client = Elasticsearch( - url, - basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), - **init_params, - ) + return Elasticsearch(url, basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), **init_params) else: - client = Elasticsearch( - url, - api_key=ELASTIC_API_KEY, - **init_params, - ) - return client + return Elasticsearch(url, api_key=ELASTIC_API_KEY, **init_params) From 9751080444412920f2b918a0dd83d24b68209772 Mon Sep 17 00:00:00 2001 From: filipecosta90 <filipecosta.90@gmail.com> Date: Thu, 4 Apr 2024 10:34:21 +0100 Subject: [PATCH 07/12] Removed api_key usage per review --- engine/clients/elasticsearch/config.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py index 0f31d0f6..9084c745 100644 --- a/engine/clients/elasticsearch/config.py +++ b/engine/clients/elasticsearch/config.py @@ -6,7 +6,6 @@ ELASTIC_INDEX = os.getenv("ELASTIC_INDEX", "bench") ELASTIC_USER = os.getenv("ELASTIC_USER", "elastic") ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "passwd") -ELASTIC_API_KEY = os.getenv("ELASTIC_API_KEY", None) ELASTIC_TIMEOUT = int(os.getenv("ELASTIC_TIMEOUT", 90)) @@ -22,7 +21,6 @@ def get_es_client(host, connection_params): else: url = "http://" url += f"{host}:{ELASTIC_PORT}" - if ELASTIC_API_KEY is None: - return Elasticsearch(url, basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), **init_params) - else: - return Elasticsearch(url, api_key=ELASTIC_API_KEY, **init_params) + return Elasticsearch( + url, basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), **init_params + ) From 5d7e5176db98fffd8a067fe5120c563e3637deb1 Mon Sep 17 00:00:00 2001 From: filipecosta90 <filipecosta.90@gmail.com> Date: Thu, 4 Apr 2024 11:06:03 +0100 Subject: [PATCH 08/12] Enable specifying the elastic index timeouts. disabled ssl warnings --- engine/clients/elasticsearch/config.py | 38 +++++++++++++++++++---- engine/clients/elasticsearch/configure.py | 20 +++++++++--- engine/clients/elasticsearch/upload.py | 31 +++++++++++++++--- 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py index 9084c745..024ca97b 100644 --- a/engine/clients/elasticsearch/config.py +++ b/engine/clients/elasticsearch/config.py @@ -1,19 +1,28 @@ import os +import time +import urllib3 from elasticsearch import Elasticsearch ELASTIC_PORT = int(os.getenv("ELASTIC_PORT", 9200)) ELASTIC_INDEX = os.getenv("ELASTIC_INDEX", "bench") ELASTIC_USER = os.getenv("ELASTIC_USER", "elastic") ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "passwd") -ELASTIC_TIMEOUT = int(os.getenv("ELASTIC_TIMEOUT", 90)) +ELASTIC_TIMEOUT = int(os.getenv("ELASTIC_TIMEOUT", 300)) +ELASTIC_INDEX_TIMEOUT = os.getenv("ELASTIC_INDEX_TIMEOUT", "30m") +ELASTIC_INDEX_REFRESH_INTERVAL = os.getenv("ELASTIC_INDEX_REFRESH_INTERVAL", "-1") +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) def get_es_client(host, connection_params): + client: Elasticsearch = None init_params = { - "verify_certs": False, - "request_timeout": ELASTIC_TIMEOUT, - "retry_on_timeout": True, + **{ + "verify_certs": False, + "request_timeout": ELASTIC_TIMEOUT, + "retry_on_timeout": True, + "ssl_show_warn": False, + }, **connection_params, } if host.startswith("http"): @@ -21,6 +30,23 @@ def get_es_client(host, connection_params): else: url = "http://" url += f"{host}:{ELASTIC_PORT}" - return Elasticsearch( - url, basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), **init_params + client = Elasticsearch( + url, + basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), + **init_params, ) + assert client.ping() + return client + + +def _wait_for_es_status(client, status="yellow"): + print(f"waiting for ES {status} status...") + for _ in range(100): + try: + client.cluster.health(wait_for_status=status) + return client + except ConnectionError: + time.sleep(0.1) + else: + # timeout + raise Exception("Elasticsearch failed to start.") diff --git a/engine/clients/elasticsearch/configure.py b/engine/clients/elasticsearch/configure.py index c07e5939..d2a09db3 100644 --- a/engine/clients/elasticsearch/configure.py +++ b/engine/clients/elasticsearch/configure.py @@ -1,10 +1,15 @@ -from elasticsearch import Elasticsearch, NotFoundError +from elasticsearch import NotFoundError from benchmark.dataset import Dataset from engine.base_client import IncompatibilityError from engine.base_client.configure import BaseConfigurator from engine.base_client.distances import Distance -from engine.clients.elasticsearch.config import ELASTIC_INDEX, get_es_client +from engine.clients.elasticsearch.config import ( + ELASTIC_INDEX, + ELASTIC_INDEX_REFRESH_INTERVAL, + ELASTIC_INDEX_TIMEOUT, + get_es_client, +) class ElasticConfigurator(BaseConfigurator): @@ -23,12 +28,16 @@ def __init__(self, host, collection_params: dict, connection_params: dict): self.client = get_es_client(host, connection_params) def clean(self): + print("Ensuring the index does not exist...") try: self.client.indices.delete( - index=ELASTIC_INDEX, timeout="5m", master_timeout="5m" + index=ELASTIC_INDEX, + timeout=ELASTIC_INDEX_TIMEOUT, + master_timeout=ELASTIC_INDEX_TIMEOUT, ) except NotFoundError: pass + print("Finished ensuring the index does not exist...") def recreate(self, dataset: Dataset, collection_params): if dataset.config.distance == Distance.DOT: @@ -39,11 +48,14 @@ def recreate(self, dataset: Dataset, collection_params): self.client.indices.create( index=ELASTIC_INDEX, + timeout=ELASTIC_INDEX_TIMEOUT, + master_timeout=ELASTIC_INDEX_TIMEOUT, + wait_for_active_shards="all", settings={ "index": { "number_of_shards": 1, "number_of_replicas": 0, - "refresh_interval": -1, + "refresh_interval": ELASTIC_INDEX_REFRESH_INTERVAL, } }, mappings={ diff --git a/engine/clients/elasticsearch/upload.py b/engine/clients/elasticsearch/upload.py index 9b24ca5d..7c3174ba 100644 --- a/engine/clients/elasticsearch/upload.py +++ b/engine/clients/elasticsearch/upload.py @@ -2,10 +2,15 @@ import uuid from typing import List, Optional -from elasticsearch import Elasticsearch +import elastic_transport +from elasticsearch import ApiError, Elasticsearch from engine.base_client.upload import BaseUploader -from engine.clients.elasticsearch.config import ELASTIC_INDEX, get_es_client +from engine.clients.elasticsearch.config import ( + ELASTIC_INDEX, + _wait_for_es_status, + get_es_client, +) class ClosableElastic(Elasticsearch): @@ -48,7 +53,23 @@ def upload_batch( @classmethod def post_upload(cls, _distance): - cls.client.indices.forcemerge( - index=ELASTIC_INDEX, wait_for_completion=True, max_num_segments=1 - ) + print("forcing the merge into 1 segment...") + tries = 30 + for i in range(tries + 1): + try: + cls.client.indices.forcemerge( + index=ELASTIC_INDEX, wait_for_completion=True, max_num_segments=1 + ) + except (elastic_transport.TlsError, ApiError) as e: + if i < tries: + print( + "Received the following error during retry {}/{} while waiting for ES index to be ready... {}".format( + i, tries, e.__str__() + ) + ) + continue + else: + raise + _wait_for_es_status(cls.client) + break return {} From 0a2c99c8cc53c3cd553bb1b39b5632e1bcccf420 Mon Sep 17 00:00:00 2001 From: Weaviate Git Bot <83967968+weaviate-git-bot@users.noreply.github.com> Date: Tue, 9 Apr 2024 00:45:35 -0500 Subject: [PATCH 09/12] Updated Weaviate Docker image url (auto PR by bot) (#109) * updated the Weaviate Docker image location (automated bot update) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated the Weaviate Docker image location (automated bot update) --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- engine/servers/weaviate-single-node/docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/servers/weaviate-single-node/docker-compose.yaml b/engine/servers/weaviate-single-node/docker-compose.yaml index 9291687a..e5e9e9ec 100644 --- a/engine/servers/weaviate-single-node/docker-compose.yaml +++ b/engine/servers/weaviate-single-node/docker-compose.yaml @@ -8,7 +8,7 @@ services: - '8090' - --scheme - http - image: semitechnologies/weaviate:1.24.1 + image: cr.weaviate.io/semitechnologies/weaviate:1.24.1 network_mode: host logging: driver: "json-file" From fa5b6b6197ecccdf9e655109257b4d90cb1d8083 Mon Sep 17 00:00:00 2001 From: Andrew Kane <acekane1@gmail.com> Date: Wed, 10 Apr 2024 23:49:01 -0700 Subject: [PATCH 10/12] pgvector improvements (#98) * pgvector improvements * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updated Postgres parameters * Use versioned Docker image * Updated pgvector to 0.6.2 --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- engine/clients/pgvector/config.py | 3 +- engine/clients/pgvector/configure.py | 16 ------ engine/clients/pgvector/search.py | 25 ++++------ engine/clients/pgvector/upload.py | 25 +++++++++- .../pgvector-single-node/docker-compose.yaml | 8 ++- .../configurations/pgvector-single-node.json | 50 +++++++------------ 6 files changed, 59 insertions(+), 68 deletions(-) diff --git a/engine/clients/pgvector/config.py b/engine/clients/pgvector/config.py index dc3b8365..5507745c 100644 --- a/engine/clients/pgvector/config.py +++ b/engine/clients/pgvector/config.py @@ -1,6 +1,6 @@ import os -PGVECTOR_PORT = int(os.getenv("PGVECTOR_PORT", 9200)) +PGVECTOR_PORT = int(os.getenv("PGVECTOR_PORT", 5432)) PGVECTOR_DB = os.getenv("PGVECTOR_DB", "postgres") PGVECTOR_USER = os.getenv("PGVECTOR_USER", "postgres") PGVECTOR_PASSWORD = os.getenv("PGVECTOR_PASSWORD", "passwd") @@ -9,6 +9,7 @@ def get_db_config(host, connection_params): return { "host": host or "localhost", + "port": PGVECTOR_PORT, "dbname": PGVECTOR_DB, "user": PGVECTOR_USER, "password": PGVECTOR_PASSWORD, diff --git a/engine/clients/pgvector/configure.py b/engine/clients/pgvector/configure.py index d5587431..0da692b2 100644 --- a/engine/clients/pgvector/configure.py +++ b/engine/clients/pgvector/configure.py @@ -9,11 +9,6 @@ class PgVectorConfigurator(BaseConfigurator): - DISTANCE_MAPPING = { - Distance.L2: "vector_l2_ops", - Distance.COSINE: "vector_cosine_ops", - } - def __init__(self, host, collection_params: dict, connection_params: dict): super().__init__(host, collection_params, connection_params) self.conn = psycopg.connect(**get_db_config(host, connection_params)) @@ -38,17 +33,6 @@ def recreate(self, dataset: Dataset, collection_params): ) self.conn.execute("ALTER TABLE items ALTER COLUMN embedding SET STORAGE PLAIN") - try: - hnsw_distance_type = self.DISTANCE_MAPPING[dataset.config.distance] - except KeyError: - raise IncompatibilityError( - f"Unsupported distance metric: {dataset.config.distance}" - ) - - self.conn.execute( - f"CREATE INDEX on items USING hnsw(embedding {hnsw_distance_type}) WITH (m = {collection_params['hnsw_config']['m']}, ef_construction = {collection_params['hnsw_config']['ef_construct']})" - ) - self.conn.close() def delete_client(self): diff --git a/engine/clients/pgvector/search.py b/engine/clients/pgvector/search.py index fa8bde5a..62f53035 100644 --- a/engine/clients/pgvector/search.py +++ b/engine/clients/pgvector/search.py @@ -23,24 +23,19 @@ def init_client(cls, host, distance, connection_params: dict, search_params: dic cls.conn = psycopg.connect(**get_db_config(host, connection_params)) register_vector(cls.conn) cls.cur = cls.conn.cursor() - cls.distance = distance - cls.search_params = search_params["search_params"] - - @classmethod - def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]: - cls.cur.execute(f"SET hnsw.ef_search = {cls.search_params['hnsw_ef']}") - - if cls.distance == Distance.COSINE: - query = f"SELECT id, embedding <=> %s AS _score FROM items ORDER BY _score LIMIT {top};" - elif cls.distance == Distance.L2: - query = f"SELECT id, embedding <-> %s AS _score FROM items ORDER BY _score LIMIT {top};" + cls.cur.execute( + f"SET hnsw.ef_search = {search_params['search_params']['hnsw_ef']}" + ) + if distance == Distance.COSINE: + cls.query = f"SELECT id, embedding <=> %s AS _score FROM items ORDER BY _score LIMIT %s" + elif distance == Distance.L2: + cls.query = f"SELECT id, embedding <-> %s AS _score FROM items ORDER BY _score LIMIT %s" else: raise NotImplementedError(f"Unsupported distance metric {cls.distance}") - cls.cur.execute( - query, - (np.array(vector),), - ) + @classmethod + def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]: + cls.cur.execute(cls.query, (np.array(vector), top), binary=True, prepare=True) return cls.cur.fetchall() @classmethod diff --git a/engine/clients/pgvector/upload.py b/engine/clients/pgvector/upload.py index 8d59ee7f..c3921e95 100644 --- a/engine/clients/pgvector/upload.py +++ b/engine/clients/pgvector/upload.py @@ -4,11 +4,16 @@ import psycopg from pgvector.psycopg import register_vector +from engine.base_client.distances import Distance from engine.base_client.upload import BaseUploader from engine.clients.pgvector.config import get_db_config class PgVectorUploader(BaseUploader): + DISTANCE_MAPPING = { + Distance.L2: "vector_l2_ops", + Distance.COSINE: "vector_cosine_ops", + } conn = None cur = None upload_params = {} @@ -27,10 +32,28 @@ def upload_batch( vectors = np.array(vectors) # Copy is faster than insert - with cls.cur.copy("COPY items (id, embedding) FROM STDIN") as copy: + with cls.cur.copy( + "COPY items (id, embedding) FROM STDIN WITH (FORMAT BINARY)" + ) as copy: + copy.set_types(["integer", "vector"]) for i, embedding in zip(ids, vectors): copy.write_row((i, embedding)) + @classmethod + def post_upload(cls, distance): + try: + hnsw_distance_type = cls.DISTANCE_MAPPING[distance] + except KeyError: + raise IncompatibilityError(f"Unsupported distance metric: {distance}") + + cls.conn.execute("SET max_parallel_workers = 128") + cls.conn.execute("SET max_parallel_maintenance_workers = 128") + cls.conn.execute( + f"CREATE INDEX ON items USING hnsw (embedding {hnsw_distance_type}) WITH (m = {cls.upload_params['hnsw_config']['m']}, ef_construction = {cls.upload_params['hnsw_config']['ef_construct']})" + ) + + return {} + @classmethod def delete_client(cls): if cls.cur: diff --git a/engine/servers/pgvector-single-node/docker-compose.yaml b/engine/servers/pgvector-single-node/docker-compose.yaml index ea554d92..13e8b8e6 100644 --- a/engine/servers/pgvector-single-node/docker-compose.yaml +++ b/engine/servers/pgvector-single-node/docker-compose.yaml @@ -3,13 +3,17 @@ version: '3.7' services: pgvector: container_name: pgvector - image: ankane/pgvector:v0.5.1 + image: pgvector/pgvector:0.6.2-pg16 environment: - POSTGRES_DB=postgres - POSTGRES_USER=postgres - POSTGRES_PASSWORD=passwd - POSTGRES_HOST_AUTH_METHOD=trust - - POSTGRES_MAX_CONNECTIONS=200 + # shared_buffers should be 25% of memory + # maintenance_work_mem should be ~65% + command: postgres -c shared_buffers=6GB -c maintenance_work_mem=16GB -c max_connections=200 + # shm_size should be shared_buffers + maintenance_work_mem + shm_size: 22g ports: - 5432:5432 logging: diff --git a/experiments/configurations/pgvector-single-node.json b/experiments/configurations/pgvector-single-node.json index e1c8e33a..22ced04d 100644 --- a/experiments/configurations/pgvector-single-node.json +++ b/experiments/configurations/pgvector-single-node.json @@ -3,104 +3,88 @@ "name": "pgvector-default", "engine": "pgvector", "connection_params": {}, - "collection_params": { - "hnsw_config": { "m": 16, "ef_construct": 128 } - }, + "collection_params": {}, "search_params": [ - { "parallel": 1, "search_params": { "hnsw_ef": 128 } } + { "parallel": 8, "search_params": { "hnsw_ef": 128 } } ], - "upload_params": { "parallel": 1, "batch_size": 1024 } + "upload_params": { "parallel": 16, "batch_size": 1024, "hnsw_config": { "m": 16, "ef_construct": 128 } } }, { "name": "pgvector-parallel", "engine": "pgvector", "connection_params": {}, - "collection_params": { - "hnsw_config": { "m": 16, "ef_construct": 128 } - }, + "collection_params": {}, "search_params": [ { "parallel": 8, "search_params": { "hnsw_ef": 128 } }, { "parallel": 16, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } } ], - "upload_params": { "parallel": 1, "batch_size": 1024 } + "upload_params": { "parallel": 1, "batch_size": 1024, "hnsw_config": { "m": 16, "ef_construct": 128 } } }, { "name": "pgvector-m-16-ef-128", "engine": "pgvector", "connection_params": {}, - "collection_params": { - "hnsw_config": { "m": 16, "ef_construct": 128 } - }, + "collection_params": {}, "search_params": [ { "parallel": 1, "search_params": { "hnsw_ef": 64 } }, { "parallel": 1, "search_params": { "hnsw_ef": 128 } }, { "parallel": 1, "search_params": { "hnsw_ef": 256 } }, { "parallel": 1, "search_params": { "hnsw_ef": 512 } }, { "parallel": 100, "search_params": { "hnsw_ef": 64 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 256 } }, { "parallel": 100, "search_params": { "hnsw_ef": 512 } } ], - "upload_params": { "parallel": 16 } + "upload_params": { "parallel": 16, "hnsw_config": { "m": 16, "ef_construct": 128 } } }, { "name": "pgvector-m-32-ef-128", "engine": "pgvector", "connection_params": {}, - "collection_params": { - "hnsw_config": { "m": 32, "ef_construct": 128 } - }, + "collection_params": {}, "search_params": [ { "parallel": 1, "search_params": { "hnsw_ef": 64 } }, { "parallel": 1, "search_params": { "hnsw_ef": 128 } }, { "parallel": 1, "search_params": { "hnsw_ef": 256 } }, { "parallel": 1, "search_params": { "hnsw_ef": 512 } }, { "parallel": 100, "search_params": { "hnsw_ef": 64 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 256 } }, { "parallel": 100, "search_params": { "hnsw_ef": 512 } } ], - "upload_params": { "parallel": 16 } + "upload_params": { "parallel": 16, "hnsw_config": { "m": 32, "ef_construct": 128 } } }, { "name": "pgvector-m-32-ef-256", "engine": "pgvector", "connection_params": {}, - "collection_params": { - "hnsw_config": { "m": 32, "ef_construct": 256 } - }, + "collection_params": {}, "search_params": [ { "parallel": 1, "search_params": { "hnsw_ef": 64 } }, { "parallel": 1, "search_params": { "hnsw_ef": 128 } }, { "parallel": 1, "search_params": { "hnsw_ef": 256 } }, { "parallel": 1, "search_params": { "hnsw_ef": 512 } }, { "parallel": 100, "search_params": { "hnsw_ef": 64 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 256 } }, { "parallel": 100, "search_params": { "hnsw_ef": 512 } } ], - "upload_params": { "parallel": 16 } + "upload_params": { "parallel": 16, "hnsw_config": { "m": 32, "ef_construct": 256 } } }, { "name": "pgvector-m-32-ef-512", "engine": "pgvector", "connection_params": {}, - "collection_params": { - "hnsw_config": { "m": 32, "ef_construct": 512 } - }, + "collection_params": {}, "search_params": [ { "parallel": 1, "search_params": { "hnsw_ef": 64 } }, { "parallel": 1, "search_params": { "hnsw_ef": 128 } }, { "parallel": 1, "search_params": { "hnsw_ef": 256 } }, { "parallel": 1, "search_params": { "hnsw_ef": 512 } }, { "parallel": 100, "search_params": { "hnsw_ef": 64 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 256 } }, { "parallel": 100, "search_params": { "hnsw_ef": 512 } } ], - "upload_params": { "parallel": 16 } + "upload_params": { "parallel": 16, "hnsw_config": { "m": 32, "ef_construct": 512 } } }, { "name": "pgvector-m-64-ef-256", "engine": "pgvector", "connection_params": {}, - "collection_params": { - "hnsw_config": { "m": 64, "ef_construct": 256 } - }, + "collection_params": {}, "search_params": [ { "parallel": 1, "search_params": { "hnsw_ef": 64 } }, { "parallel": 1, "search_params": { "hnsw_ef": 128 } }, { "parallel": 1, "search_params": { "hnsw_ef": 256 } }, { "parallel": 1, "search_params": { "hnsw_ef": 512 } }, { "parallel": 100, "search_params": { "hnsw_ef": 64 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 256 } }, { "parallel": 100, "search_params": { "hnsw_ef": 512 } } ], - "upload_params": { "parallel": 16 } + "upload_params": { "parallel": 16, "hnsw_config": { "m": 64, "ef_construct": 256 } } }, { "name": "pgvector-m-64-ef-512", "engine": "pgvector", "connection_params": {}, - "collection_params": { - "hnsw_config": { "m": 64, "ef_construct": 512 } - }, + "collection_params": {}, "search_params": [ { "parallel": 1, "search_params": { "hnsw_ef": 64 } }, { "parallel": 1, "search_params": { "hnsw_ef": 128 } }, { "parallel": 1, "search_params": { "hnsw_ef": 256 } }, { "parallel": 1, "search_params": { "hnsw_ef": 512 } }, { "parallel": 100, "search_params": { "hnsw_ef": 64 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 256 } }, { "parallel": 100, "search_params": { "hnsw_ef": 512 } } ], - "upload_params": { "parallel": 16 } + "upload_params": { "parallel": 16, "hnsw_config": { "m": 64, "ef_construct": 512 } } } ] From 37b62fb6c45787394c127e732617308ec4b16100 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 13:26:37 +0530 Subject: [PATCH 11/12] [pre-commit.ci] pre-commit suggestions (#47) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v4.4.0 → v4.5.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.4.0...v4.5.0) - [github.com/psf/black: 22.12.0 → 24.3.0](https://github.com/psf/black/compare/22.12.0...24.3.0) - [github.com/PyCQA/isort: 5.12.0 → 5.13.2](https://github.com/PyCQA/isort/compare/5.12.0...5.13.2) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 754906cd..6817ea9d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,19 +11,19 @@ ci: repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: check-added-large-files - repo: https://github.com/psf/black - rev: 22.12.0 + rev: 24.3.0 hooks: - id: black name: "Black: The uncompromising Python code formatter" - repo: https://github.com/PyCQA/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort name: "Sort Imports" From 4d6b8b7820aabe37ac822461ea7f089aa441763b Mon Sep 17 00:00:00 2001 From: KShivendu <kshivendu1@gmail.com> Date: Thu, 11 Apr 2024 14:24:56 +0530 Subject: [PATCH 12/12] refactor: Nest search params in ES config --- engine/clients/elasticsearch/config.py | 20 +++++-------- engine/clients/elasticsearch/configure.py | 8 ++---- engine/clients/elasticsearch/search.py | 17 ++--------- engine/clients/elasticsearch/upload.py | 3 +- .../elasticsearch-single-node.json | 28 +++++++++---------- 5 files changed, 27 insertions(+), 49 deletions(-) diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py index 024ca97b..60bf20bd 100644 --- a/engine/clients/elasticsearch/config.py +++ b/engine/clients/elasticsearch/config.py @@ -8,33 +8,27 @@ ELASTIC_INDEX = os.getenv("ELASTIC_INDEX", "bench") ELASTIC_USER = os.getenv("ELASTIC_USER", "elastic") ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "passwd") + ELASTIC_TIMEOUT = int(os.getenv("ELASTIC_TIMEOUT", 300)) ELASTIC_INDEX_TIMEOUT = os.getenv("ELASTIC_INDEX_TIMEOUT", "30m") ELASTIC_INDEX_REFRESH_INTERVAL = os.getenv("ELASTIC_INDEX_REFRESH_INTERVAL", "-1") -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) def get_es_client(host, connection_params): client: Elasticsearch = None init_params = { - **{ - "verify_certs": False, - "request_timeout": ELASTIC_TIMEOUT, - "retry_on_timeout": True, - "ssl_show_warn": False, - }, + "verify_certs": False, + "request_timeout": ELASTIC_TIMEOUT, + "retry_on_timeout": True, + "ssl_show_warn": False, **connection_params, } - if host.startswith("http"): - url = "" - else: - url = "http://" - url += f"{host}:{ELASTIC_PORT}" client = Elasticsearch( - url, + f"http://{host}:{ELASTIC_PORT}", basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), **init_params, ) + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) assert client.ping() return client diff --git a/engine/clients/elasticsearch/configure.py b/engine/clients/elasticsearch/configure.py index d2a09db3..c770b4da 100644 --- a/engine/clients/elasticsearch/configure.py +++ b/engine/clients/elasticsearch/configure.py @@ -67,12 +67,8 @@ def recreate(self, dataset: Dataset, collection_params): "index": True, "similarity": self.DISTANCE_MAPPING[dataset.config.distance], "index_options": { - **{ - "type": "hnsw", - "m": 16, - "ef_construction": 100, - }, - **collection_params.get("index_options"), + "type": "hnsw", + **collection_params["index_options"], }, }, **self._prepare_fields_config(dataset), diff --git a/engine/clients/elasticsearch/search.py b/engine/clients/elasticsearch/search.py index b7b09e2f..97dc1d57 100644 --- a/engine/clients/elasticsearch/search.py +++ b/engine/clients/elasticsearch/search.py @@ -1,4 +1,3 @@ -import copy import multiprocessing as mp import uuid from typing import List, Tuple @@ -25,19 +24,9 @@ def get_mp_start_method(cls): return "forkserver" if "forkserver" in mp.get_all_start_methods() else "spawn" @classmethod - def init_client(cls, host, distance, connection_params: dict, search_params: dict): - init_params = { - **{ - "verify_certs": False, - "request_timeout": 90, - "retry_on_timeout": True, - }, - **connection_params, - } + def init_client(cls, host, _distance, connection_params: dict, search_params: dict): cls.client = get_es_client(host, connection_params) - cls.search_params = copy.deepcopy(search_params) - # pop parallel - cls.search_params.pop("parallel", "1") + cls.search_params = search_params @classmethod def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]: @@ -45,7 +34,7 @@ def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]: "field": "vector", "query_vector": vector, "k": top, - **{"num_candidates": 100, **cls.search_params}, + **cls.search_params['params'] } meta_conditions = cls.parser.parse(meta_conditions) diff --git a/engine/clients/elasticsearch/upload.py b/engine/clients/elasticsearch/upload.py index 7c3174ba..82709217 100644 --- a/engine/clients/elasticsearch/upload.py +++ b/engine/clients/elasticsearch/upload.py @@ -27,7 +27,7 @@ def get_mp_start_method(cls): return "forkserver" if "forkserver" in mp.get_all_start_methods() else "spawn" @classmethod - def init_client(cls, host, distance, connection_params, upload_params): + def init_client(cls, host, _distance, connection_params, upload_params): cls.client = get_es_client(host, connection_params) cls.upload_params = upload_params @@ -53,7 +53,6 @@ def upload_batch( @classmethod def post_upload(cls, _distance): - print("forcing the merge into 1 segment...") tries = 30 for i in range(tries + 1): try: diff --git a/experiments/configurations/elasticsearch-single-node.json b/experiments/configurations/elasticsearch-single-node.json index b3f0f609..3ffb4034 100644 --- a/experiments/configurations/elasticsearch-single-node.json +++ b/experiments/configurations/elasticsearch-single-node.json @@ -7,8 +7,8 @@ }, "collection_params": { "index_options": { "m": 16, "ef_construction": 100 } }, "search_params": [ - { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 }, - { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 } + { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} }, + { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} } ], "upload_params": { "parallel": 16 } }, @@ -20,8 +20,8 @@ }, "collection_params": { "index_options": { "m": 16, "ef_construction": 128 } }, "search_params": [ - { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 }, - { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 } + { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} }, + { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} } ], "upload_params": { "parallel": 16 } }, @@ -33,8 +33,8 @@ }, "collection_params": { "index_options": { "m": 32, "ef_construction": 128 } }, "search_params": [ - { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 }, - { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 } + { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} }, + { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} } ], "upload_params": { "parallel": 16 } }, @@ -46,8 +46,8 @@ }, "collection_params": { "index_options": { "m": 32, "ef_construction": 256 } }, "search_params": [ - { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 }, - { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 } + { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} }, + { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} } ], "upload_params": { "parallel": 16 } }, @@ -59,8 +59,8 @@ }, "collection_params": { "index_options": { "m": 32, "ef_construction": 512 } }, "search_params": [ - { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 }, - { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 } + { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} }, + { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} } ], "upload_params": { "parallel": 16 } }, @@ -72,8 +72,8 @@ }, "collection_params": { "index_options": { "m": 64, "ef_construction": 256 } }, "search_params": [ - { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 }, - { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 } + { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} }, + { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} } ], "upload_params": { "parallel": 16 } }, @@ -85,8 +85,8 @@ }, "collection_params": { "index_options": { "m": 64, "ef_construction": 512 } }, "search_params": [ - { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 }, - { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 } + { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} }, + { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} } ], "upload_params": { "parallel": 16 } }