From 100bf974c9adb987decb083cf8bfad87d9b3d7b6 Mon Sep 17 00:00:00 2001
From: filipecosta90 <filipecosta.90@gmail.com>
Date: Mon, 4 Mar 2024 12:22:55 +0000
Subject: [PATCH 01/12] Enable API key Elasticsearch connections

---
 engine/clients/elasticsearch/config.py    | 43 ++++++++++++++++++++---
 engine/clients/elasticsearch/configure.py | 18 ++--------
 engine/clients/elasticsearch/search.py    | 10 ++----
 engine/clients/elasticsearch/upload.py    | 18 ++--------
 4 files changed, 45 insertions(+), 44 deletions(-)

diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py
index 19b59d74..0658ed1e 100644
--- a/engine/clients/elasticsearch/config.py
+++ b/engine/clients/elasticsearch/config.py
@@ -1,4 +1,39 @@
-ELASTIC_PORT = 9200
-ELASTIC_INDEX = "bench"
-ELASTIC_USER = "elastic"
-ELASTIC_PASSWORD = "passwd"
+import os
+from elasticsearch import Elasticsearch
+
+ELASTIC_PORT = int(os.getenv("ELASTIC_PORT", 9200))
+ELASTIC_INDEX = os.getenv("ELASTIC_INDEX", "bench")
+ELASTIC_USER = os.getenv("ELASTIC_USER", "elastic")
+ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "passwd")
+ELASTIC_API_KEY = os.getenv("ELASTIC_API_KEY", None)
+ELASTIC_TIMEOUT = int(os.getenv("ELASTIC_TIMEOUT", 90))
+
+
+def get_es_client(host, connection_params):
+    client: Elasticsearch = None
+    init_params = {
+        **{
+            "verify_certs": False,
+            "request_timeout": ELASTIC_TIMEOUT,
+            "retry_on_timeout": True,
+        },
+        **connection_params,
+    }
+    if host.startswith("http"):
+        url = ""
+    else:
+        url = "http://"
+    url += f"{host}:{ELASTIC_PORT}"
+    if ELASTIC_API_KEY is None:
+        client = Elasticsearch(
+            url,
+            basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD),
+            **init_params,
+        )
+    else:
+        client = Elasticsearch(
+            url,
+            api_key=ELASTIC_API_KEY,
+            **init_params,
+        )
+    return client
diff --git a/engine/clients/elasticsearch/configure.py b/engine/clients/elasticsearch/configure.py
index 76f64eb8..a4f08425 100644
--- a/engine/clients/elasticsearch/configure.py
+++ b/engine/clients/elasticsearch/configure.py
@@ -6,9 +6,7 @@
 from engine.base_client.distances import Distance
 from engine.clients.elasticsearch.config import (
     ELASTIC_INDEX,
-    ELASTIC_PASSWORD,
-    ELASTIC_PORT,
-    ELASTIC_USER,
+    get_es_client,
 )
 
 
@@ -25,19 +23,7 @@ class ElasticConfigurator(BaseConfigurator):
 
     def __init__(self, host, collection_params: dict, connection_params: dict):
         super().__init__(host, collection_params, connection_params)
-        init_params = {
-            **{
-                "verify_certs": False,
-                "request_timeout": 90,
-                "retry_on_timeout": True,
-            },
-            **connection_params,
-        }
-        self.client = Elasticsearch(
-            f"http://{host}:{ELASTIC_PORT}",
-            basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD),
-            **init_params,
-        )
+        self.client = get_es_client(host, connection_params)
 
     def clean(self):
         try:
diff --git a/engine/clients/elasticsearch/search.py b/engine/clients/elasticsearch/search.py
index 29d20ec5..ed15113c 100644
--- a/engine/clients/elasticsearch/search.py
+++ b/engine/clients/elasticsearch/search.py
@@ -7,9 +7,7 @@
 from engine.base_client.search import BaseSearcher
 from engine.clients.elasticsearch.config import (
     ELASTIC_INDEX,
-    ELASTIC_PASSWORD,
-    ELASTIC_PORT,
-    ELASTIC_USER,
+    get_es_client,
 )
 from engine.clients.elasticsearch.parser import ElasticConditionParser
 
@@ -38,11 +36,7 @@ def init_client(cls, host, distance, connection_params: dict, search_params: dic
             },
             **connection_params,
         }
-        cls.client: Elasticsearch = Elasticsearch(
-            f"http://{host}:{ELASTIC_PORT}",
-            basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD),
-            **init_params,
-        )
+        cls.client = get_es_client(host, connection_params)
         cls.search_params = search_params
 
     @classmethod
diff --git a/engine/clients/elasticsearch/upload.py b/engine/clients/elasticsearch/upload.py
index 0d5c6f2b..78842a25 100644
--- a/engine/clients/elasticsearch/upload.py
+++ b/engine/clients/elasticsearch/upload.py
@@ -7,9 +7,7 @@
 from engine.base_client.upload import BaseUploader
 from engine.clients.elasticsearch.config import (
     ELASTIC_INDEX,
-    ELASTIC_PASSWORD,
-    ELASTIC_PORT,
-    ELASTIC_USER,
+    get_es_client,
 )
 
 
@@ -28,19 +26,7 @@ def get_mp_start_method(cls):
 
     @classmethod
     def init_client(cls, host, distance, connection_params, upload_params):
-        init_params = {
-            **{
-                "verify_certs": False,
-                "request_timeout": 90,
-                "retry_on_timeout": True,
-            },
-            **connection_params,
-        }
-        cls.client = Elasticsearch(
-            f"http://{host}:{ELASTIC_PORT}",
-            basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD),
-            **init_params,
-        )
+        cls.client = get_es_client(host, connection_params)
         cls.upload_params = upload_params
 
     @classmethod

From 6549035af6246e912a785a33d876d42e7303c4e6 Mon Sep 17 00:00:00 2001
From: filipecosta90 <filipecosta.90@gmail.com>
Date: Mon, 4 Mar 2024 12:47:06 +0000
Subject: [PATCH 02/12] Fixes per pre-commit hook

---
 engine/clients/elasticsearch/config.py    | 1 +
 engine/clients/elasticsearch/configure.py | 5 +----
 engine/clients/elasticsearch/search.py    | 5 +----
 engine/clients/elasticsearch/upload.py    | 5 +----
 4 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py
index 0658ed1e..a988eee7 100644
--- a/engine/clients/elasticsearch/config.py
+++ b/engine/clients/elasticsearch/config.py
@@ -1,4 +1,5 @@
 import os
+
 from elasticsearch import Elasticsearch
 
 ELASTIC_PORT = int(os.getenv("ELASTIC_PORT", 9200))
diff --git a/engine/clients/elasticsearch/configure.py b/engine/clients/elasticsearch/configure.py
index a4f08425..c07e5939 100644
--- a/engine/clients/elasticsearch/configure.py
+++ b/engine/clients/elasticsearch/configure.py
@@ -4,10 +4,7 @@
 from engine.base_client import IncompatibilityError
 from engine.base_client.configure import BaseConfigurator
 from engine.base_client.distances import Distance
-from engine.clients.elasticsearch.config import (
-    ELASTIC_INDEX,
-    get_es_client,
-)
+from engine.clients.elasticsearch.config import ELASTIC_INDEX, get_es_client
 
 
 class ElasticConfigurator(BaseConfigurator):
diff --git a/engine/clients/elasticsearch/search.py b/engine/clients/elasticsearch/search.py
index ed15113c..a20b2121 100644
--- a/engine/clients/elasticsearch/search.py
+++ b/engine/clients/elasticsearch/search.py
@@ -5,10 +5,7 @@
 from elasticsearch import Elasticsearch
 
 from engine.base_client.search import BaseSearcher
-from engine.clients.elasticsearch.config import (
-    ELASTIC_INDEX,
-    get_es_client,
-)
+from engine.clients.elasticsearch.config import ELASTIC_INDEX, get_es_client
 from engine.clients.elasticsearch.parser import ElasticConditionParser
 
 
diff --git a/engine/clients/elasticsearch/upload.py b/engine/clients/elasticsearch/upload.py
index 78842a25..9b24ca5d 100644
--- a/engine/clients/elasticsearch/upload.py
+++ b/engine/clients/elasticsearch/upload.py
@@ -5,10 +5,7 @@
 from elasticsearch import Elasticsearch
 
 from engine.base_client.upload import BaseUploader
-from engine.clients.elasticsearch.config import (
-    ELASTIC_INDEX,
-    get_es_client,
-)
+from engine.clients.elasticsearch.config import ELASTIC_INDEX, get_es_client
 
 
 class ClosableElastic(Elasticsearch):

From 3f314d82659cac25d0c983161147706506061879 Mon Sep 17 00:00:00 2001
From: filipecosta90 <filipecosta.90@gmail.com>
Date: Mon, 4 Mar 2024 13:57:43 +0000
Subject: [PATCH 03/12] Pop the parallel config from a deep copy of
 search_params

---
 engine/clients/elasticsearch/search.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/engine/clients/elasticsearch/search.py b/engine/clients/elasticsearch/search.py
index a20b2121..b7b09e2f 100644
--- a/engine/clients/elasticsearch/search.py
+++ b/engine/clients/elasticsearch/search.py
@@ -1,3 +1,4 @@
+import copy
 import multiprocessing as mp
 import uuid
 from typing import List, Tuple
@@ -34,7 +35,9 @@ def init_client(cls, host, distance, connection_params: dict, search_params: dic
             **connection_params,
         }
         cls.client = get_es_client(host, connection_params)
-        cls.search_params = search_params
+        cls.search_params = copy.deepcopy(search_params)
+        # pop parallel
+        cls.search_params.pop("parallel", "1")
 
     @classmethod
     def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]:

From 6bd120fa70c8c7edca555a240a5d7b2d26e9595f Mon Sep 17 00:00:00 2001
From: filipe oliveira <filipecosta.90@gmail.com>
Date: Fri, 22 Mar 2024 10:46:32 +0000
Subject: [PATCH 04/12] Update engine/clients/elasticsearch/config.py

Co-authored-by: Kumar Shivendu <kshivendu1@gmail.com>
---
 engine/clients/elasticsearch/config.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py
index a988eee7..2e8ffe4e 100644
--- a/engine/clients/elasticsearch/config.py
+++ b/engine/clients/elasticsearch/config.py
@@ -13,11 +13,9 @@
 def get_es_client(host, connection_params):
     client: Elasticsearch = None
     init_params = {
-        **{
-            "verify_certs": False,
-            "request_timeout": ELASTIC_TIMEOUT,
-            "retry_on_timeout": True,
-        },
+        "verify_certs": False,
+        "request_timeout": ELASTIC_TIMEOUT,
+        "retry_on_timeout": True,
         **connection_params,
     }
     if host.startswith("http"):

From d976cf6df2243036a65517d699e70d16451f5b4f Mon Sep 17 00:00:00 2001
From: filipe oliveira <filipecosta.90@gmail.com>
Date: Fri, 22 Mar 2024 10:47:09 +0000
Subject: [PATCH 05/12] Update engine/clients/elasticsearch/config.py

Co-authored-by: Kumar Shivendu <kshivendu1@gmail.com>
---
 engine/clients/elasticsearch/config.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py
index 2e8ffe4e..37ea2682 100644
--- a/engine/clients/elasticsearch/config.py
+++ b/engine/clients/elasticsearch/config.py
@@ -11,7 +11,6 @@
 
 
 def get_es_client(host, connection_params):
-    client: Elasticsearch = None
     init_params = {
         "verify_certs": False,
         "request_timeout": ELASTIC_TIMEOUT,

From 06ac0f8e846ace388970546a86d365b392b13fc7 Mon Sep 17 00:00:00 2001
From: filipe oliveira <filipecosta.90@gmail.com>
Date: Fri, 22 Mar 2024 10:51:33 +0000
Subject: [PATCH 06/12] Update engine/clients/elasticsearch/config.py

Co-authored-by: Kumar Shivendu <kshivendu1@gmail.com>
---
 engine/clients/elasticsearch/config.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py
index 37ea2682..0f31d0f6 100644
--- a/engine/clients/elasticsearch/config.py
+++ b/engine/clients/elasticsearch/config.py
@@ -23,15 +23,6 @@ def get_es_client(host, connection_params):
         url = "http://"
     url += f"{host}:{ELASTIC_PORT}"
     if ELASTIC_API_KEY is None:
-        client = Elasticsearch(
-            url,
-            basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD),
-            **init_params,
-        )
+        return Elasticsearch(url, basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), **init_params)
     else:
-        client = Elasticsearch(
-            url,
-            api_key=ELASTIC_API_KEY,
-            **init_params,
-        )
-    return client
+        return Elasticsearch(url, api_key=ELASTIC_API_KEY, **init_params)

From 9751080444412920f2b918a0dd83d24b68209772 Mon Sep 17 00:00:00 2001
From: filipecosta90 <filipecosta.90@gmail.com>
Date: Thu, 4 Apr 2024 10:34:21 +0100
Subject: [PATCH 07/12] Removed api_key usage per review

---
 engine/clients/elasticsearch/config.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py
index 0f31d0f6..9084c745 100644
--- a/engine/clients/elasticsearch/config.py
+++ b/engine/clients/elasticsearch/config.py
@@ -6,7 +6,6 @@
 ELASTIC_INDEX = os.getenv("ELASTIC_INDEX", "bench")
 ELASTIC_USER = os.getenv("ELASTIC_USER", "elastic")
 ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "passwd")
-ELASTIC_API_KEY = os.getenv("ELASTIC_API_KEY", None)
 ELASTIC_TIMEOUT = int(os.getenv("ELASTIC_TIMEOUT", 90))
 
 
@@ -22,7 +21,6 @@ def get_es_client(host, connection_params):
     else:
         url = "http://"
     url += f"{host}:{ELASTIC_PORT}"
-    if ELASTIC_API_KEY is None:
-        return Elasticsearch(url, basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), **init_params)
-    else:
-        return Elasticsearch(url, api_key=ELASTIC_API_KEY, **init_params)
+    return Elasticsearch(
+        url, basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), **init_params
+    )

From 5d7e5176db98fffd8a067fe5120c563e3637deb1 Mon Sep 17 00:00:00 2001
From: filipecosta90 <filipecosta.90@gmail.com>
Date: Thu, 4 Apr 2024 11:06:03 +0100
Subject: [PATCH 08/12] Enable specifying the Elasticsearch index timeouts;
 disable SSL warnings

---
 engine/clients/elasticsearch/config.py    | 38 +++++++++++++++++++----
 engine/clients/elasticsearch/configure.py | 20 +++++++++---
 engine/clients/elasticsearch/upload.py    | 31 +++++++++++++++---
 3 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py
index 9084c745..024ca97b 100644
--- a/engine/clients/elasticsearch/config.py
+++ b/engine/clients/elasticsearch/config.py
@@ -1,19 +1,28 @@
 import os
+import time
 
+import urllib3
 from elasticsearch import Elasticsearch
 
 ELASTIC_PORT = int(os.getenv("ELASTIC_PORT", 9200))
 ELASTIC_INDEX = os.getenv("ELASTIC_INDEX", "bench")
 ELASTIC_USER = os.getenv("ELASTIC_USER", "elastic")
 ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "passwd")
-ELASTIC_TIMEOUT = int(os.getenv("ELASTIC_TIMEOUT", 90))
+ELASTIC_TIMEOUT = int(os.getenv("ELASTIC_TIMEOUT", 300))
+ELASTIC_INDEX_TIMEOUT = os.getenv("ELASTIC_INDEX_TIMEOUT", "30m")
+ELASTIC_INDEX_REFRESH_INTERVAL = os.getenv("ELASTIC_INDEX_REFRESH_INTERVAL", "-1")
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 
 def get_es_client(host, connection_params):
+    client: Elasticsearch = None
     init_params = {
-        "verify_certs": False,
-        "request_timeout": ELASTIC_TIMEOUT,
-        "retry_on_timeout": True,
+        **{
+            "verify_certs": False,
+            "request_timeout": ELASTIC_TIMEOUT,
+            "retry_on_timeout": True,
+            "ssl_show_warn": False,
+        },
         **connection_params,
     }
     if host.startswith("http"):
@@ -21,6 +30,23 @@ def get_es_client(host, connection_params):
     else:
         url = "http://"
     url += f"{host}:{ELASTIC_PORT}"
-    return Elasticsearch(
-        url, basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD), **init_params
+    client = Elasticsearch(
+        url,
+        basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD),
+        **init_params,
     )
+    assert client.ping()
+    return client
+
+
+def _wait_for_es_status(client, status="yellow"):
+    print(f"waiting for ES {status} status...")
+    for _ in range(100):
+        try:
+            client.cluster.health(wait_for_status=status)
+            return client
+        except ConnectionError:
+            time.sleep(0.1)
+    else:
+        # timeout
+        raise Exception("Elasticsearch failed to start.")
diff --git a/engine/clients/elasticsearch/configure.py b/engine/clients/elasticsearch/configure.py
index c07e5939..d2a09db3 100644
--- a/engine/clients/elasticsearch/configure.py
+++ b/engine/clients/elasticsearch/configure.py
@@ -1,10 +1,15 @@
-from elasticsearch import Elasticsearch, NotFoundError
+from elasticsearch import NotFoundError
 
 from benchmark.dataset import Dataset
 from engine.base_client import IncompatibilityError
 from engine.base_client.configure import BaseConfigurator
 from engine.base_client.distances import Distance
-from engine.clients.elasticsearch.config import ELASTIC_INDEX, get_es_client
+from engine.clients.elasticsearch.config import (
+    ELASTIC_INDEX,
+    ELASTIC_INDEX_REFRESH_INTERVAL,
+    ELASTIC_INDEX_TIMEOUT,
+    get_es_client,
+)
 
 
 class ElasticConfigurator(BaseConfigurator):
@@ -23,12 +28,16 @@ def __init__(self, host, collection_params: dict, connection_params: dict):
         self.client = get_es_client(host, connection_params)
 
     def clean(self):
+        print("Ensuring the index does not exist...")
         try:
             self.client.indices.delete(
-                index=ELASTIC_INDEX, timeout="5m", master_timeout="5m"
+                index=ELASTIC_INDEX,
+                timeout=ELASTIC_INDEX_TIMEOUT,
+                master_timeout=ELASTIC_INDEX_TIMEOUT,
             )
         except NotFoundError:
             pass
+        print("Finished ensuring the index does not exist...")
 
     def recreate(self, dataset: Dataset, collection_params):
         if dataset.config.distance == Distance.DOT:
@@ -39,11 +48,14 @@ def recreate(self, dataset: Dataset, collection_params):
 
         self.client.indices.create(
             index=ELASTIC_INDEX,
+            timeout=ELASTIC_INDEX_TIMEOUT,
+            master_timeout=ELASTIC_INDEX_TIMEOUT,
+            wait_for_active_shards="all",
             settings={
                 "index": {
                     "number_of_shards": 1,
                     "number_of_replicas": 0,
-                    "refresh_interval": -1,
+                    "refresh_interval": ELASTIC_INDEX_REFRESH_INTERVAL,
                 }
             },
             mappings={
diff --git a/engine/clients/elasticsearch/upload.py b/engine/clients/elasticsearch/upload.py
index 9b24ca5d..7c3174ba 100644
--- a/engine/clients/elasticsearch/upload.py
+++ b/engine/clients/elasticsearch/upload.py
@@ -2,10 +2,15 @@
 import uuid
 from typing import List, Optional
 
-from elasticsearch import Elasticsearch
+import elastic_transport
+from elasticsearch import ApiError, Elasticsearch
 
 from engine.base_client.upload import BaseUploader
-from engine.clients.elasticsearch.config import ELASTIC_INDEX, get_es_client
+from engine.clients.elasticsearch.config import (
+    ELASTIC_INDEX,
+    _wait_for_es_status,
+    get_es_client,
+)
 
 
 class ClosableElastic(Elasticsearch):
@@ -48,7 +53,23 @@ def upload_batch(
 
     @classmethod
     def post_upload(cls, _distance):
-        cls.client.indices.forcemerge(
-            index=ELASTIC_INDEX, wait_for_completion=True, max_num_segments=1
-        )
+        print("forcing the merge into 1 segment...")
+        tries = 30
+        for i in range(tries + 1):
+            try:
+                cls.client.indices.forcemerge(
+                    index=ELASTIC_INDEX, wait_for_completion=True, max_num_segments=1
+                )
+            except (elastic_transport.TlsError, ApiError) as e:
+                if i < tries:
+                    print(
+                        "Received the following error during retry {}/{} while waiting for ES index to be ready... {}".format(
+                            i, tries, e.__str__()
+                        )
+                    )
+                    continue
+                else:
+                    raise
+            _wait_for_es_status(cls.client)
+            break
         return {}

From 0a2c99c8cc53c3cd553bb1b39b5632e1bcccf420 Mon Sep 17 00:00:00 2001
From: Weaviate Git Bot <83967968+weaviate-git-bot@users.noreply.github.com>
Date: Tue, 9 Apr 2024 00:45:35 -0500
Subject: [PATCH 09/12] Updated Weaviate Docker image url (auto PR by bot)
 (#109)

* updated the Weaviate Docker image location (automated bot update)

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* updated the Weaviate Docker image location (automated bot update)

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 engine/servers/weaviate-single-node/docker-compose.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/engine/servers/weaviate-single-node/docker-compose.yaml b/engine/servers/weaviate-single-node/docker-compose.yaml
index 9291687a..e5e9e9ec 100644
--- a/engine/servers/weaviate-single-node/docker-compose.yaml
+++ b/engine/servers/weaviate-single-node/docker-compose.yaml
@@ -8,7 +8,7 @@ services:
     - '8090'
     - --scheme
     - http
-    image: semitechnologies/weaviate:1.24.1
+    image: cr.weaviate.io/semitechnologies/weaviate:1.24.1
     network_mode: host
     logging:
       driver: "json-file"

From fa5b6b6197ecccdf9e655109257b4d90cb1d8083 Mon Sep 17 00:00:00 2001
From: Andrew Kane <acekane1@gmail.com>
Date: Wed, 10 Apr 2024 23:49:01 -0700
Subject: [PATCH 10/12] pgvector improvements (#98)

* pgvector improvements

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Updated Postgres parameters

* Use versioned Docker image

* Updated pgvector to 0.6.2

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 engine/clients/pgvector/config.py             |  3 +-
 engine/clients/pgvector/configure.py          | 16 ------
 engine/clients/pgvector/search.py             | 25 ++++------
 engine/clients/pgvector/upload.py             | 25 +++++++++-
 .../pgvector-single-node/docker-compose.yaml  |  8 ++-
 .../configurations/pgvector-single-node.json  | 50 +++++++------------
 6 files changed, 59 insertions(+), 68 deletions(-)

diff --git a/engine/clients/pgvector/config.py b/engine/clients/pgvector/config.py
index dc3b8365..5507745c 100644
--- a/engine/clients/pgvector/config.py
+++ b/engine/clients/pgvector/config.py
@@ -1,6 +1,6 @@
 import os
 
-PGVECTOR_PORT = int(os.getenv("PGVECTOR_PORT", 9200))
+PGVECTOR_PORT = int(os.getenv("PGVECTOR_PORT", 5432))
 PGVECTOR_DB = os.getenv("PGVECTOR_DB", "postgres")
 PGVECTOR_USER = os.getenv("PGVECTOR_USER", "postgres")
 PGVECTOR_PASSWORD = os.getenv("PGVECTOR_PASSWORD", "passwd")
@@ -9,6 +9,7 @@
 def get_db_config(host, connection_params):
     return {
         "host": host or "localhost",
+        "port": PGVECTOR_PORT,
         "dbname": PGVECTOR_DB,
         "user": PGVECTOR_USER,
         "password": PGVECTOR_PASSWORD,
diff --git a/engine/clients/pgvector/configure.py b/engine/clients/pgvector/configure.py
index d5587431..0da692b2 100644
--- a/engine/clients/pgvector/configure.py
+++ b/engine/clients/pgvector/configure.py
@@ -9,11 +9,6 @@
 
 
 class PgVectorConfigurator(BaseConfigurator):
-    DISTANCE_MAPPING = {
-        Distance.L2: "vector_l2_ops",
-        Distance.COSINE: "vector_cosine_ops",
-    }
-
     def __init__(self, host, collection_params: dict, connection_params: dict):
         super().__init__(host, collection_params, connection_params)
         self.conn = psycopg.connect(**get_db_config(host, connection_params))
@@ -38,17 +33,6 @@ def recreate(self, dataset: Dataset, collection_params):
         )
         self.conn.execute("ALTER TABLE items ALTER COLUMN embedding SET STORAGE PLAIN")
 
-        try:
-            hnsw_distance_type = self.DISTANCE_MAPPING[dataset.config.distance]
-        except KeyError:
-            raise IncompatibilityError(
-                f"Unsupported distance metric: {dataset.config.distance}"
-            )
-
-        self.conn.execute(
-            f"CREATE INDEX on items USING hnsw(embedding {hnsw_distance_type}) WITH (m = {collection_params['hnsw_config']['m']}, ef_construction = {collection_params['hnsw_config']['ef_construct']})"
-        )
-
         self.conn.close()
 
     def delete_client(self):
diff --git a/engine/clients/pgvector/search.py b/engine/clients/pgvector/search.py
index fa8bde5a..62f53035 100644
--- a/engine/clients/pgvector/search.py
+++ b/engine/clients/pgvector/search.py
@@ -23,24 +23,19 @@ def init_client(cls, host, distance, connection_params: dict, search_params: dic
         cls.conn = psycopg.connect(**get_db_config(host, connection_params))
         register_vector(cls.conn)
         cls.cur = cls.conn.cursor()
-        cls.distance = distance
-        cls.search_params = search_params["search_params"]
-
-    @classmethod
-    def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]:
-        cls.cur.execute(f"SET hnsw.ef_search = {cls.search_params['hnsw_ef']}")
-
-        if cls.distance == Distance.COSINE:
-            query = f"SELECT id, embedding <=> %s AS _score FROM items ORDER BY _score LIMIT {top};"
-        elif cls.distance == Distance.L2:
-            query = f"SELECT id, embedding <-> %s AS _score FROM items ORDER BY _score LIMIT {top};"
+        cls.cur.execute(
+            f"SET hnsw.ef_search = {search_params['search_params']['hnsw_ef']}"
+        )
+        if distance == Distance.COSINE:
+            cls.query = f"SELECT id, embedding <=> %s AS _score FROM items ORDER BY _score LIMIT %s"
+        elif distance == Distance.L2:
+            cls.query = f"SELECT id, embedding <-> %s AS _score FROM items ORDER BY _score LIMIT %s"
         else:
             raise NotImplementedError(f"Unsupported distance metric {cls.distance}")
 
-        cls.cur.execute(
-            query,
-            (np.array(vector),),
-        )
+    @classmethod
+    def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]:
+        cls.cur.execute(cls.query, (np.array(vector), top), binary=True, prepare=True)
         return cls.cur.fetchall()
 
     @classmethod
diff --git a/engine/clients/pgvector/upload.py b/engine/clients/pgvector/upload.py
index 8d59ee7f..c3921e95 100644
--- a/engine/clients/pgvector/upload.py
+++ b/engine/clients/pgvector/upload.py
@@ -4,11 +4,16 @@
 import psycopg
 from pgvector.psycopg import register_vector
 
+from engine.base_client.distances import Distance
 from engine.base_client.upload import BaseUploader
 from engine.clients.pgvector.config import get_db_config
 
 
 class PgVectorUploader(BaseUploader):
+    DISTANCE_MAPPING = {
+        Distance.L2: "vector_l2_ops",
+        Distance.COSINE: "vector_cosine_ops",
+    }
     conn = None
     cur = None
     upload_params = {}
@@ -27,10 +32,28 @@ def upload_batch(
         vectors = np.array(vectors)
 
         # Copy is faster than insert
-        with cls.cur.copy("COPY items (id, embedding) FROM STDIN") as copy:
+        with cls.cur.copy(
+            "COPY items (id, embedding) FROM STDIN WITH (FORMAT BINARY)"
+        ) as copy:
+            copy.set_types(["integer", "vector"])
             for i, embedding in zip(ids, vectors):
                 copy.write_row((i, embedding))
 
+    @classmethod
+    def post_upload(cls, distance):
+        try:
+            hnsw_distance_type = cls.DISTANCE_MAPPING[distance]
+        except KeyError:
+            raise IncompatibilityError(f"Unsupported distance metric: {distance}")
+
+        cls.conn.execute("SET max_parallel_workers = 128")
+        cls.conn.execute("SET max_parallel_maintenance_workers = 128")
+        cls.conn.execute(
+            f"CREATE INDEX ON items USING hnsw (embedding {hnsw_distance_type}) WITH (m = {cls.upload_params['hnsw_config']['m']}, ef_construction = {cls.upload_params['hnsw_config']['ef_construct']})"
+        )
+
+        return {}
+
     @classmethod
     def delete_client(cls):
         if cls.cur:
diff --git a/engine/servers/pgvector-single-node/docker-compose.yaml b/engine/servers/pgvector-single-node/docker-compose.yaml
index ea554d92..13e8b8e6 100644
--- a/engine/servers/pgvector-single-node/docker-compose.yaml
+++ b/engine/servers/pgvector-single-node/docker-compose.yaml
@@ -3,13 +3,17 @@ version: '3.7'
 services:
   pgvector:
     container_name: pgvector
-    image: ankane/pgvector:v0.5.1
+    image: pgvector/pgvector:0.6.2-pg16
     environment:
       - POSTGRES_DB=postgres
       - POSTGRES_USER=postgres
       - POSTGRES_PASSWORD=passwd
       - POSTGRES_HOST_AUTH_METHOD=trust
-      - POSTGRES_MAX_CONNECTIONS=200
+    # shared_buffers should be 25% of memory
+    # maintenance_work_mem should be ~65%
+    command: postgres -c shared_buffers=6GB -c maintenance_work_mem=16GB -c max_connections=200
+    # shm_size should be shared_buffers + maintenance_work_mem
+    shm_size: 22g
     ports:
       - 5432:5432
     logging:
diff --git a/experiments/configurations/pgvector-single-node.json b/experiments/configurations/pgvector-single-node.json
index e1c8e33a..22ced04d 100644
--- a/experiments/configurations/pgvector-single-node.json
+++ b/experiments/configurations/pgvector-single-node.json
@@ -3,104 +3,88 @@
         "name": "pgvector-default",
         "engine": "pgvector",
         "connection_params": {},
-        "collection_params": {
-          "hnsw_config": { "m": 16, "ef_construct": 128 }
-        },
+        "collection_params": {},
         "search_params": [
-          { "parallel": 1, "search_params": { "hnsw_ef": 128 } }
+          { "parallel": 8, "search_params": { "hnsw_ef": 128 } }
         ],
-        "upload_params": { "parallel": 1, "batch_size": 1024 }
+        "upload_params": { "parallel": 16, "batch_size": 1024, "hnsw_config": { "m": 16, "ef_construct": 128 } }
     },
     {
       "name": "pgvector-parallel",
       "engine": "pgvector",
       "connection_params": {},
-      "collection_params": {
-        "hnsw_config": { "m": 16, "ef_construct": 128 }
-      },
+      "collection_params": {},
       "search_params": [
         { "parallel": 8, "search_params": { "hnsw_ef": 128 } },
         { "parallel": 16, "search_params": { "hnsw_ef": 128 } },
         { "parallel": 100, "search_params": { "hnsw_ef": 128 } }
       ],
-      "upload_params": { "parallel": 1, "batch_size": 1024 }
+      "upload_params": { "parallel": 1, "batch_size": 1024, "hnsw_config": { "m": 16, "ef_construct": 128 } }
     },
     {
         "name": "pgvector-m-16-ef-128",
         "engine": "pgvector",
         "connection_params": {},
-        "collection_params": {
-          "hnsw_config": { "m": 16, "ef_construct": 128 }
-        },
+        "collection_params": {},
         "search_params": [
           { "parallel": 1, "search_params": { "hnsw_ef": 64 } }, { "parallel": 1, "search_params": { "hnsw_ef": 128 } }, { "parallel": 1, "search_params": { "hnsw_ef": 256 } }, { "parallel": 1, "search_params": { "hnsw_ef": 512 } },
           { "parallel": 100, "search_params": { "hnsw_ef": 64 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 256 } }, { "parallel": 100, "search_params": { "hnsw_ef": 512 } }
         ],
-        "upload_params": { "parallel": 16 }
+        "upload_params": { "parallel": 16, "hnsw_config": { "m": 16, "ef_construct": 128 } }
     },
     {
         "name": "pgvector-m-32-ef-128",
         "engine": "pgvector",
         "connection_params": {},
-        "collection_params": {
-          "hnsw_config": { "m": 32, "ef_construct": 128 }
-        },
+        "collection_params": {},
         "search_params": [
           { "parallel": 1, "search_params": { "hnsw_ef": 64 } }, { "parallel": 1, "search_params": { "hnsw_ef": 128 } }, { "parallel": 1, "search_params": { "hnsw_ef": 256 } }, { "parallel": 1, "search_params": { "hnsw_ef": 512 } },
           { "parallel": 100, "search_params": { "hnsw_ef": 64 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 256 } }, { "parallel": 100, "search_params": { "hnsw_ef": 512 } }
         ],
-        "upload_params": { "parallel": 16 }
+        "upload_params": { "parallel": 16, "hnsw_config": { "m": 32, "ef_construct": 128 } }
     },
     {
         "name": "pgvector-m-32-ef-256",
         "engine": "pgvector",
         "connection_params": {},
-        "collection_params": {
-          "hnsw_config": { "m": 32, "ef_construct": 256 }
-        },
+        "collection_params": {},
         "search_params": [
           { "parallel": 1, "search_params": { "hnsw_ef": 64 } }, { "parallel": 1, "search_params": { "hnsw_ef": 128 } }, { "parallel": 1, "search_params": { "hnsw_ef": 256 } }, { "parallel": 1, "search_params": { "hnsw_ef": 512 } },
           { "parallel": 100, "search_params": { "hnsw_ef": 64 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 256 } }, { "parallel": 100, "search_params": { "hnsw_ef": 512 } }
         ],
-        "upload_params": { "parallel": 16 }
+        "upload_params": { "parallel": 16, "hnsw_config": { "m": 32, "ef_construct": 256 } }
     },
     {
         "name": "pgvector-m-32-ef-512",
         "engine": "pgvector",
         "connection_params": {},
-        "collection_params": {
-          "hnsw_config": { "m": 32, "ef_construct": 512 }
-        },
+        "collection_params": {},
         "search_params": [
           { "parallel": 1, "search_params": { "hnsw_ef": 64 } }, { "parallel": 1, "search_params": { "hnsw_ef": 128 } }, { "parallel": 1, "search_params": { "hnsw_ef": 256 } }, { "parallel": 1, "search_params": { "hnsw_ef": 512 } },
           { "parallel": 100, "search_params": { "hnsw_ef": 64 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 256 } }, { "parallel": 100, "search_params": { "hnsw_ef": 512 } }
         ],
-        "upload_params": { "parallel": 16 }
+        "upload_params": { "parallel": 16, "hnsw_config": { "m": 32, "ef_construct": 512 } }
     },
     {
         "name": "pgvector-m-64-ef-256",
         "engine": "pgvector",
         "connection_params": {},
-        "collection_params": {
-          "hnsw_config": { "m": 64, "ef_construct": 256 }
-        },
+        "collection_params": {},
         "search_params": [
           { "parallel": 1, "search_params": { "hnsw_ef": 64 } }, { "parallel": 1, "search_params": { "hnsw_ef": 128 } }, { "parallel": 1, "search_params": { "hnsw_ef": 256 } }, { "parallel": 1, "search_params": { "hnsw_ef": 512 } },
           { "parallel": 100, "search_params": { "hnsw_ef": 64 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 256 } }, { "parallel": 100, "search_params": { "hnsw_ef": 512 } }
         ],
-        "upload_params": { "parallel": 16 }
+        "upload_params": { "parallel": 16, "hnsw_config": { "m": 64, "ef_construct": 256 } }
     },
     {
         "name": "pgvector-m-64-ef-512",
         "engine": "pgvector",
         "connection_params": {},
-        "collection_params": {
-          "hnsw_config": { "m": 64, "ef_construct": 512 }
-        },
+        "collection_params": {},
         "search_params": [
           { "parallel": 1, "search_params": { "hnsw_ef": 64 } }, { "parallel": 1, "search_params": { "hnsw_ef": 128 } }, { "parallel": 1, "search_params": { "hnsw_ef": 256 } }, { "parallel": 1, "search_params": { "hnsw_ef": 512 } },
           { "parallel": 100, "search_params": { "hnsw_ef": 64 } }, { "parallel": 100, "search_params": { "hnsw_ef": 128 } }, { "parallel": 100, "search_params": { "hnsw_ef": 256 } }, { "parallel": 100, "search_params": { "hnsw_ef": 512 } }
         ],
-        "upload_params": { "parallel": 16 }
+        "upload_params": { "parallel": 16, "hnsw_config": { "m": 64, "ef_construct": 512 } }
     }
 ]

From 37b62fb6c45787394c127e732617308ec4b16100 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 11 Apr 2024 13:26:37 +0530
Subject: [PATCH 11/12] [pre-commit.ci] pre-commit suggestions (#47)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/pre-commit/pre-commit-hooks: v4.4.0 → v4.5.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.4.0...v4.5.0)
- [github.com/psf/black: 22.12.0 → 24.3.0](https://github.com/psf/black/compare/22.12.0...24.3.0)
- [github.com/PyCQA/isort: 5.12.0 → 5.13.2](https://github.com/PyCQA/isort/compare/5.12.0...5.13.2)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 754906cd..6817ea9d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -11,19 +11,19 @@ ci:
 
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
       - id: trailing-whitespace
       - id: check-added-large-files
 
   - repo: https://github.com/psf/black
-    rev: 22.12.0
+    rev: 24.3.0
     hooks:
       - id: black
         name: "Black: The uncompromising Python code formatter"
 
   - repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
+    rev: 5.13.2
     hooks:
       - id: isort
         name: "Sort Imports"

From 4d6b8b7820aabe37ac822461ea7f089aa441763b Mon Sep 17 00:00:00 2001
From: KShivendu <kshivendu1@gmail.com>
Date: Thu, 11 Apr 2024 14:24:56 +0530
Subject: [PATCH 12/12] refactor: Nest search params in ES config

---
 engine/clients/elasticsearch/config.py        | 20 +++++--------
 engine/clients/elasticsearch/configure.py     |  8 ++----
 engine/clients/elasticsearch/search.py        | 17 ++---------
 engine/clients/elasticsearch/upload.py        |  3 +-
 .../elasticsearch-single-node.json            | 28 +++++++++----------
 5 files changed, 27 insertions(+), 49 deletions(-)

diff --git a/engine/clients/elasticsearch/config.py b/engine/clients/elasticsearch/config.py
index 024ca97b..60bf20bd 100644
--- a/engine/clients/elasticsearch/config.py
+++ b/engine/clients/elasticsearch/config.py
@@ -8,33 +8,27 @@
 ELASTIC_INDEX = os.getenv("ELASTIC_INDEX", "bench")
 ELASTIC_USER = os.getenv("ELASTIC_USER", "elastic")
 ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "passwd")
+
 ELASTIC_TIMEOUT = int(os.getenv("ELASTIC_TIMEOUT", 300))
 ELASTIC_INDEX_TIMEOUT = os.getenv("ELASTIC_INDEX_TIMEOUT", "30m")
 ELASTIC_INDEX_REFRESH_INTERVAL = os.getenv("ELASTIC_INDEX_REFRESH_INTERVAL", "-1")
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 
 def get_es_client(host, connection_params):
     client: Elasticsearch = None
     init_params = {
-        **{
-            "verify_certs": False,
-            "request_timeout": ELASTIC_TIMEOUT,
-            "retry_on_timeout": True,
-            "ssl_show_warn": False,
-        },
+        "verify_certs": False,
+        "request_timeout": ELASTIC_TIMEOUT,
+        "retry_on_timeout": True,
+        "ssl_show_warn": False,
         **connection_params,
     }
-    if host.startswith("http"):
-        url = ""
-    else:
-        url = "http://"
-    url += f"{host}:{ELASTIC_PORT}"
     client = Elasticsearch(
-        url,
+        f"http://{host}:{ELASTIC_PORT}",
         basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD),
         **init_params,
     )
+    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
     assert client.ping()
     return client
 
diff --git a/engine/clients/elasticsearch/configure.py b/engine/clients/elasticsearch/configure.py
index d2a09db3..c770b4da 100644
--- a/engine/clients/elasticsearch/configure.py
+++ b/engine/clients/elasticsearch/configure.py
@@ -67,12 +67,8 @@ def recreate(self, dataset: Dataset, collection_params):
                         "index": True,
                         "similarity": self.DISTANCE_MAPPING[dataset.config.distance],
                         "index_options": {
-                            **{
-                                "type": "hnsw",
-                                "m": 16,
-                                "ef_construction": 100,
-                            },
-                            **collection_params.get("index_options"),
+                            "type": "hnsw",
+                            **collection_params["index_options"],
                         },
                     },
                     **self._prepare_fields_config(dataset),
diff --git a/engine/clients/elasticsearch/search.py b/engine/clients/elasticsearch/search.py
index b7b09e2f..97dc1d57 100644
--- a/engine/clients/elasticsearch/search.py
+++ b/engine/clients/elasticsearch/search.py
@@ -1,4 +1,3 @@
-import copy
 import multiprocessing as mp
 import uuid
 from typing import List, Tuple
@@ -25,19 +24,9 @@ def get_mp_start_method(cls):
         return "forkserver" if "forkserver" in mp.get_all_start_methods() else "spawn"
 
     @classmethod
-    def init_client(cls, host, distance, connection_params: dict, search_params: dict):
-        init_params = {
-            **{
-                "verify_certs": False,
-                "request_timeout": 90,
-                "retry_on_timeout": True,
-            },
-            **connection_params,
-        }
+    def init_client(cls, host, _distance, connection_params: dict, search_params: dict):
         cls.client = get_es_client(host, connection_params)
-        cls.search_params = copy.deepcopy(search_params)
-        # pop parallel
-        cls.search_params.pop("parallel", "1")
+        cls.search_params = search_params
 
     @classmethod
     def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]:
@@ -45,7 +34,7 @@ def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]:
             "field": "vector",
             "query_vector": vector,
             "k": top,
-            **{"num_candidates": 100, **cls.search_params},
+            **cls.search_params['params']
         }
 
         meta_conditions = cls.parser.parse(meta_conditions)
diff --git a/engine/clients/elasticsearch/upload.py b/engine/clients/elasticsearch/upload.py
index 7c3174ba..82709217 100644
--- a/engine/clients/elasticsearch/upload.py
+++ b/engine/clients/elasticsearch/upload.py
@@ -27,7 +27,7 @@ def get_mp_start_method(cls):
         return "forkserver" if "forkserver" in mp.get_all_start_methods() else "spawn"
 
     @classmethod
-    def init_client(cls, host, distance, connection_params, upload_params):
+    def init_client(cls, host, _distance, connection_params, upload_params):
         cls.client = get_es_client(host, connection_params)
         cls.upload_params = upload_params
 
@@ -53,7 +53,6 @@ def upload_batch(
 
     @classmethod
     def post_upload(cls, _distance):
-        print("forcing the merge into 1 segment...")
         tries = 30
         for i in range(tries + 1):
             try:
diff --git a/experiments/configurations/elasticsearch-single-node.json b/experiments/configurations/elasticsearch-single-node.json
index b3f0f609..3ffb4034 100644
--- a/experiments/configurations/elasticsearch-single-node.json
+++ b/experiments/configurations/elasticsearch-single-node.json
@@ -7,8 +7,8 @@
     },
     "collection_params": { "index_options": { "m": 16, "ef_construction": 100 } },
     "search_params": [
-      { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 },
-      { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 }
+      { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} },
+      { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} }
     ],
     "upload_params": { "parallel": 16 }
   },
@@ -20,8 +20,8 @@
     },
     "collection_params": { "index_options": { "m": 16, "ef_construction": 128 } },
     "search_params": [
-      { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 },
-      { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 }
+      { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} },
+      { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} }
     ],
     "upload_params": { "parallel": 16 }
   },
@@ -33,8 +33,8 @@
     },
     "collection_params": { "index_options": { "m": 32, "ef_construction": 128 } },
     "search_params": [
-      { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 },
-      { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 }
+      { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} },
+      { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} }
     ],
     "upload_params": { "parallel": 16 }
   },
@@ -46,8 +46,8 @@
     },
     "collection_params": { "index_options": { "m": 32, "ef_construction": 256 } },
     "search_params": [
-      { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 },
-      { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 }
+      { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} },
+      { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} }
     ],
     "upload_params": { "parallel": 16 }
   },
@@ -59,8 +59,8 @@
     },
     "collection_params": { "index_options": { "m": 32, "ef_construction": 512 } },
     "search_params": [
-      { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 },
-      { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 }
+      { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} },
+      { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} }
     ],
     "upload_params": { "parallel": 16 }
   },
@@ -72,8 +72,8 @@
     },
     "collection_params": { "index_options": { "m": 64, "ef_construction": 256 } },
     "search_params": [
-      { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 },
-      { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 }
+      { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} },
+      { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} }
     ],
     "upload_params": { "parallel": 16 }
   },
@@ -85,8 +85,8 @@
     },
     "collection_params": { "index_options": { "m": 64, "ef_construction": 512 } },
     "search_params": [
-      { "parallel": 1, "num_candidates": 128 }, { "parallel": 1, "num_candidates": 256 }, { "parallel": 1, "num_candidates": 512 },
-      { "parallel": 100, "num_candidates": 128 }, { "parallel": 100, "num_candidates": 256 }, { "parallel": 100, "num_candidates": 512 }
+      { "parallel": 1, "params": {"num_candidates": 128} }, { "parallel": 1, "params": {"num_candidates": 256} }, { "parallel": 1, "params" :{"num_candidates": 512} },
+      { "parallel": 100, "params": {"num_candidates": 128} }, { "parallel": 100, "params": {"num_candidates": 256} }, { "parallel": 100, "params" :{"num_candidates": 512} }
     ],
     "upload_params": { "parallel": 16 }
   }