Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cherry picking some milvus improvements. Updated milvus client to include backoff strategy on ingestion. Added extra configurations to ease remote benchmark run. #170

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion engine/clients/milvus/config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
from pymilvus import DataType
import os

from pymilvus import DataType, connections

from engine.base_client.distances import Distance

MILVUS_COLLECTION_NAME = "Benchmark"
MILVUS_DEFAULT_ALIAS = "bench"
MILVUS_DEFAULT_PORT = "19530"
MILVUS_PASS = os.getenv("MILVUS_PASS", "")
MILVUS_USER = os.getenv("MILVUS_USER", "")
MILVUS_PORT = os.getenv("MILVUS_PORT", MILVUS_DEFAULT_PORT)

DISTANCE_MAPPING = {
Distance.L2: "L2",
@@ -25,3 +30,22 @@
DataType.FLOAT: 0.0,
DataType.DOUBLE: 0.0,
}


def get_milvus_client(connection_params: dict, host: str, alias: str):
    """Open a Milvus connection registered under ``alias`` and return the result.

    ``host`` is forwarded as ``uri`` when it carries an ``http://`` or
    ``https://`` scheme, and as a plain ``host`` otherwise. Port and
    credentials default to ``MILVUS_PORT``, ``MILVUS_USER`` and
    ``MILVUS_PASS``.

    Args:
        connection_params: Extra keyword arguments for
            ``connections.connect``. Entries here override the defaults
            above (for example ``port``). ``None`` is treated as empty.
        host: Hostname, or full HTTP(S) URI, of the Milvus server.
        alias: Connection alias to register. It always wins over an
            ``alias`` key in ``connection_params``.

    Returns:
        Whatever ``connections.connect`` returns.
    """
    # Require a real scheme so a hostname that merely begins with "http"
    # (e.g. "http-proxy.internal") is not mistaken for a URI.
    is_uri = host.startswith(("http://", "https://"))

    connect_kwargs = {
        "host": "" if is_uri else host,
        "uri": host if is_uri else "",
        "port": MILVUS_PORT,
        "user": MILVUS_USER,
        "password": MILVUS_PASS,
    }
    # Merge rather than splat next to explicit keywords: a key such as
    # "port" in connection_params would otherwise raise
    # "TypeError: got multiple values for keyword argument".
    connect_kwargs.update(connection_params or {})
    # Set last: callers pass this alias so they can refer to the connection
    # by it afterwards, so it must not be overridden by connection_params.
    connect_kwargs["alias"] = alias

    client = connections.connect(**connect_kwargs)
    return client
27 changes: 9 additions & 18 deletions engine/clients/milvus/configure.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
from pymilvus import (
Collection,
CollectionSchema,
DataType,
FieldSchema,
MilvusException,
connections,
)
from pymilvus import Collection, CollectionSchema, DataType, FieldSchema
from pymilvus.exceptions import DataTypeNotSupportException
from pymilvus.orm import utility

@@ -17,7 +10,7 @@
DTYPE_EXTRAS,
MILVUS_COLLECTION_NAME,
MILVUS_DEFAULT_ALIAS,
MILVUS_DEFAULT_PORT,
get_milvus_client,
)


@@ -32,20 +25,18 @@ class MilvusConfigurator(BaseConfigurator):

def __init__(self, host, collection_params: dict, connection_params: dict):
super().__init__(host, collection_params, connection_params)
self.client = connections.connect(
alias=MILVUS_DEFAULT_ALIAS,
host=host,
port=str(connection_params.get("port", MILVUS_DEFAULT_PORT)),
**connection_params,
)
self.client = get_milvus_client(connection_params, host, MILVUS_DEFAULT_ALIAS)
print("established connection")

def clean(self):
    """Drop the benchmark collection if it exists, then verify it is gone.

    Raises:
        AssertionError: If the collection is still reported as present
            after the drop attempt.
    """
    if utility.has_collection(MILVUS_COLLECTION_NAME, using=MILVUS_DEFAULT_ALIAS):
        # f-strings are required here; without the prefix the literal text
        # "{MILVUS_COLLECTION_NAME}" is printed instead of the name.
        print(f"dropping collection named {MILVUS_COLLECTION_NAME}...")
        utility.drop_collection(MILVUS_COLLECTION_NAME, using=MILVUS_DEFAULT_ALIAS)
        print(f"dropped collection named {MILVUS_COLLECTION_NAME}...")
    # Post-condition check: the next step recreates the collection, so a
    # leftover one should fail loudly here rather than later.
    assert (
        utility.has_collection(MILVUS_COLLECTION_NAME, using=MILVUS_DEFAULT_ALIAS)
        is False
    ), f"collection {MILVUS_COLLECTION_NAME} still exists after drop"

def recreate(self, dataset: Dataset, collection_params):
idx = FieldSchema(
9 changes: 2 additions & 7 deletions engine/clients/milvus/search.py
Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@
DISTANCE_MAPPING,
MILVUS_COLLECTION_NAME,
MILVUS_DEFAULT_ALIAS,
MILVUS_DEFAULT_PORT,
get_milvus_client,
)
from engine.clients.milvus.parser import MilvusConditionParser

@@ -23,12 +23,7 @@ class MilvusSearcher(BaseSearcher):

@classmethod
def init_client(cls, host, distance, connection_params: dict, search_params: dict):
cls.client = connections.connect(
alias=MILVUS_DEFAULT_ALIAS,
host=host,
port=str(connection_params.get("port", MILVUS_DEFAULT_PORT)),
**connection_params
)
cls.client = get_milvus_client(connection_params, host, MILVUS_DEFAULT_ALIAS)
cls.collection = Collection(MILVUS_COLLECTION_NAME, using=MILVUS_DEFAULT_ALIAS)
cls.search_params = search_params
cls.distance = DISTANCE_MAPPING[distance]
24 changes: 11 additions & 13 deletions engine/clients/milvus/upload.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import logging
import multiprocessing as mp
from typing import List

from pymilvus import (
Collection,
MilvusException,
connections,
wait_for_index_building_complete,
)
import backoff
from pymilvus import Collection, MilvusException, wait_for_index_building_complete

from dataset_reader.base_reader import Record
from engine.base_client.upload import BaseUploader
@@ -15,7 +12,7 @@
DTYPE_DEFAULT,
MILVUS_COLLECTION_NAME,
MILVUS_DEFAULT_ALIAS,
MILVUS_DEFAULT_PORT,
get_milvus_client,
)


@@ -31,12 +28,7 @@ def get_mp_start_method(cls):

@classmethod
def init_client(cls, host, distance, connection_params, upload_params):
cls.client = connections.connect(
alias=MILVUS_DEFAULT_ALIAS,
host=host,
port=str(connection_params.get("port", MILVUS_DEFAULT_PORT)),
**connection_params
)
cls.client = get_milvus_client(connection_params, host, MILVUS_DEFAULT_ALIAS)
cls.collection = Collection(MILVUS_COLLECTION_NAME, using=MILVUS_DEFAULT_ALIAS)
cls.upload_params = upload_params
cls.distance = DISTANCE_MAPPING[distance]
@@ -61,7 +53,13 @@ def upload_batch(cls, batch: List[Record]):
for record in batch:
ids.append(record.id)
vectors.append(record.vector)
cls.upload_with_backoff(field_values, ids, vectors)

@classmethod
@backoff.on_exception(
    backoff.expo,
    MilvusException,
    max_time=600,
    backoff_log_level=logging.WARNING,
)
def upload_with_backoff(cls, field_values, ids, vectors):
    """Insert one batch into the collection, retrying on ``MilvusException``.

    Retries use exponential backoff with a total budget of 600 seconds;
    each backoff is logged at WARNING level.

    Args:
        field_values: List of extra column lists, appended after the ids
            and vectors columns.
        ids: Column of record ids for the batch.
        vectors: Column of record vectors for the batch.
    """
    # Column order matters: ids first, vectors second, then extra fields.
    columns = [ids, vectors]
    columns = columns + field_values
    cls.collection.insert(columns)

@classmethod
39 changes: 25 additions & 14 deletions experiments/configurations/milvus-single-node.json
Original file line number Diff line number Diff line change
@@ -5,19 +5,30 @@
"connection_params": {},
"collection_params": {},
"search_params": [
{ "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } },
{ "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } }
{ "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } },
{ "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } }
],
"upload_params": { "parallel": 16, "index_params": { "efConstruction": 100, "M": 16 } }
},
{
"name": "milvus-m-16-ef-64",
"engine": "milvus",
"connection_params": {},
"collection_params": {},
"search_params": [
{ "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } },
{ "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } }
],
"upload_params": { "parallel": 16, "index_params": { "efConstruction": 64, "M": 16 } }
},
{
"name": "milvus-m-16-ef-128",
"engine": "milvus",
"connection_params": {},
"collection_params": {},
"search_params": [
{ "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } },
{ "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } }
{ "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } },
{ "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } }
],
"upload_params": { "parallel": 16, "index_params": { "efConstruction": 128, "M": 16 } }
},
@@ -27,8 +38,8 @@
"connection_params": {},
"collection_params": {},
"search_params": [
{ "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } },
{ "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } }
{ "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } },
{ "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } }
],
"upload_params": { "parallel": 16, "index_params": { "efConstruction": 128, "M": 32 } }
},
@@ -38,8 +49,8 @@
"connection_params": {},
"collection_params": {},
"search_params": [
{ "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } },
{ "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } }
{ "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } },
{ "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } }
],
"upload_params": { "parallel": 16, "index_params": { "efConstruction": 256, "M": 32 } }
},
@@ -49,8 +60,8 @@
"connection_params": {},
"collection_params": {},
"search_params": [
{ "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } },
{ "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } }
{ "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } },
{ "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } }
],
"upload_params": { "parallel": 16, "index_params": { "efConstruction": 512, "M": 32 } }
},
@@ -60,8 +71,8 @@
"connection_params": {},
"collection_params": {},
"search_params": [
{ "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } },
{ "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } }
{ "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } },
{ "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } }
],
"upload_params": { "parallel": 16, "index_params": { "efConstruction": 256, "M": 64 } }
},
@@ -71,8 +82,8 @@
"connection_params": {},
"collection_params": {},
"search_params": [
{ "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } },
{ "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } }
{ "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } },
{ "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } }
],
"upload_params": { "parallel": 16, "index_params": { "efConstruction": 512, "M": 64 } }
}
29 changes: 26 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -20,6 +20,7 @@ opensearch-py = "^2.3.2"
tqdm = "^4.66.1"
psycopg = {extras = ["binary"], version = "^3.1.17"}
pgvector = "^0.2.4"
backoff = "^2.2.1"

[tool.poetry.dev-dependencies]
pre-commit = "^2.20.0"