Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,7 @@ __MACOSX
.DS_Store
build/
venv/
.venv/
.idea/
results/
results/
logs/
3 changes: 2 additions & 1 deletion install.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
import argparse
import os
import subprocess


def docker_tag_base():
    """Return the constant string ``'vdbbench'``.

    NOTE(review): judging by the name, this is presumably the base name used
    when tagging Docker images; the callers are not visible in this hunk.
    """
    return 'vdbbench'

Expand Down
17 changes: 13 additions & 4 deletions vectordb_bench/backend/clients/mongodb/config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
from enum import Enum

from pydantic import BaseModel, SecretStr

from ..api import DBCaseConfig, DBConfig, IndexType, MetricType


class QuantizationType(Enum):
    """Quantization modes selectable for the MongoDB vector index.

    Each member's ``value`` is the literal string placed in the
    ``"quantization"`` field of the vector index definition.
    """

    NONE = "none"  # no quantization
    BINARY = "binary"
    SCALAR = "scalar"


class MongoDBConfig(DBConfig, BaseModel):
connection_string: SecretStr = "mongodb+srv://<user>:<password>@<cluster_name>.heatl.mongodb.net"
database: str = "vdb_bench"
Expand All @@ -16,9 +24,9 @@ def to_dict(self) -> dict:

class MongoDBIndexConfig(BaseModel, DBCaseConfig):
index: IndexType = IndexType.HNSW # MongoDB uses HNSW for vector search
metric_type: MetricType | None = None
num_candidates: int | None = 1500 # Default numCandidates for vector search
exact_search: bool = False # Whether to use exact (ENN) search
metric_type: MetricType = MetricType.COSINE
num_candidates_ratio: int = 10 # Default numCandidates ratio for vector search
quantization: QuantizationType = QuantizationType.NONE # Quantization type if applicable

def parse_metric(self) -> str:
if self.metric_type == MetricType.L2:
Expand All @@ -36,9 +44,10 @@ def index_param(self) -> dict:
"similarity": self.parse_metric(),
"numDimensions": None, # Will be set in MongoDB class
"path": "vector", # Vector field name
"quantization": self.quantization.value,
}
],
}

def search_param(self) -> dict:
return {"numCandidates": self.num_candidates if not self.exact_search else None, "exact": self.exact_search}
return {"num_candidates_ratio": self.num_candidates_ratio}
9 changes: 4 additions & 5 deletions vectordb_bench/backend/clients/mongodb/mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def _create_index(self) -> None:
break
log.info(f"index deleting {indices}")
except Exception:
log.exception("Error dropping index")
log.exception(f"Error dropping index {index_name}")
try:
# Create vector search index
search_index = SearchIndexModel(definition=index_params, name=index_name, type="vectorSearch")
Expand All @@ -104,7 +104,7 @@ def _create_index(self) -> None:
log.info(f"Created index on {self.id_field} field")

except Exception:
log.exception("Error creating index")
log.exception(f"Error creating index {index_name}")
raise

def _wait_for_index_ready(self, index_name: str, check_interval: int = 5) -> None:
Expand Down Expand Up @@ -167,16 +167,15 @@ def search_embedding(
else:
# Set numCandidates based on k value and data size
# For 50K dataset, use higher multiplier for better recall
num_candidates = min(10000, max(k * 20, search_params["numCandidates"] or 0))
num_candidates = min(10000, k * search_params["num_candidates_ratio"])
vector_search["numCandidates"] = num_candidates

# Add filter if specified
if filters:
log.info(f"Applying filter: {filters}")
vector_search["filter"] = {
"id": {"gt": filters["id"]},
"id": {"gte": filters["id"]},
}

pipeline = [
{"$vectorSearch": vector_search},
{
Expand Down
32 changes: 32 additions & 0 deletions vectordb_bench/frontend/config/dbCaseConfigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,26 @@ class CaseConfigInput(BaseModel):
)


# Option selector for the MongoDB index quantization mode.
# The option strings are the accepted values of the "quantization" parameter.
CaseConfigParamInput_MongoDBQuantizationType = CaseConfigInput(
    label=CaseConfigParamType.mongodb_quantization_type,
    inputType=InputType.Option,
    inputConfig={"options": ["none", "scalar", "binary"]},
)


# Numeric input for the MongoDB "num_candidates_ratio" search parameter.
# The UI restricts it to the range 10..20 and pre-fills 10.
CaseConfigParamInput_MongoDBNumCandidatesRatio = CaseConfigInput(
    label=CaseConfigParamType.mongodb_num_candidates_ratio,
    inputType=InputType.Number,
    inputConfig={"min": 10, "max": 20, "value": 10},
)


MilvusLoadConfig = [
CaseConfigParamInput_IndexType,
CaseConfigParamInput_M,
Expand Down Expand Up @@ -1224,6 +1244,14 @@ class CaseConfigInput(BaseModel):
CaseConfigParamInput_NumCandidates_AliES,
]

# Inputs shown for MongoDB load cases: only the quantization mode.
MongoDBLoadingConfig = [CaseConfigParamInput_MongoDBQuantizationType]

# Inputs shown for MongoDB performance cases: the quantization mode plus the
# numCandidates ratio.
MongoDBPerformanceConfig = [
    CaseConfigParamInput_MongoDBQuantizationType,
    CaseConfigParamInput_MongoDBNumCandidatesRatio,
]

CASE_CONFIG_MAP = {
DB.Milvus: {
CaseLabel.Load: MilvusLoadConfig,
Expand Down Expand Up @@ -1272,4 +1300,8 @@ class CaseConfigInput(BaseModel):
CaseLabel.Load: AliyunOpensearchLoadingConfig,
CaseLabel.Performance: AliyunOpenSearchPerformanceConfig,
},
DB.MongoDB: {
CaseLabel.Load: MongoDBLoadingConfig,
CaseLabel.Performance: MongoDBPerformanceConfig,
},
}
17 changes: 15 additions & 2 deletions vectordb_bench/log_util.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import logging
from logging import config
from pathlib import Path


def init(log_level: str):
# Create logs directory if it doesn't exist
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)

log_config = {
"version": 1,
"disable_existing_loggers": False,
Expand All @@ -24,15 +29,23 @@ def init(log_level: str):
"class": "logging.StreamHandler",
"formatter": "default",
},
"file": {
"class": "logging.handlers.RotatingFileHandler",
"formatter": "default",
"filename": "logs/vectordb_bench.log",
"maxBytes": 10485760, # 10MB
"backupCount": 5,
"encoding": "utf8",
},
},
"loggers": {
"vectordb_bench": {
"handlers": ["console"],
"handlers": ["console", "file"],
"level": log_level,
"propagate": False,
},
"no_color": {
"handlers": ["no_color_console"],
"handlers": ["no_color_console", "file"],
"level": log_level,
"propagate": False,
},
Expand Down
4 changes: 4 additions & 0 deletions vectordb_bench/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ class CaseConfigParamType(Enum):
numSearchThreads = "num_search_threads"
maxNumPrefetchDatasets = "max_num_prefetch_datasets"

# mongodb params
mongodb_quantization_type = "quantization"
mongodb_num_candidates_ratio = "num_candidates_ratio"


class CustomizedCase(BaseModel):
pass
Expand Down
Loading