Skip to content

Commit

Permalink
Add new param 'max_empty_result_buckets' for IVF range search (#455)
Browse files Browse the repository at this point in the history
Signed-off-by: Yudong Cai <yudong.cai@zilliz.com>
  • Loading branch information
cydrain committed Mar 14, 2024
1 parent a1201b2 commit 3b2fafc
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 3 deletions.
1 change: 1 addition & 0 deletions include/knowhere/comp/index_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ constexpr const char* TRACE_ID = "trace_id";
constexpr const char* SPAN_ID = "span_id";
constexpr const char* TRACE_FLAGS = "trace_flags";
constexpr const char* MATERIALIZED_VIEW_SEARCH_INFO = "materialized_view_search_info";
constexpr const char* MAX_EMPTY_RESULT_BUCKETS = "max_empty_result_buckets";
}; // namespace meta

namespace indexparam {
Expand Down
4 changes: 4 additions & 0 deletions src/index/ivf/ivf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,7 @@ IvfIndexNode<DataType, IndexType>::RangeSearch(const DataSet& dataset, const Con

faiss::IVFSearchParameters ivf_search_params;
ivf_search_params.nprobe = index_->nlist;
ivf_search_params.max_empty_result_buckets = ivf_cfg.max_empty_result_buckets.value();
ivf_search_params.sel = id_selector;

index_->range_search(1, cur_data, radius, &res, &ivf_search_params);
Expand All @@ -756,6 +757,7 @@ IvfIndexNode<DataType, IndexType>::RangeSearch(const DataSet& dataset, const Con
faiss::IVFSearchParameters ivf_search_params;
ivf_search_params.nprobe = index_->nlist;
ivf_search_params.max_codes = 0;
ivf_search_params.max_empty_result_buckets = ivf_cfg.max_empty_result_buckets.value();
ivf_search_params.sel = id_selector;

index_->range_search(1, cur_query, radius, &res, &ivf_search_params);
Expand All @@ -768,6 +770,7 @@ IvfIndexNode<DataType, IndexType>::RangeSearch(const DataSet& dataset, const Con

// todo aguzhva: this is somewhat illogical. Refactor?
faiss::IVFSearchParameters base_search_params;
base_search_params.max_empty_result_buckets = ivf_cfg.max_empty_result_buckets.value();
base_search_params.sel = id_selector;

faiss::IndexScaNNSearchParameters scann_search_params;
Expand All @@ -784,6 +787,7 @@ IvfIndexNode<DataType, IndexType>::RangeSearch(const DataSet& dataset, const Con
faiss::IVFSearchParameters ivf_search_params;
ivf_search_params.nprobe = index_->nlist;
ivf_search_params.max_codes = 0;
ivf_search_params.max_empty_result_buckets = ivf_cfg.max_empty_result_buckets.value();
ivf_search_params.sel = id_selector;

index_->range_search(1, cur_query, radius, &res, &ivf_search_params);
Expand Down
6 changes: 6 additions & 0 deletions src/index/ivf/ivf_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class IvfConfig : public BaseConfig {
CFG_INT nprobe;
CFG_BOOL use_elkan;
CFG_BOOL ensure_topk_full; // only takes effect on temp index (IVF_FLAT_CC) now
CFG_INT max_empty_result_buckets;
KNOHWERE_DECLARE_CONFIG(IvfConfig) {
KNOWHERE_CONFIG_DECLARE_FIELD(nlist)
.set_default(128)
Expand All @@ -42,6 +43,11 @@ class IvfConfig : public BaseConfig {
.set_default(true)
.description("whether to make sure topk results full")
.for_search();
KNOWHERE_CONFIG_DECLARE_FIELD(max_empty_result_buckets)
.set_default(1)
.description("the maximum of continuous buckets with empty result")
.for_range_search()
.set_range(1, 65536);
}
};

Expand Down
14 changes: 13 additions & 1 deletion thirdparty/faiss/faiss/IndexBinaryIVF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,7 @@ void IndexBinaryIVF::range_search_preassigned(
bool store_pairs = false;
size_t nlistv = 0, ndis = 0;

size_t max_empty_result_buckets = params ? params->max_empty_result_buckets: 1;
IDSelector* sel = params ? params->sel : nullptr;

std::vector<RangeSearchPartialResult*> all_pres(omp_get_max_threads());
Expand Down Expand Up @@ -1201,10 +1202,21 @@ void IndexBinaryIVF::range_search_preassigned(
// Adopt new strategy for faiss IVF range search

size_t prev_nres = qres.nres;
size_t ndup = 0;

for (size_t ik = 0; ik < nprobe; ik++) {
scan_list_func(i, ik, qres);
if (qres.nres == prev_nres) break;

// if 'max_empty_result_buckets' consecutive buckets yield no valid
// results, skip the remaining buckets
if (qres.nres == prev_nres) {
ndup++;
} else {
ndup = 0;
}
if (ndup == max_empty_result_buckets) {
break;
}
prev_nres = qres.nres;
}

Expand Down
14 changes: 13 additions & 1 deletion thirdparty/faiss/faiss/IndexIVF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,7 @@ void IndexIVF::range_search_preassigned(
FAISS_THROW_IF_NOT(nprobe > 0);

idx_t max_codes = params ? params->max_codes : this->max_codes;
size_t max_empty_result_buckets = params ? params->max_empty_result_buckets: 1;
IDSelector* sel = params ? params->sel : nullptr;

FAISS_THROW_IF_NOT_MSG(
Expand Down Expand Up @@ -892,10 +893,21 @@ void IndexIVF::range_search_preassigned(
// Adopt new strategy for faiss IVF range search

size_t prev_nres = qres.nres;
size_t ndup = 0;

for (size_t ik = 0; ik < nprobe; ik++) {
scan_list_func(i, ik, qres);
if (qres.nres == prev_nres) break;

// if 'max_empty_result_buckets' consecutive buckets yield no valid
// results, skip the remaining buckets
if (qres.nres == prev_nres) {
ndup++;
} else {
ndup = 0;
}
if (ndup == max_empty_result_buckets) {
break;
}
prev_nres = qres.nres;
}

Expand Down
6 changes: 5 additions & 1 deletion thirdparty/faiss/faiss/IndexIVF.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ struct SearchParametersIVF : SearchParameters {
///< it is a bit heavy to further retrieve more buckets
///< therefore to make sure we get topk results, use nprobe=nlist and use max_codes to narrow down the search range
bool ensure_topk_full = false;


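///< during IVF range search, once 'max_empty_result_buckets' consecutive
///< buckets yield no valid results, terminate the range search
// NOTE(review): the default here is 0, whereas range_search_preassigned
// falls back to 1 when no params are passed -- confirm the intended default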
size_t max_empty_result_buckets = 0;

SearchParameters* quantizer_params = nullptr;

virtual ~SearchParametersIVF() {}
Expand Down

0 comments on commit 3b2fafc

Please sign in to comment.