Skip to content

Commit

Permalink
Merge branch 'zilliztech:main' into amx_ip
Browse files Browse the repository at this point in the history
  • Loading branch information
mellonyou committed Jun 20, 2024
2 parents 417601b + 0e04323 commit b420761
Show file tree
Hide file tree
Showing 41 changed files with 535 additions and 320 deletions.
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,9 @@ include_directories(${folly_INCLUDE_DIRS})
find_package(nlohmann_json REQUIRED)
find_package(glog REQUIRED)
find_package(prometheus-cpp REQUIRED)
find_package(fmt REQUIRED)
if(NOT WITH_RAFT)
find_package(fmt REQUIRED)
endif()
find_package(opentelemetry-cpp REQUIRED)

set(CMAKE_CXX_STANDARD 17)
Expand Down Expand Up @@ -129,7 +131,7 @@ endif()

if(WITH_LIGHT)
knowhere_file_glob(GLOB_RECURSE KNOWHERE_SRCS src/common/*.cc
src/index/hnsw/hnsw.cc src/io/*.cc)
src/index/hnsw/hnsw.cc src/io/*.cc src/index/index_factory.cc)
knowhere_file_glob(GLOB_RECURSE KNOWHERE_TRACER_SRCS src/common/tracer.cc
src/common/prometheus_client.cc)
list(REMOVE_ITEM KNOWHERE_SRCS ${KNOWHERE_TRACER_SRCS})
Expand Down
59 changes: 24 additions & 35 deletions benchmark/hdf5/benchmark_float.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,6 @@
#include "knowhere/comp/local_file_manager.h"
#include "knowhere/dataset.h"

// File-system locations used by the DiskANN benchmark. Everything lives under
// "<current working directory>/diskann_test": one raw-data file plus a separate
// index directory and index-file prefix for the L2 and the IP metric.
namespace fs = std::filesystem;
std::string kDir = fs::current_path().string() + "/diskann_test";
std::string kRawDataPath = kDir + "/raw_data";
std::string kL2IndexDir = kDir + "/l2_index";
std::string kIPIndexDir = kDir + "/ip_index";
std::string kL2IndexPrefix = kL2IndexDir + "/l2";
std::string kIPIndexPrefix = kIPIndexDir + "/ip";

// Writes a binary file at `data_path` consisting of `num` (uint32), `dim` (uint32)
// and then the sizeof(float) * num * dim bytes starting at `raw_data`.
// Stream errors (failed open, short write) are not checked.
void
WriteRawDataToDisk(const std::string data_path, const float* raw_data, const uint32_t num, const uint32_t dim) {
    // The product starts from sizeof(float), so it is evaluated in size_t rather than uint32_t.
    const auto payload_bytes = sizeof(float) * num * dim;

    std::ofstream out(data_path.c_str(), std::ios::binary);
    // Header: element count followed by dimension.
    out.write(reinterpret_cast<const char*>(&num), sizeof(uint32_t));
    out.write(reinterpret_cast<const char*>(&dim), sizeof(uint32_t));
    // Body: the raw float payload.
    out.write(reinterpret_cast<const char*>(raw_data), payload_bytes);
    out.close();
}

class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
public:
void
Expand Down Expand Up @@ -117,25 +100,22 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
void
test_diskann(const knowhere::Json& cfg) {
auto conf = cfg;
conf["index_prefix"] = (metric_type_ == knowhere::metric::L2 ? kL2IndexPrefix : kIPIndexPrefix);
conf["search_cache_budget_gb"] = 0;
conf["beamwidth"] = 8;

knowhere::BinarySet binset;
index_.Deserialize(binset, conf);

printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
printf("================================================================================\n");
for (auto nq : NQs_) {
auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_);
for (auto k : TOPKs_) {
conf["search_list_size"] = 2 * k;
conf[knowhere::meta::TOPK] = k;
CALC_TIME_SPAN(auto result = index_.value().Search(*ds_ptr, conf, nullptr));
auto ids = result.value()->GetIds();
float recall = CalcRecall(ids, nq, k);
printf(" nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nq, k, t_diff, recall);
std::fflush(stdout);
for (auto search_list_size : SEARCH_LISTs_) {
conf["search_list_size"] = search_list_size;
for (auto nq : NQs_) {
auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_);
for (auto k : TOPKs_) {
conf[knowhere::meta::TOPK] = k;
CALC_TIME_SPAN(auto result = index_.value().Search(*ds_ptr, conf, nullptr));
auto ids = result.value()->GetIds();
float recall = CalcRecall(ids, nq, k);
printf(" search_list_size = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n",
search_list_size, nq, k, t_diff, recall);
std::fflush(stdout);
}
}
}
printf("================================================================================\n");
Expand Down Expand Up @@ -177,6 +157,9 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
const std::vector<int32_t> HNSW_Ms_ = {16};
const std::vector<int32_t> EFCONs_ = {200};
const std::vector<int32_t> EFs_ = {128, 256, 512};

// DISKANN index params
const std::vector<int32_t> SEARCH_LISTs_ = {100, 200, 400};
};

TEST_F(Benchmark_float, TEST_IDMAP) {
Expand Down Expand Up @@ -260,9 +243,10 @@ TEST_F(Benchmark_float, TEST_DISKANN) {
conf["index_prefix"] = (metric_type_ == knowhere::metric::L2 ? kL2IndexPrefix : kIPIndexPrefix);
conf["data_path"] = kRawDataPath;
conf["max_degree"] = 56;
conf["search_list_size"] = 128;
conf["pq_code_budget_gb"] = sizeof(float) * dim_ * nb_ * 0.125 / (1024 * 1024 * 1024);
conf["build_dram_budget_gb"] = 32.0;
conf["search_cache_budget_gb"] = 0;
conf["beamwidth"] = 8;

fs::create_directory(kDir);
fs::create_directory(kL2IndexDir);
Expand All @@ -277,7 +261,12 @@ TEST_F(Benchmark_float, TEST_DISKANN) {
index_type_, knowhere::Version::GetCurrentVersion().VersionNumber(), diskann_index_pack);
printf("[%.3f s] Building all on %d vectors\n", get_time_diff(), nb_);
knowhere::DataSetPtr ds_ptr = nullptr;
index_.Build(*ds_ptr, conf);
index_.value().Build(*ds_ptr, conf);

knowhere::BinarySet binset;
index_.value().Serialize(binset);
index_.value().Deserialize(binset, conf);

test_diskann(conf);
}
#endif
31 changes: 12 additions & 19 deletions benchmark/hdf5/benchmark_float_bitset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,12 @@

const int32_t GPU_DEVICE_ID = 0;

// File-system locations used by the DiskANN benchmark. Everything lives under
// "<current working directory>/diskann_test": one raw-data file plus a separate
// index directory and index-file prefix for the L2 and the IP metric.
namespace fs = std::filesystem;
std::string kDir = fs::current_path().string() + "/diskann_test";
std::string kRawDataPath = kDir + "/raw_data";
std::string kL2IndexDir = kDir + "/l2_index";
std::string kIPIndexDir = kDir + "/ip_index";
std::string kL2IndexPrefix = kL2IndexDir + "/l2";
std::string kIPIndexPrefix = kIPIndexDir + "/ip";

constexpr uint32_t kNumRows = 10000;
constexpr uint32_t kNumQueries = 100;
constexpr uint32_t kDim = 128;
constexpr uint32_t kK = 10;
constexpr float kL2KnnRecall = 0.8;

// Writes a binary file at `data_path` consisting of `num` (uint32), `dim` (uint32)
// and then the sizeof(float) * num * dim bytes starting at `raw_data`.
// Stream errors (failed open, short write) are not checked.
void
WriteRawDataToDisk(const std::string data_path, const float* raw_data, const uint32_t num, const uint32_t dim) {
    // The product starts from sizeof(float), so it is evaluated in size_t rather than uint32_t.
    const auto payload_bytes = sizeof(float) * num * dim;

    std::ofstream out(data_path.c_str(), std::ios::binary);
    // Header: element count followed by dimension.
    out.write(reinterpret_cast<const char*>(&num), sizeof(uint32_t));
    out.write(reinterpret_cast<const char*>(&dim), sizeof(uint32_t));
    // Body: the raw float payload.
    out.write(reinterpret_cast<const char*>(raw_data), payload_bytes);
    out.close();
}

class Benchmark_float_bitset : public Benchmark_knowhere, public ::testing::Test {
public:
void
Expand Down Expand Up @@ -104,10 +87,10 @@ class Benchmark_float_bitset : public Benchmark_knowhere, public ::testing::Test
printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
}

#ifdef KNOWHERE_WITH_DISKANN
void
test_diskann(const knowhere::Json& cfg) {
auto conf = cfg;
conf["index_prefix"] = (metric_type_ == knowhere::metric::L2 ? kL2IndexPrefix : kIPIndexPrefix);

printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
printf("================================================================================\n");
Expand All @@ -118,9 +101,11 @@ class Benchmark_float_bitset : public Benchmark_knowhere, public ::testing::Test
auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_);
for (auto k : TOPKs_) {
conf[knowhere::meta::TOPK] = k;
auto g_result = golden_index_.value().Search(*ds_ptr, conf, bitset);
auto g_ids = g_result.value()->GetIds();
CALC_TIME_SPAN(auto result = index_.value().Search(*ds_ptr, conf, bitset));
auto ids = result.value()->GetIds();
float recall = CalcRecall(ids, nq, k);
float recall = CalcRecall(g_ids, ids, nq, k);
printf(" bitset_per = %3d%%, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", per, nq, k, t_diff,
recall);
std::fflush(stdout);
Expand All @@ -130,6 +115,7 @@ class Benchmark_float_bitset : public Benchmark_knowhere, public ::testing::Test
printf("================================================================================\n");
printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
}
#endif

protected:
void
Expand Down Expand Up @@ -214,6 +200,7 @@ TEST_F(Benchmark_float_bitset, TEST_HNSW) {
test_hnsw(conf);
}

#ifdef KNOWHERE_WITH_DISKANN
TEST_F(Benchmark_float_bitset, TEST_DISKANN) {
index_type_ = knowhere::IndexEnum::INDEX_DISKANN;

Expand All @@ -238,5 +225,11 @@ TEST_F(Benchmark_float_bitset, TEST_DISKANN) {
printf("[%.3f s] Building all on %d vectors\n", get_time_diff(), nb_);
knowhere::DataSetPtr ds_ptr = nullptr;
index_.value().Build(*ds_ptr, conf);

knowhere::BinarySet binset;
index_.value().Serialize(binset);
index_.value().Deserialize(binset, conf);

test_diskann(conf);
}
#endif
80 changes: 75 additions & 5 deletions benchmark/hdf5/benchmark_float_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "benchmark_knowhere.h"
#include "knowhere/comp/knowhere_config.h"
#include "knowhere/comp/local_file_manager.h"
#include "knowhere/dataset.h"

class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test {
Expand All @@ -36,7 +37,8 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test
CheckDistance(metric_type_, ids, distances, lims, nq);
float recall = CalcRecall(ids, lims, nq);
float accuracy = CalcAccuracy(ids, lims, nq);
printf(" nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n", nq, t_diff, recall, accuracy);
printf(" nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f, L@ = %.2f\n", nq, t_diff, recall, accuracy,
lims[nq] / (float)nq);
std::fflush(stdout);
}
printf("================================================================================\n");
Expand All @@ -61,8 +63,8 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test
auto lims = result.value()->GetLims();
float recall = CalcRecall(ids, lims, nq);
float accuracy = CalcAccuracy(ids, lims, nq);
printf(" nprobe = %4d, nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n", nprobe, nq, t_diff, recall,
accuracy);
printf(" nprobe = %4d, nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f, L@ = %.2f\n", nprobe, nq,
t_diff, recall, accuracy, lims[nq] / (float)nq);
std::fflush(stdout);
}
}
Expand All @@ -89,15 +91,43 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test
auto lims = result.value()->GetLims();
float recall = CalcRecall(ids, lims, nq);
float accuracy = CalcAccuracy(ids, lims, nq);
printf(" ef = %4d, nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n", ef, nq, t_diff, recall,
accuracy);
printf(" ef = %4d, nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f, L@ = %.2f\n", ef, nq, t_diff,
recall, accuracy, lims[nq] / (float)nq);
std::fflush(stdout);
}
}
printf("================================================================================\n");
printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
}

#ifdef KNOWHERE_WITH_DISKANN
// Range-search sweep for the DiskANN index: for every value in SEARCH_LISTs_ and every
// query count in NQs_, runs RangeSearch on index_ and prints the elapsed time, recall,
// accuracy and lims[nq] / nq (labelled "L@"; presumably the mean number of hits per
// query -- confirm against GetLims()).
// `cfg` must contain knowhere::meta::RADIUS; it is copied, so the caller's config is not modified.
void
test_diskann(const knowhere::Json& cfg) {
    auto search_conf = cfg;
    const auto radius = search_conf.at(knowhere::meta::RADIUS).get<float>();

    printf("\n[%0.3f s] %s | %s, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
           radius);
    printf("================================================================================\n");
    for (auto list_size : SEARCH_LISTs_) {
        search_conf["search_list_size"] = list_size;
        for (auto nq : NQs_) {
            auto queries = knowhere::GenDataSet(nq, dim_, xq_);
            // CALC_TIME_SPAN is expected to time the statement and provide t_diff, which is printed below.
            CALC_TIME_SPAN(auto res = index_.value().RangeSearch(*queries, search_conf, nullptr));
            auto ids = res.value()->GetIds();
            auto lims = res.value()->GetLims();
            const float recall = CalcRecall(ids, lims, nq);
            const float accuracy = CalcAccuracy(ids, lims, nq);
            const float hits_per_query = lims[nq] / static_cast<float>(nq);
            printf(" search_list_size = %4d, nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f, L@ = %.2f\n",
                   list_size, nq, t_diff, recall, accuracy, hits_per_query);
            std::fflush(stdout);
        }
    }
    printf("================================================================================\n");
    printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
}
#endif

protected:
void
SetUp() override {
Expand Down Expand Up @@ -138,6 +168,9 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test
const std::vector<int32_t> HNSW_Ms_ = {16};
const std::vector<int32_t> EFCONs_ = {200};
const std::vector<int32_t> EFs_ = {16, 32, 64, 128, 256, 512};

// DISKANN index params
const std::vector<int32_t> SEARCH_LISTs_ = {100, 200, 400};
};

// This testcase can be used to generate HDF5 file
Expand Down Expand Up @@ -243,3 +276,40 @@ TEST_F(Benchmark_float_range, TEST_HNSW) {
}
}
}

#ifdef KNOWHERE_WITH_DISKANN
// DiskANN range-search benchmark: dumps the base vectors to disk, builds a DiskANN
// index from that file, round-trips it through Serialize/Deserialize, then runs the
// search sweep in test_diskann().
TEST_F(Benchmark_float_range, TEST_DISKANN) {
    index_type_ = knowhere::IndexEnum::INDEX_DISKANN;

    // Prepare the working directories and write the base vectors to kRawDataPath.
    fs::create_directory(kDir);
    fs::create_directory(kL2IndexDir);
    fs::create_directory(kIPIndexDir);
    WriteRawDataToDisk(kRawDataPath, (const float*)xb_, (const uint32_t)nb_, (const uint32_t)dim_);

    // Start from the fixture config and layer the DiskANN settings on top of it.
    knowhere::Json diskann_conf = cfg_;
    diskann_conf["index_prefix"] = (metric_type_ == knowhere::metric::L2 ? kL2IndexPrefix : kIPIndexPrefix);
    diskann_conf["data_path"] = kRawDataPath;
    diskann_conf["max_degree"] = 56;
    // One eighth of the raw float data size, expressed in GiB.
    diskann_conf["pq_code_budget_gb"] = sizeof(float) * dim_ * nb_ * 0.125 / (1024 * 1024 * 1024);
    diskann_conf["build_dram_budget_gb"] = 32.0;
    diskann_conf["search_cache_budget_gb"] = 0;
    diskann_conf["beamwidth"] = 8;

    std::shared_ptr<knowhere::FileManager> file_manager = std::make_shared<knowhere::LocalFileManager>();
    auto index_pack = knowhere::Pack(file_manager);
    index_ = knowhere::IndexFactory::Instance().Create<knowhere::fp32>(
        index_type_, knowhere::Version::GetCurrentVersion().VersionNumber(), index_pack);

    printf("[%.3f s] Building all on %d vectors\n", get_time_diff(), nb_);
    // NOTE(review): the dataset pointer is null and is dereferenced for Build(). Presumably
    // DiskANN reads its input from "data_path" and never touches the dataset argument --
    // confirm, since dereferencing a null pointer is formally undefined behavior.
    knowhere::DataSetPtr no_dataset = nullptr;
    index_.value().Build(*no_dataset, diskann_conf);

    // Serialize and immediately deserialize the index before searching.
    knowhere::BinarySet binary_set;
    index_.value().Serialize(binary_set);
    index_.value().Deserialize(binary_set, diskann_conf);

    test_diskann(diskann_conf);
}
#endif
Loading

0 comments on commit b420761

Please sign in to comment.