Skip to content

Commit

Permalink
merge merge (#113)
Browse files Browse the repository at this point in the history
* script to test pybind build/

* lint

* fix lint

* lint

* not working but

* w# Changes to be committed:

* at least this works for build

Co-authored-by: Cat1andCat2 <@@>
  • Loading branch information
cat1andcat2 committed Sep 27, 2021
1 parent ba6e043 commit 3cdbab2
Show file tree
Hide file tree
Showing 14 changed files with 3,837 additions and 3,689 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# NIPS Competition
# NIPS Competition

## Prerequisites
* CMake >= 3.10
Expand Down
111 changes: 58 additions & 53 deletions build_bbann.cpp
Original file line number Diff line number Diff line change
@@ -1,72 +1,77 @@
#include "bbann.h"


// build disk-based index
// build disk-based index
// strategy is hnsw + ivf + pq + refine

/*
* args:
* 1. data type(string): float or uint8 or int8
* 2. raw data binary file(string): file name of raw data, include file path and file name
* 3. index path(string): a string end with '/' denotes the directory that where the index related file locates
* 2. raw data binary file(string): path of the raw data file, including both
* the directory and the file name
* 3. index path(string): a string ending with '/' that names the directory
* where the index-related files are located
* 4. M(int): parameter for hnsw
* 5. efConstruction(int): parameter for hnsw
* 6. metric type(string): metric type
* 7. K1(int): number of centroids of the first round kmeans
* 8. page per block: the number of pages in a block
*/

int main(int argc, char** argv) {
if (argc != 9) {
std::cout << "Usage: << " << argv[0]
<< " data_type(float or uint8 or int8)"
<< " binary raw data file"
<< " index output path"
<< " hnsw.M"
<< " hnsw.efConstruction"
<< " metric type(L2 or IP)"
<< " K1"
<< " page per block"
<< std::endl;
return 1;
}
int main(int argc, char **argv) {
if (argc != 9) {
std::cout << "Usage: << " << argv[0] << " data_type(float or uint8 or int8)"
<< " binary raw data file"
<< " index output path"
<< " hnsw.M"
<< " hnsw.efConstruction"
<< " metric type(L2 or IP)"
<< " K1"
<< " page per block" << std::endl;
return 1;
}

// parse parameters
std::string raw_data_bin_file(argv[2]);
std::string output_path(argv[3]);
int hnswM = std::stoi(argv[4]);
int hnswefC = std::stoi(argv[5]);
auto metric_type = get_metric_type_by_name(std::string(argv[6]));
int K1 = std::stoi(argv[7]);
const uint64_t block_size = std::stoul(argv[8]) * PAGESIZE;
// parse parameters
std::string raw_data_bin_file(argv[2]);
std::string output_path(argv[3]);
int hnswM = std::stoi(argv[4]);
int hnswefC = std::stoi(argv[5]);
auto metric_type = get_metric_type_by_name(std::string(argv[6]));
int K1 = std::stoi(argv[7]);
const uint64_t block_size = std::stoul(argv[8]) * PAGESIZE;

if ('/' != *output_path.rbegin())
output_path += '/';
if ('/' != *output_path.rbegin())
output_path += '/';

if (argv[1] == std::string("float")) {
if (MetricType::L2 == metric_type) {
build_bbann<float, float, CMax<float, uint32_t>>
(raw_data_bin_file, output_path, hnswM, hnswefC, metric_type, K1, block_size);
} else if (MetricType::IP == metric_type) {
build_bbann<float, float, CMin<float, uint32_t>>
(raw_data_bin_file, output_path, hnswM, hnswefC, metric_type, K1, block_size);
}
} else if (argv[1] == std::string("uint8")) {
if (MetricType::L2 == metric_type) {
build_bbann<uint8_t, uint32_t, CMax<uint32_t, uint32_t>>
(raw_data_bin_file, output_path, hnswM, hnswefC, metric_type, K1, block_size);
} else if (MetricType::IP == metric_type) {
build_bbann<uint8_t, uint32_t, CMin<uint32_t, uint32_t>>
(raw_data_bin_file, output_path, hnswM, hnswefC, metric_type, K1, block_size);
}
} else if (argv[1] == std::string("int8")) {
if (MetricType::L2 == metric_type) {
build_bbann<int8_t, int32_t, CMax<int32_t, uint32_t>>
(raw_data_bin_file, output_path, hnswM, hnswefC, metric_type, K1, block_size);
} else if (MetricType::IP == metric_type) {
build_bbann<int8_t, int32_t, CMin<int32_t, uint32_t>>
(raw_data_bin_file, output_path, hnswM, hnswefC, metric_type, K1, block_size);
}
if (argv[1] == std::string("float")) {
if (MetricType::L2 == metric_type) {
build_bbann<float, float, CMax<float, uint32_t>>(
raw_data_bin_file, output_path, hnswM, hnswefC, metric_type, K1,
block_size);
} else if (MetricType::IP == metric_type) {
build_bbann<float, float, CMin<float, uint32_t>>(
raw_data_bin_file, output_path, hnswM, hnswefC, metric_type, K1,
block_size);
}
} else if (argv[1] == std::string("uint8")) {
if (MetricType::L2 == metric_type) {
build_bbann<uint8_t, uint32_t, CMax<uint32_t, uint32_t>>(
raw_data_bin_file, output_path, hnswM, hnswefC, metric_type, K1,
block_size);
} else if (MetricType::IP == metric_type) {
build_bbann<uint8_t, uint32_t, CMin<uint32_t, uint32_t>>(
raw_data_bin_file, output_path, hnswM, hnswefC, metric_type, K1,
block_size);
}
} else if (argv[1] == std::string("int8")) {
if (MetricType::L2 == metric_type) {
build_bbann<int8_t, int32_t, CMax<int32_t, uint32_t>>(
raw_data_bin_file, output_path, hnswM, hnswefC, metric_type, K1,
block_size);
} else if (MetricType::IP == metric_type) {
build_bbann<int8_t, int32_t, CMin<int32_t, uint32_t>>(
raw_data_bin_file, output_path, hnswM, hnswefC, metric_type, K1,
block_size);
}
return 0;
}
return 0;
}
125 changes: 65 additions & 60 deletions build_bigann.cpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
#include "diskann.h"


// build disk-based index
// build disk-based index
// strategy is hnsw + ivf + pq + refine

/*
* args:
* 1. data type(string): float or uint8 or int8
* 2. raw data binary file(string): file name of raw data, include file path and file name
* 3. index path(string): a string end with '/' denotes the directory that where the index related file locates
* 2. raw data binary file(string): path of the raw data file, including both
* the directory and the file name
* 3. index path(string): a string ending with '/' that names the directory
* where the index-related files are located
* 4. M(int): parameter for hnsw
* 5. efConstruction(int): parameter for hnsw
* 6. PQ.M(int): the number of codebooks for each vector
Expand All @@ -19,63 +20,67 @@
* 11. PQ type(string): PQ or PQRes
*/

int main(int argc, char** argv) {
if (argc != 12) {
std::cout << "Usage: << " << argv[0]
<< " data_type(float or uint8 or int8)"
<< " binary raw data file"
<< " index output path"
<< " hnsw.M"
<< " hnsw.efConstruction"
<< " PQ.M"
<< " PQ.nbits"
<< " metric type(L2 or IP)"
<< " K1"
<< " bucket split threshold"
<< " quantizer type(PQ | PQRes)"
<< std::endl;
return 1;
}
// parse parameters
std::string raw_data_bin_file(argv[2]);
std::string output_path(argv[3]);
int hnswM = std::stoi(argv[4]);
int hnswefC = std::stoi(argv[5]);
int PQM = std::stoi(argv[6]);
int PQnbits = std::stoi(argv[7]);
auto metric_type = get_metric_type_by_name(std::string(argv[8]));
int K1 = std::stoi(argv[9]);
int threshold = std::stoi(argv[10]);
auto quantizer_type = get_quantizer_type_by_name(std::string(argv[11]));
assert(PQnbits == 8);
int main(int argc, char **argv) {
if (argc != 12) {
std::cout << "Usage: << " << argv[0] << " data_type(float or uint8 or int8)"
<< " binary raw data file"
<< " index output path"
<< " hnsw.M"
<< " hnsw.efConstruction"
<< " PQ.M"
<< " PQ.nbits"
<< " metric type(L2 or IP)"
<< " K1"
<< " bucket split threshold"
<< " quantizer type(PQ | PQRes)" << std::endl;
return 1;
}
// parse parameters
std::string raw_data_bin_file(argv[2]);
std::string output_path(argv[3]);
int hnswM = std::stoi(argv[4]);
int hnswefC = std::stoi(argv[5]);
int PQM = std::stoi(argv[6]);
int PQnbits = std::stoi(argv[7]);
auto metric_type = get_metric_type_by_name(std::string(argv[8]));
int K1 = std::stoi(argv[9]);
int threshold = std::stoi(argv[10]);
auto quantizer_type = get_quantizer_type_by_name(std::string(argv[11]));
assert(PQnbits == 8);

if ('/' != *output_path.rbegin())
output_path += '/';
if ('/' != *output_path.rbegin())
output_path += '/';

if (argv[1] == std::string("float")) {
if (MetricType::L2 == metric_type) {
build_bigann<float, float, CMax<float, uint32_t>>
(raw_data_bin_file, output_path, hnswM, hnswefC, PQM, PQnbits, K1, threshold, metric_type, quantizer_type);
} else if (MetricType::IP == metric_type) {
build_bigann<float, float, CMin<float, uint32_t>>
(raw_data_bin_file, output_path, hnswM, hnswefC, PQM, PQnbits, K1, threshold, metric_type, quantizer_type);
}
} else if (argv[1] == std::string("uint8")) {
if (MetricType::L2 == metric_type) {
build_bigann<uint8_t, uint32_t, CMax<uint32_t, uint32_t>>
(raw_data_bin_file, output_path, hnswM, hnswefC, PQM, PQnbits, K1, threshold, metric_type, quantizer_type);
} else if (MetricType::IP == metric_type) {
build_bigann<uint8_t, uint32_t, CMin<uint32_t, uint32_t>>
(raw_data_bin_file, output_path, hnswM, hnswefC, PQM, PQnbits, K1, threshold, metric_type, quantizer_type);
}
} else if (argv[1] == std::string("int8")) {
if (MetricType::L2 == metric_type) {
build_bigann<int8_t, int32_t, CMax<int32_t, uint32_t>>
(raw_data_bin_file, output_path, hnswM, hnswefC, PQM, PQnbits, K1, threshold, metric_type, quantizer_type);
} else if (MetricType::IP == metric_type) {
build_bigann<int8_t, int32_t, CMin<int32_t, uint32_t>>
(raw_data_bin_file, output_path, hnswM, hnswefC, PQM, PQnbits, K1, threshold, metric_type, quantizer_type);
}
if (argv[1] == std::string("float")) {
if (MetricType::L2 == metric_type) {
build_bigann<float, float, CMax<float, uint32_t>>(
raw_data_bin_file, output_path, hnswM, hnswefC, PQM, PQnbits, K1,
threshold, metric_type, quantizer_type);
} else if (MetricType::IP == metric_type) {
build_bigann<float, float, CMin<float, uint32_t>>(
raw_data_bin_file, output_path, hnswM, hnswefC, PQM, PQnbits, K1,
threshold, metric_type, quantizer_type);
}
} else if (argv[1] == std::string("uint8")) {
if (MetricType::L2 == metric_type) {
build_bigann<uint8_t, uint32_t, CMax<uint32_t, uint32_t>>(
raw_data_bin_file, output_path, hnswM, hnswefC, PQM, PQnbits, K1,
threshold, metric_type, quantizer_type);
} else if (MetricType::IP == metric_type) {
build_bigann<uint8_t, uint32_t, CMin<uint32_t, uint32_t>>(
raw_data_bin_file, output_path, hnswM, hnswefC, PQM, PQnbits, K1,
threshold, metric_type, quantizer_type);
}
} else if (argv[1] == std::string("int8")) {
if (MetricType::L2 == metric_type) {
build_bigann<int8_t, int32_t, CMax<int32_t, uint32_t>>(
raw_data_bin_file, output_path, hnswM, hnswefC, PQM, PQnbits, K1,
threshold, metric_type, quantizer_type);
} else if (MetricType::IP == metric_type) {
build_bigann<int8_t, int32_t, CMin<int32_t, uint32_t>>(
raw_data_bin_file, output_path, hnswM, hnswefC, PQM, PQnbits, K1,
threshold, metric_type, quantizer_type);
}
return 0;
}
return 0;
}
12 changes: 6 additions & 6 deletions install.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
echo "You need sudo to run this script"
echo "I assume the Azure machine using Ubuntu."
echo "==========Start of Installing=========="
sudo apt install git g++ cmake libaio-dev
echo "==========End of Installing=========="
#!/usr/bin/env bash
# Installs the build prerequisites (git, g++, cmake, libaio-dev) with apt.
# Must be run by a user with sudo rights; written for an Ubuntu host.
# NOTE: keep this file out of clang-format -- formatting it as C++ breaks the
# shebang, merges the commands, and splits the `libaio-dev` package name.
echo "You need sudo to run this script"
echo "I assume the Azure machine using Ubuntu."
echo "==========Start of Installing=========="
sudo apt install git g++ cmake libaio-dev
echo "==========End of Installing=========="
Loading

0 comments on commit 3cdbab2

Please sign in to comment.