Skip to content

Commit

Permalink
Refactor: Rename f8 to i8 to match IEEE
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Aug 3, 2023
1 parent 434c1da commit c37f80b
Show file tree
Hide file tree
Showing 23 changed files with 145 additions and 143 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Linux • MacOS • Windows • Docker • WebAssembly
- ✅ SIMD-optimized and [user-defined metrics](#user-defined-functions) with JIT compilation.
- ✅ Variable dimensionality vectors for unique applications, including search over compressed data.
- ✅ Bitwise Tanimoto and Sorensen coefficients for [Genomics and Chemistry applications](#usearch--rdkit--molecular-search).
- ✅ Hardware-agnostic `f16` & `f8` - [half-precision & quarter-precision support](#memory-efficiency-downcasting-and-quantization).
- ✅ Hardware-agnostic `f16` & `i8` - [half-precision & quarter-precision support](#memory-efficiency-downcasting-and-quantization).
- ✅ [View large indexes from disk](#disk-based-indexes) without loading into RAM.
- ✅ Space-efficient point-clouds with `uint40_t`, accommodating 4B+ size.
- ✅ Compatible with OpenMP and custom "executors", for fine-grained control over CPU utilization.
Expand Down Expand Up @@ -79,15 +79,15 @@ from usearch.index import Index
index = Index(
ndim=3, # Define the number of dimensions in input vectors
metric='cos', # Choose 'l2sq', 'haversine' or other metric, default = 'ip'
dtype='f32', # Quantize to 'f16' or 'f8' if needed, default = 'f32'
dtype='f32', # Quantize to 'f16' or 'i8' if needed, default = 'f32'
connectivity=16, # Optional: How frequent should the connections in the graph be
expansion_add=128, # Optional: Control the recall of indexing
expansion_search=64, # Optional: Control the quality of search
)

vector = np.array([0.2, 0.6, 0.4])
index.add(42, vector)
matches: Matches = index.search(vector, 10)
matches: SearchResults = index.search(vector, 10)

assert len(index) == 1
assert len(matches) == 1
Expand Down Expand Up @@ -121,10 +121,10 @@ Those, however, are only sometimes reliable, can significantly affect the statis
![USearch uint40_t support](https://github.com/unum-cloud/usearch/blob/main/assets/usearch-neighbor-types.png?raw=true)

Instead, we have focused on high-precision arithmetic over low-precision downcasted vectors.
The same index, and `add` and `search` operations will automatically down-cast or up-cast between `f32_t`, `f16_t`, `f64_t`, and `f8_t` representations, even if the hardware doesn't natively support it.
The same index, and `add` and `search` operations will automatically down-cast or up-cast between `f32_t`, `f16_t`, `f64_t`, and `i8_t` representations, even if the hardware doesn't natively support it.
Continuing the topic of memory efficiency, we provide a `uint40_t` to allow collection with over 4B+ vectors without allocating 8 bytes for every neighbor reference in the proximity graph.

| | FAISS, `f32` | USearch, `f32` | USearch, `f16` | USearch, `f8` |
| | FAISS, `f32` | USearch, `f32` | USearch, `f16` | USearch, `i8` |
| :----------- | -----------: | -------------: | -------------: | ----------------: |
| Batch Insert | 16 K/s | 73 K/s | 100 K/s | 104 K/s **+550%** |
| Batch Search | 82 K/s | 103 K/s | 113 K/s | 134 K/s **+63%** |
Expand Down
8 changes: 4 additions & 4 deletions c/lib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ scalar_kind_t to_native_scalar(usearch_scalar_kind_t kind) {
case usearch_scalar_f32_k: return scalar_kind_t::f32_k;
case usearch_scalar_f64_k: return scalar_kind_t::f64_k;
case usearch_scalar_f16_k: return scalar_kind_t::f16_k;
case usearch_scalar_f8_k: return scalar_kind_t::f8_k;
case usearch_scalar_i8_k: return scalar_kind_t::i8_k;
case usearch_scalar_b1_k: return scalar_kind_t::b1x8_k;
default: return scalar_kind_t::unknown_k;
}
Expand All @@ -52,7 +52,7 @@ add_result_t add_(index_dense_t* index, usearch_key_t key, void const* vector, s
case scalar_kind_t::f32_k: return index->add(key, (f32_t const*)vector);
case scalar_kind_t::f64_k: return index->add(key, (f64_t const*)vector);
case scalar_kind_t::f16_k: return index->add(key, (f16_t const*)vector);
case scalar_kind_t::f8_k: return index->add(key, (f8_bits_t const*)vector);
case scalar_kind_t::i8_k: return index->add(key, (i8_bits_t const*)vector);
case scalar_kind_t::b1x8_k: return index->add(key, (b1x8_t const*)vector);
default: return add_result_t{}.failed("Unknown scalar kind!");
}
Expand All @@ -63,7 +63,7 @@ bool get_(index_dense_t* index, usearch_key_t key, void* vector, scalar_kind_t k
case scalar_kind_t::f32_k: return index->get(key, (f32_t*)vector);
case scalar_kind_t::f64_k: return index->get(key, (f64_t*)vector);
case scalar_kind_t::f16_k: return index->get(key, (f16_t*)vector);
case scalar_kind_t::f8_k: return index->get(key, (f8_bits_t*)vector);
case scalar_kind_t::i8_k: return index->get(key, (i8_bits_t*)vector);
case scalar_kind_t::b1x8_k: return index->get(key, (b1x8_t*)vector);
default: return search_result_t().failed("Unknown scalar kind!");
}
Expand All @@ -74,7 +74,7 @@ search_result_t search_(index_dense_t* index, void const* vector, scalar_kind_t
case scalar_kind_t::f32_k: return index->search((f32_t const*)vector, n);
case scalar_kind_t::f64_k: return index->search((f64_t const*)vector, n);
case scalar_kind_t::f16_k: return index->search((f16_t const*)vector, n);
case scalar_kind_t::f8_k: return index->search((f8_bits_t const*)vector, n);
case scalar_kind_t::i8_k: return index->search((i8_bits_t const*)vector, n);
case scalar_kind_t::b1x8_k: return index->search((b1x8_t const*)vector, n);
default: return search_result_t().failed("Unknown scalar kind!");
}
Expand Down
2 changes: 1 addition & 1 deletion c/usearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ USEARCH_EXPORT typedef enum usearch_scalar_kind_t {
usearch_scalar_f32_k = 0,
usearch_scalar_f64_k,
usearch_scalar_f16_k,
usearch_scalar_f8_k,
usearch_scalar_i8_k,
usearch_scalar_b1_k,
usearch_scalar_unknown_k,
} usearch_scalar_kind_t;
Expand Down
8 changes: 4 additions & 4 deletions cpp/bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ struct args_t {
bool big = false;

bool quantize_f16 = false;
bool quantize_f8 = false;
bool quantize_i8 = false;
bool quantize_b1 = false;

bool metric_ip = false;
Expand Down Expand Up @@ -481,8 +481,8 @@ struct args_t {
scalar_kind_t quantization() const noexcept {
if (quantize_f16)
return scalar_kind_t::f16_k;
if (quantize_f8)
return scalar_kind_t::f8_k;
if (quantize_i8)
return scalar_kind_t::i8_k;
if (quantize_b1)
return scalar_kind_t::b1x8_k;
return scalar_kind_t::f32_k;
Expand Down Expand Up @@ -553,7 +553,7 @@ int main(int argc, char** argv) {
(option("--rows-take") & value("integer", args.vectors_to_take)).doc("Number of vectors to take"),
( //
option("-f16", "--f16quant").set(args.quantize_f16).doc("Enable `f16_t` quantization") |
option("-f8", "--f8quant").set(args.quantize_f8).doc("Enable `f8_t` quantization") |
option("-i8", "--i8quant").set(args.quantize_i8).doc("Enable `i8_t` quantization") |
option("-b1", "--b1quant").set(args.quantize_b1).doc("Enable `b1x8_t` quantization")),
( //
option("--ip").set(args.metric_ip).doc("Choose Inner Product metric") |
Expand Down
2 changes: 1 addition & 1 deletion docs/benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ The main columns are:
| `f32` x256 | 16 | 128 | 64 | 87'995 | 171'856 | 99.1% |
| `f16` x256 | 16 | 128 | 64 | 87'270 | 153'788 | 98.4% |
| `f16` x256 ✳️ | 16 | 128 | 64 | 71'454 | 132'673 | 98.4% |
| `f8` x256 | 16 | 128 | 64 | 115'923 | 274'653 | 98.9% |
| `i8` x256 | 16 | 128 | 64 | 115'923 | 274'653 | 98.9% |

As seen on the chart, for `f16` quantization, performance may differ depending on native hardware support for that numeric type.
Also worth noting, 8-bit quantization results in almost no quantization loss and may perform better than `f16`.
Expand Down
6 changes: 3 additions & 3 deletions golang/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ const (
f32 Quantization = iota
f16
f64
f8
i8
)

func (a Quantization) String() string {
Expand All @@ -55,8 +55,8 @@ func (a Quantization) String() string {
return "f32"
case f64:
return "f64"
case f8:
return "f8"
case i8:
return "i8"
default:
panic("unknown quantization")
}
Expand Down
20 changes: 10 additions & 10 deletions include/usearch/index_dense.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,13 +295,13 @@ class index_dense_gt {
mutable std::vector<byte_t> cast_buffer_;
struct casts_t {
cast_t from_b1x8;
cast_t from_f8;
cast_t from_i8;
cast_t from_f16;
cast_t from_f32;
cast_t from_f64;

cast_t to_b1x8;
cast_t to_f8;
cast_t to_i8;
cast_t to_f16;
cast_t to_f32;
cast_t to_f64;
Expand Down Expand Up @@ -507,31 +507,31 @@ class index_dense_gt {

// clang-format off
add_result_t add(key_t key, b1x8_t const* vector) { return add_(key, vector, casts_.from_b1x8); }
add_result_t add(key_t key, f8_bits_t const* vector) { return add_(key, vector, casts_.from_f8); }
add_result_t add(key_t key, i8_bits_t const* vector) { return add_(key, vector, casts_.from_i8); }
add_result_t add(key_t key, f16_t const* vector) { return add_(key, vector, casts_.from_f16); }
add_result_t add(key_t key, f32_t const* vector) { return add_(key, vector, casts_.from_f32); }
add_result_t add(key_t key, f64_t const* vector) { return add_(key, vector, casts_.from_f64); }

add_result_t add(key_t key, b1x8_t const* vector, index_dense_update_config_t config) { return add_(key, vector, config, casts_.from_b1x8); }
add_result_t add(key_t key, f8_bits_t const* vector, index_dense_update_config_t config) { return add_(key, vector, config, casts_.from_f8); }
add_result_t add(key_t key, i8_bits_t const* vector, index_dense_update_config_t config) { return add_(key, vector, config, casts_.from_i8); }
add_result_t add(key_t key, f16_t const* vector, index_dense_update_config_t config) { return add_(key, vector, config, casts_.from_f16); }
add_result_t add(key_t key, f32_t const* vector, index_dense_update_config_t config) { return add_(key, vector, config, casts_.from_f32); }
add_result_t add(key_t key, f64_t const* vector, index_dense_update_config_t config) { return add_(key, vector, config, casts_.from_f64); }

search_result_t search(b1x8_t const* vector, std::size_t wanted) const { return search_(vector, wanted, casts_.from_b1x8); }
search_result_t search(f8_bits_t const* vector, std::size_t wanted) const { return search_(vector, wanted, casts_.from_f8); }
search_result_t search(i8_bits_t const* vector, std::size_t wanted) const { return search_(vector, wanted, casts_.from_i8); }
search_result_t search(f16_t const* vector, std::size_t wanted) const { return search_(vector, wanted, casts_.from_f16); }
search_result_t search(f32_t const* vector, std::size_t wanted) const { return search_(vector, wanted, casts_.from_f32); }
search_result_t search(f64_t const* vector, std::size_t wanted) const { return search_(vector, wanted, casts_.from_f64); }

search_result_t search(b1x8_t const* vector, std::size_t wanted, index_search_config_t config) const { return search_(vector, wanted, config, casts_.from_b1x8); }
search_result_t search(f8_bits_t const* vector, std::size_t wanted, index_search_config_t config) const { return search_(vector, wanted, config, casts_.from_f8); }
search_result_t search(i8_bits_t const* vector, std::size_t wanted, index_search_config_t config) const { return search_(vector, wanted, config, casts_.from_i8); }
search_result_t search(f16_t const* vector, std::size_t wanted, index_search_config_t config) const { return search_(vector, wanted, config, casts_.from_f16); }
search_result_t search(f32_t const* vector, std::size_t wanted, index_search_config_t config) const { return search_(vector, wanted, config, casts_.from_f32); }
search_result_t search(f64_t const* vector, std::size_t wanted, index_search_config_t config) const { return search_(vector, wanted, config, casts_.from_f64); }

bool get(key_t key, b1x8_t* vector) const { return get_(key, vector, casts_.to_b1x8); }
bool get(key_t key, f8_bits_t* vector) const { return get_(key, vector, casts_.to_f8); }
bool get(key_t key, i8_bits_t* vector) const { return get_(key, vector, casts_.to_i8); }
bool get(key_t key, f16_t* vector) const { return get_(key, vector, casts_.to_f16); }
bool get(key_t key, f32_t* vector) const { return get_(key, vector, casts_.to_f32); }
bool get(key_t key, f64_t* vector) const { return get_(key, vector, casts_.to_f64); }
Expand Down Expand Up @@ -1297,13 +1297,13 @@ class index_dense_gt {
casts_t result;

result.from_b1x8 = cast_gt<b1x8_t, to_scalar_at>{};
result.from_f8 = cast_gt<f8_bits_t, to_scalar_at>{};
result.from_i8 = cast_gt<i8_bits_t, to_scalar_at>{};
result.from_f16 = cast_gt<f16_t, to_scalar_at>{};
result.from_f32 = cast_gt<f32_t, to_scalar_at>{};
result.from_f64 = cast_gt<f64_t, to_scalar_at>{};

result.to_b1x8 = cast_gt<to_scalar_at, b1x8_t>{};
result.to_f8 = cast_gt<to_scalar_at, f8_bits_t>{};
result.to_i8 = cast_gt<to_scalar_at, i8_bits_t>{};
result.to_f16 = cast_gt<to_scalar_at, f16_t>{};
result.to_f32 = cast_gt<to_scalar_at, f32_t>{};
result.to_f64 = cast_gt<to_scalar_at, f64_t>{};
Expand All @@ -1316,7 +1316,7 @@ class index_dense_gt {
case scalar_kind_t::f64_k: return make_casts_<f64_t>();
case scalar_kind_t::f32_k: return make_casts_<f32_t>();
case scalar_kind_t::f16_k: return make_casts_<f16_t>();
case scalar_kind_t::f8_k: return make_casts_<f8_bits_t>();
case scalar_kind_t::i8_k: return make_casts_<i8_bits_t>();
case scalar_kind_t::b1x8_k: return make_casts_<b1x8_t>();
default: return {};
}
Expand Down
Loading

0 comments on commit c37f80b

Please sign in to comment.