Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
158 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
##################################################
# Search index structures and associated methods #
##################################################
"""
    IVFIndex{U,I,Dc,Dr,T,Q} <: AbstractIndex

Vector index backed by an inverted file system with asymmetric distance
computation (IVFADC). The type is a thin wrapper: its only field, `index`,
holds the underlying `IVFADCIndex`, which carries out the distance-based
similarity search and is intended for billion-scale collections.

# References
  * [Jégou et al. "Product quantization for nearest neighbor search"](https://hal.inria.fr/file/index/docid/514462/filename/paper_hal.pdf)
  * [Baranchuk et al. "Revisiting the inverted indices for billion-scale approximate nearest neighbors"](http://openaccess.thecvf.com/content_ECCV_2018/papers/Dmitry_Baranchuk_Revisiting_the_Inverted_ECCV_2018_paper.pdf)
"""
struct IVFIndex{U,I,Dc,Dr,T,Q} <: AbstractIndex
    index::IVFADCIndex{U,I,Dc,Dr,T,Q}
end
|
||
# Build the index from any matrix; all keyword arguments are forwarded
# untouched to the `IVFADCIndex` constructor.
function IVFIndex(data::AbstractMatrix; kwargs...)
    return IVFIndex(IVFADCIndex(data; kwargs...))
end

# Sparse floating-point input is first materialized as a dense `Matrix{T}`
# and then handed to `IVFADCIndex` together with the keyword arguments.
function IVFIndex(data::SparseMatrixCSC{T,I};
                  kwargs...) where {T<:AbstractFloat, I<:Integer}
    dense = Matrix{T}(data)
    return IVFIndex(IVFADCIndex(dense; kwargs...))
end
|
||
|
||
# Nearest neighbor search method
"""
    knn_search(index::IVFIndex, point, k [, keep]; w=1)

Search the wrapped `IVFADCIndex` for the `k` nearest neighbors of `point`
and return a tuple `(idxs, scores)`.

The identifiers returned by the wrapped index are converted to `Int` and
incremented by one. When `keep` has the same length as `index`, every result
is returned; otherwise only results whose identifier occurs in `keep` are
retained. The keyword `w` is passed through to the wrapped search.
"""
function knn_search(index::IVFIndex{U,I,Dc,Dr,T,Q},
                    point::AbstractVector{T},
                    k::Int,
                    keep::Vector{Int}=collect(1:length(index));
                    w::Int=1
                   ) where {U,I,Dc,Dr,T,Q}
    # Uses Euclidean distance by default
    raw_ids, scores = knn_search(index.index, Vector(point), k; w=w)
    # NOTE(review): the `+ 1` presumably maps zero-based ids to Julia's
    # one-based indexing -- confirm against IVFADCIndex.
    idxs = Int.(raw_ids) .+ 1
    if length(keep) != length(index)
        # Only a subset of the data points is valid; this linear membership
        # test is slow if 'keep' is large.
        mask = [id in keep for id in idxs]
        return idxs[mask], scores[mask]
    end
    # all data points are valid
    return idxs, scores
end
|
||
|
||
# Length method: delegates to the wrapped `IVFADCIndex`.
function length(index::IVFIndex)
    return length(index.index)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Exercises the common index interface (construction from dense and sparse
# data, nearest neighbor search, length, and the unsupported mutating
# operations) for every index type.
@testset "Index: $IndexType" for IndexType in [NaiveIndex, BruteTreeIndex, KDTreeIndex, HNSWIndex, IVFIndex]
    data = eltype(1.0)[0 0 0 5 5 5; 0 1 2 10 11 12]
    spdata = sparse(data)
    point = eltype(data)[5.1, 10]
    true_length = size(data, 2)

    # IVFIndex forwards its keyword arguments to IVFADCIndex and is built with
    # explicit settings here (presumably sized for this tiny data set); the
    # other index types are constructed with their defaults.
    build = IndexType === IVFIndex ? (d -> IVFIndex(d; kc=4, k=2, m=1)) : IndexType
    idx = build(data)
    # Build from the sparse matrix so the sparse constructors are actually
    # exercised for every index type (previously only IVFIndex used `spdata`).
    spidx = build(spdata)

    @test idx isa IndexType
    @test spidx isa IndexType

    # Asking for more neighbors than there are points must return all of them.
    idxs, scores = Garamond.knn_search(idx, point, 10; w=4)
    @test idxs isa Vector{Int}
    @test all(i in idxs for i in 1:true_length)
    @test scores isa Vector{eltype(data)}

    @test length(idx) == length(spidx) == true_length

    # Test not implemented interface
    @test_throws Garamond.IndexOperationException pop!(idx)
    @test_throws Garamond.IndexOperationException push!(idx)
    @test_throws Garamond.IndexOperationException pushfirst!(idx)
    @test_throws Garamond.IndexOperationException popfirst!(idx)
    @test_throws Garamond.IndexOperationException Garamond.delete_from_index!(idx, [1,2])
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters