Skip to content
This repository was archived by the owner on Aug 25, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -109,17 +110,50 @@ public List<Map<String, Object>> fetchData(String query, List<Object> params) {
String collectionName = (String) queryMap.remove("collection-name");
String vectorIndexName = (String) queryMap.remove("index-name");
Map<String, Object> filter = (Map<String, Object>) queryMap.get("filter");
String filterField = filter.keySet().iterator().next();
String filterValue = (String) filter.get(filterField);
SearchRequest vectorSearchRequest;
// if the values in the filter are empty then remove them from the map
log.info("Filter: {}", filter);
if (filter != null) {
filter.entrySet()
.removeIf(
entry ->
entry.getValue() == null
|| entry.getValue().toString().isEmpty());
}
// if (filter == null || filter.isEmpty()) {
// queryMap.remove("filter");
// }
log.info("filter after removing empty values: {}", filter);
if (queryMap.containsKey("filter") && filter != null && !filter.isEmpty()) {
List<SearchQuery> filterQueries = new ArrayList<>();

for (Map.Entry<String, Object> entry : filter.entrySet()) {
String filterField = entry.getKey();
String filterValue = entry.getValue().toString();
filterQueries.add(SearchQuery.match(filterValue).field(filterField));
}

// Perform the vector search on the filtered documents
SearchRequest vectorSearchRequest =
SearchRequest.create(SearchQuery.match(filterValue).field(filterField))
.vectorSearch(
VectorSearch.create(
VectorQuery.create("vector", vector)
.numCandidates(topK)));
// Combine all filter queries into a conjunctive query
SearchQuery searchQuery =
SearchQuery.conjuncts(filterQueries.toArray(new SearchQuery[0]));
// print search query
log.info("Search query: {}", searchQuery);

// Perform the vector search on the filtered documents
vectorSearchRequest =
SearchRequest.create(searchQuery)
.vectorSearch(
VectorSearch.create(
VectorQuery.create("vector", vector)
.numCandidates(topK)));
} else {
// Perform the vector search without any filter
vectorSearchRequest =
SearchRequest.create(
VectorSearch.create(
VectorQuery.create("vector", vector)
.numCandidates(topK)));
}
SearchResult vectorSearchResult =
cluster.search(
bucketName + "." + scopeName + "." + vectorIndexName,
Expand Down Expand Up @@ -161,10 +195,58 @@ public List<Map<String, Object>> fetchData(String query, List<Object> params) {
// remove the embeddings array from the
// output
content.removeKey("vector");
// ensure filter field is = to the query
// filter value
if (content.getString(filterField)
.equals(filterValue)) {
// Ensure all filter fields match their
// corresponding filter values
if (filter != null && !filter.isEmpty()) {
// Ensure all filter fields match their
// corresponding filter values

boolean filtersMatch = true;
for (Map.Entry<String, Object> entry :
filter.entrySet()) {
String field = entry.getKey();
String value =
(String) entry.getValue();
log.info("Filter field: {}", field);
log.info("Filter value: {}", value);
log.info(
"(filter) content value {}",
content.getString(field));
// Ensure the filter field exists in
// the document and isn't ""
if (content.containsKey(field)
&& !content.getString(field)
.isEmpty()
&& !content.getString(field)
.equals(value)) {
filtersMatch = false;
log.info(
"Document {} has {} {} instead of {}",
documentId,
field,
content.getString(
field),
value);
break;
}
}

if (filtersMatch) {
result.put("id", hit.id());

// Calculate and add cosine
// similarity
double cosineSimilarity =
computeCosineSimilarity(
vector, embeddings);
result.put(
"similarity",
cosineSimilarity);
result.putAll(content.toMap());
}
} else {
// If there are no filters, process the
// result directly
result.put("id", hit.id());
// Calculate and add cosine similarity
double cosineSimilarity =
Expand All @@ -174,14 +256,6 @@ public List<Map<String, Object>> fetchData(String query, List<Object> params) {
"similarity", cosineSimilarity);
result.putAll(content.toMap());
}

} else {
log.info(
"Document {} has {} {} instead of {}",
documentId,
filterField,
content.getString(filterField),
filterValue);
}
}
} catch (DocumentNotFoundException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ public CompletableFuture<Void> upsert(Record record, Map<String, Object> context
metadataFunctions.forEach(
(key, evaluator) -> {
Object value = evaluator.evaluate(mutableRecord);
content.put(key, value);
if (value != null) content.put(key, value);
});

// Perform the upsert
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,13 +304,12 @@ void testCouchbaseWrite() throws Exception {
"vector": ?,
"topK": 5,
"bucket-name": "testbucket",
"vecPlanId": "12345",
"scope-name": "_default",
"collection-name": "_default",
"index-name": "semantic",
"filter":
{"vecPlanId": "12345"}
}
{ "vecPlanId": "12345"}
}
""";
List<Object> params = List.of(vector);
List<Map<String, Object>> results = implementation.fetchData(query, params);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ elasticsearch = "^8.14.0"
pinecone-client = {extras = ["grpc"], version = "^4.1.1"}
"discord.py" = "^2.3.2"
transformers = "^4.42.1"
couchbase = "^4.3.0"

# workaround for https://github.com/python-poetry/poetry-plugin-export/issues/183
urllib3 = "<2"
Expand Down