diff --git a/langstream-agents/langstream-vector-agents/src/main/java/ai/langstream/agents/vector/couchbase/CouchbaseDataSource.java b/langstream-agents/langstream-vector-agents/src/main/java/ai/langstream/agents/vector/couchbase/CouchbaseDataSource.java index 8a00d994b..dd614169c 100644 --- a/langstream-agents/langstream-vector-agents/src/main/java/ai/langstream/agents/vector/couchbase/CouchbaseDataSource.java +++ b/langstream-agents/langstream-vector-agents/src/main/java/ai/langstream/agents/vector/couchbase/CouchbaseDataSource.java @@ -32,6 +32,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -109,17 +110,50 @@ public List> fetchData(String query, List params) { String collectionName = (String) queryMap.remove("collection-name"); String vectorIndexName = (String) queryMap.remove("index-name"); Map filter = (Map) queryMap.get("filter"); - String filterField = filter.keySet().iterator().next(); - String filterValue = (String) filter.get(filterField); + SearchRequest vectorSearchRequest; + // if the values in the filter are empty then remove them from the map + log.info("Filter: {}", filter); + if (filter != null) { + filter.entrySet() + .removeIf( + entry -> + entry.getValue() == null + || entry.getValue().toString().isEmpty()); + } + // if (filter == null || filter.isEmpty()) { + // queryMap.remove("filter"); + // } + log.info("filter after removing empty values: {}", filter); + if (queryMap.containsKey("filter") && filter != null && !filter.isEmpty()) { + List filterQueries = new ArrayList<>(); + + for (Map.Entry entry : filter.entrySet()) { + String filterField = entry.getKey(); + String filterValue = entry.getValue().toString(); + filterQueries.add(SearchQuery.match(filterValue).field(filterField)); + } - // Perform the vector search on the filtered documents - SearchRequest vectorSearchRequest = - SearchRequest.create(SearchQuery.match(filterValue).field(filterField)) - .vectorSearch( - VectorSearch.create( - VectorQuery.create("vector", vector) - .numCandidates(topK))); + // Combine all filter queries into a conjunctive query + SearchQuery searchQuery = + SearchQuery.conjuncts(filterQueries.toArray(new SearchQuery[0])); + // print search query + log.info("Search query: {}", searchQuery); + // Perform the vector search on the filtered documents + vectorSearchRequest = + SearchRequest.create(searchQuery) + .vectorSearch( + VectorSearch.create( + VectorQuery.create("vector", vector) + .numCandidates(topK))); + } else { + // Perform the vector search without any filter + vectorSearchRequest = + SearchRequest.create( + VectorSearch.create( + VectorQuery.create("vector", vector) + .numCandidates(topK))); + } SearchResult vectorSearchResult = cluster.search( bucketName + "." + scopeName + "." + vectorIndexName, @@ -161,10 +195,58 @@ public List> fetchData(String query, List params) { // remove the embeddings array from the // output content.removeKey("vector"); - // ensure filter field is = to the query - // filter value - if (content.getString(filterField) - .equals(filterValue)) { + // Ensure all filter fields match their + // corresponding filter values + if (filter != null && !filter.isEmpty()) { + // Ensure all filter fields match their + // corresponding filter values + + boolean filtersMatch = true; + for (Map.Entry entry : + filter.entrySet()) { + String field = entry.getKey(); + String value = + (String) entry.getValue(); + log.info("Filter field: {}", field); + log.info("Filter value: {}", value); + log.info( + "(filter) content value {}", + content.getString(field)); + // Ensure the filter field exists in + // the document and isn't "" + if (content.containsKey(field) + && !content.getString(field) + .isEmpty() + && !content.getString(field) + .equals(value)) { + filtersMatch = false; + log.info( + "Document {} has {} {} instead of {}", + documentId, + field, + content.getString( + field), + value); + break; + } + } + + if (filtersMatch) { + result.put("id", hit.id()); + + // Calculate and add cosine + // similarity + double cosineSimilarity = + computeCosineSimilarity( + vector, embeddings); + result.put( + "similarity", + cosineSimilarity); + result.putAll(content.toMap()); + } + } else { + // If there are no filters, process the + // result directly result.put("id", hit.id()); // Calculate and add cosine similarity double cosineSimilarity = @@ -174,14 +256,6 @@ public List> fetchData(String query, List params) { "similarity", cosineSimilarity); result.putAll(content.toMap()); } - - } else { - log.info( - "Document {} has {} {} instead of {}", - documentId, - filterField, - content.getString(filterField), - filterValue); } } } catch (DocumentNotFoundException e) { diff --git a/langstream-agents/langstream-vector-agents/src/main/java/ai/langstream/agents/vector/couchbase/CouchbaseWriter.java b/langstream-agents/langstream-vector-agents/src/main/java/ai/langstream/agents/vector/couchbase/CouchbaseWriter.java index 3719f9654..11347e555 100644 --- a/langstream-agents/langstream-vector-agents/src/main/java/ai/langstream/agents/vector/couchbase/CouchbaseWriter.java +++ b/langstream-agents/langstream-vector-agents/src/main/java/ai/langstream/agents/vector/couchbase/CouchbaseWriter.java @@ -150,7 +150,7 @@ public CompletableFuture upsert(Record record, Map context metadataFunctions.forEach( (key, evaluator) -> { Object value = evaluator.evaluate(mutableRecord); - content.put(key, value); + if (value != null) content.put(key, value); }); // Perform the upsert diff --git a/langstream-agents/langstream-vector-agents/src/test/java/ai/langstream/agents/vector/datasource/impl/CouchbaseWriterTest.java b/langstream-agents/langstream-vector-agents/src/test/java/ai/langstream/agents/vector/datasource/impl/CouchbaseWriterTest.java index 06d167bc9..7c41fd077 100644 --- a/langstream-agents/langstream-vector-agents/src/test/java/ai/langstream/agents/vector/datasource/impl/CouchbaseWriterTest.java +++ b/langstream-agents/langstream-vector-agents/src/test/java/ai/langstream/agents/vector/datasource/impl/CouchbaseWriterTest.java @@ -304,13 +304,12 @@ void testCouchbaseWrite() throws Exception { "vector": ?, "topK": 5, "bucket-name": "testbucket", - "vecPlanId": "12345", "scope-name": "_default", "collection-name": "_default", "index-name": "semantic", "filter": - {"vecPlanId": "12345"} - } + { "vecPlanId": "12345"} + } """; List params = List.of(vector); List> results = implementation.fetchData(query, params); diff --git a/langstream-runtime/langstream-runtime-impl/src/main/python/pyproject.toml b/langstream-runtime/langstream-runtime-impl/src/main/python/pyproject.toml index 756fda519..8e7a1b54c 100644 --- a/langstream-runtime/langstream-runtime-impl/src/main/python/pyproject.toml +++ b/langstream-runtime/langstream-runtime-impl/src/main/python/pyproject.toml @@ -77,6 +77,7 @@ elasticsearch = "^8.14.0" pinecone-client = {extras = ["grpc"], version = "^4.1.1"} "discord.py" = "^2.3.2" transformers = "^4.42.1" +couchbase = "^4.3.0" # workaround for https://github.com/python-poetry/poetry-plugin-export/issues/183 urllib3 = "<2"