From 125255f8d3c8f7a9b7b1f0056513b3b538c186a0 Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Tue, 29 Apr 2025 15:39:17 +0200 Subject: [PATCH 1/8] Test Fixes and NearText --- src/Weaviate.Client.Tests/Tests.cs | 253 ++++++++++++------- src/Weaviate.Client/CollectionsClient.cs | 22 +- src/Weaviate.Client/Models/WeaviateObject.cs | 2 + src/Weaviate.Client/QueryClient.cs | 16 +- src/Weaviate.Client/gRPC/Search.cs | 66 ++++- 5 files changed, 253 insertions(+), 106 deletions(-) diff --git a/src/Weaviate.Client.Tests/Tests.cs b/src/Weaviate.Client.Tests/Tests.cs index f702b838..f1d5c935 100644 --- a/src/Weaviate.Client.Tests/Tests.cs +++ b/src/Weaviate.Client.Tests/Tests.cs @@ -1,7 +1,7 @@ +using System.Numerics; +using Weaviate.Client.Grpc; using Weaviate.Client.Models; -// using TestData = dynamic; - namespace Weaviate.Client.Tests; internal class TestData @@ -10,141 +10,123 @@ internal class TestData } [Collection("BasicTests")] -public class WeaviateClientTest +public class WeaviateClientTest : IDisposable { - [Fact] - public async ValueTask TestBasicCollectionCreation() + WeaviateClient _weaviate; + + public WeaviateClientTest() { - // Arrange - var vectorizerConfigNone = new VectorConfig + _weaviate = new WeaviateClient(); + } + + async Task> CollectionFactory(string name, string description, IList properties, IDictionary? vectorConfig = null) + { + if (string.IsNullOrEmpty(name)) + { + name = TestContext.Current.TestMethod?.MethodName ?? string.Empty; + } + + ArgumentException.ThrowIfNullOrEmpty(name); + + if (vectorConfig is null) { - Vectorizer = new Dictionary + vectorConfig = new Dictionary { - { "none", new object { } } - }, - VectorIndexType = "hnsw", - }; + { + "default", new VectorConfig { + Vectorizer = new Dictionary { { "none", new { } } }, + VectorIndexType = "hnsw" + } + } + }; + } - var VectorConfigs = new Dictionary + var c = new Collection { - { "default", vectorizerConfigNone } + Name = name, + Description = description, + Properties = properties, + VectorConfig = vectorConfig, }; - // Act - var weaviate = new WeaviateClient(); + await _weaviate.Collections.Delete(name); - var testName = TestContext.Current.TestMethod?.MethodName ?? "TestNameNotFound"; + var collectionClient = await _weaviate.Collections.Create(c); - await weaviate.Collections.Use("TestCollection").Delete(); + return collectionClient; + } - await weaviate.Collections.Create(c => - { - c.Name = "TestCollection"; - c.Description = "Test collection description"; - c.Properties = [Property.Text("Name")]; - c.VectorConfig = VectorConfigs; - }); + async Task> CollectionFactory(string name, string description, IList properties, IDictionary? vectorConfig = null) + { + return await CollectionFactory(name, description, properties, vectorConfig); + } + + [Fact] + public async Task TestBasicCollectionCreation() + { + // Arrange + + // Act + var collectionClient = await CollectionFactory("", "Test collection description", [ + Property.Text("Name") + ]); // Assert - var collection = await weaviate.Collections.Use("TestCollection").Get(); + var collection = await _weaviate.Collections.Use(collectionClient.Name).Get(); Assert.NotNull(collection); - Assert.Equal("TestCollection", collection.Name); + Assert.Equal("TestBasicCollectionCreation", collection.Name); Assert.Equal("Test collection description", collection.Description); } [Fact] public async Task TestBasicObjectCreation() { - var vectorizerConfigNone = new VectorConfig - { - Vectorizer = new Dictionary - { - { "none", new object { } } - }, - VectorIndexType = "hnsw", - }; - - var VectorConfigs = new Dictionary - { - { "default", vectorizerConfigNone } - }; - - var weaviate = new WeaviateClient(); - - // Delete any existing "TestCollection2" class - await weaviate.Collections.Use("TestCollection2").Delete(); - - var collection = await weaviate.Collections.Create(c => - { - c.Name = "TestCollection2"; - c.Description = "Test collection description"; - c.Properties = [Property.Text("Name")]; - c.VectorConfig = VectorConfigs; - }); + // Arrange + var collectionClient = await CollectionFactory("", "Test collection description", [ + Property.Text("Name") + ]); - // Create an object in the collection + // Act var id = Guid.NewGuid(); - var obj = await collection.Data.Insert(new WeaviateObject() + var obj = await collectionClient.Data.Insert(new WeaviateObject() { Data = new TestData { Name = "TestObject" }, ID = id, }); + // Assert + // Assert object exists - var retrieved = await collection.Query.FetchObjectByID(id); + var retrieved = await collectionClient.Query.FetchObjectByID(id); Assert.NotNull(retrieved); - Assert.Equal("TestObject", retrieved.Data?.Name); Assert.Equal(id, retrieved.ID); + Assert.Equal("TestObject", retrieved.Data?.Name); - // delete after usage - await collection.Data.Delete(id); - retrieved = await collection.Query.FetchObjectByID(id); + // Delete after usage + await collectionClient.Data.Delete(id); + retrieved = await collectionClient.Query.FetchObjectByID(id); Assert.Null(retrieved); - } - - + } [Fact] public async Task TestBasicNearVectorSearch() { - var vectorizerConfigNone = new VectorConfig - { - Vectorizer = new Dictionary - { - { "none", new object { } } - }, - VectorIndexType = "hnsw", - }; - - var VectorConfigs = new Dictionary - { - { "default", vectorizerConfigNone } - }; - - var weaviate = new WeaviateClient(); - - // Delete any existing "TestCollection2" class - await weaviate.Collections.Use("TestCollection3").Delete(); - - var collection = await weaviate.Collections.Create(c => - { - c.Name = "TestCollection3"; - c.Description = "Test collection description"; - c.Properties = [Property.Text("Name")]; - c.VectorConfig = VectorConfigs; - }); + // Arrange + var collectionClient = await CollectionFactory("", "Test collection description", [ + Property.Text("Name") + ]); - // Create an object in the collection - await collection.Data.Insert(new WeaviateObject() + // Act + await collectionClient.Data.Insert(new WeaviateObject() { - Data = new TestData { Name = "TestObject1" }, + Data = new { Name = "TestObject1" }, Vectors = new Dictionary> { { "default", new float[] { 0.1f, 0.2f, 0.3f } } } }); - await collection.Data.Insert(new WeaviateObject() + await collectionClient.Data.Insert(new WeaviateObject() { Data = new TestData { Name = "TestObject2" }, Vectors = new Dictionary> @@ -153,7 +135,7 @@ await collection.Data.Insert(new WeaviateObject() } }); - await collection.Data.Insert(new WeaviateObject() + await collectionClient.Data.Insert(new WeaviateObject() { Data = new TestData { Name = "TestObject3" }, Vectors = new Dictionary> @@ -162,8 +144,8 @@ await collection.Data.Insert(new WeaviateObject() } }); - // Assert object exists - var retrieved = collection.Query.NearVector(new float[] { 0.1f, 0.2f, 0.3f }); + // Assert + var retrieved = collectionClient.Query.NearVector(new float[] { 0.1f, 0.2f, 0.3f }); Assert.NotNull(retrieved); await foreach (var obj in retrieved) @@ -173,4 +155,83 @@ await collection.Data.Insert(new WeaviateObject() break; } } + + [Fact] + public async Task TestBasicNearTextGroupBySearch() + { + // Arrange + CollectionClient? collectionClient = await CollectionFactory("", "Test collection description", [ + Property.Text("value") + ], new Dictionary + { + { + "default", new VectorConfig + { + Vectorizer = new Dictionary { { "text2vec-contextionary", new { + vectorizeClassName = false + } } }, + VectorIndexType = "hnsw" + } + } + }); + + Guid[] objects = [ + await collectionClient.Data.Insert(new WeaviateObject() + { + Data = new { Value = "Apple" }, + }), + + await collectionClient.Data.Insert(new WeaviateObject() + { + Data = new { Value = "Mountain climbing" }, + }), + + await collectionClient.Data.Insert(new WeaviateObject() + { + Data = new { Value = "apple cake" }, + }), + + await collectionClient.Data.Insert(new WeaviateObject() + { + Data = new { Value = "cake" }, + }) + ]; + + // Act + var retrieved = await collectionClient.Query.NearText( + "cake", + new GroupByConstraint + { + PropertyName = "value", + NumberOfGroups = 2, + ObjectsPerGroup = 100, + } + ); + + // Assert + Assert.NotNull(retrieved.Item1); + Assert.NotNull(retrieved.Item2); + + var retrievedObjects = retrieved.Item1.ToArray(); + + Assert.Equal(2, retrieved.Item1.Count()); + Assert.Equal(2, retrieved.Item2.Count()); + + var obj = await collectionClient.Query.FetchObjectByID(objects[3]); + Assert.NotNull(obj); + Assert.Equal(objects[3], obj.ID); + Assert.Contains("default", obj.Vectors.Keys); + + Assert.Equal(objects[3], retrievedObjects[0].ID); + Assert.Contains("default", retrievedObjects[0].Vectors.Keys); + Assert.Equal("cake", retrievedObjects[0].BelongsToGroup); + Assert.Equal(objects[2], retrievedObjects[1].ID); + Assert.Contains("default", retrievedObjects[1].Vectors.Keys); + Assert.Equal("apple cake", retrievedObjects[1].BelongsToGroup); + } + + public void Dispose() + { + _weaviate.Dispose(); + } } \ No newline at end of file diff --git a/src/Weaviate.Client/CollectionsClient.cs b/src/Weaviate.Client/CollectionsClient.cs index 6719e262..9ac58d0e 100644 --- a/src/Weaviate.Client/CollectionsClient.cs +++ b/src/Weaviate.Client/CollectionsClient.cs @@ -1,3 +1,4 @@ + namespace Weaviate.Client; public struct CollectionsClient @@ -9,17 +10,27 @@ internal CollectionsClient(WeaviateClient client) _client = client; } - public async Task> Create(Action collectionConfigurator) + public async Task> Create(Models.Collection collection) { - var collection = new Models.Collection(); + var response = await _client.RestClient.CollectionCreate(collection.ToDto()); - collectionConfigurator(collection); + return new CollectionClient(_client, response.ToModel()); + } + public async Task> Create(Models.Collection collection) + { var response = await _client.RestClient.CollectionCreate(collection.ToDto()); return new CollectionClient(_client, response.ToModel()); } + public async Task Delete(string collectionName) + { + ArgumentException.ThrowIfNullOrEmpty(collectionName); + + await _client.RestClient.CollectionDelete(collectionName); + } + public async IAsyncEnumerable List() { var response = await _client.RestClient.CollectionList(); @@ -30,6 +41,11 @@ public async Task> Create(Action Use(string name) + { + return new CollectionClient(_client, name); + } + public CollectionClient Use(string? name = null) { name = name ?? typeof(TData).Name; diff --git a/src/Weaviate.Client/Models/WeaviateObject.cs b/src/Weaviate.Client/Models/WeaviateObject.cs index 23f13e44..002678b5 100644 --- a/src/Weaviate.Client/Models/WeaviateObject.cs +++ b/src/Weaviate.Client/Models/WeaviateObject.cs @@ -61,3 +61,5 @@ public WeaviateObject(CollectionClient? collection = null) : base(collect public WeaviateObject(string collectionName) : base(collectionName) { } } + +public class WeaviateObject : WeaviateObject { } diff --git a/src/Weaviate.Client/QueryClient.cs b/src/Weaviate.Client/QueryClient.cs index 527bb9cb..5f22f72c 100644 --- a/src/Weaviate.Client/QueryClient.cs +++ b/src/Weaviate.Client/QueryClient.cs @@ -55,9 +55,21 @@ public async IAsyncEnumerable> FetchObjectsByIDs(ISet NearText(string text, int? limit = null) + public async Task<(IEnumerable, IDictionary)> NearText(string text, GroupByConstraint groupBy, float? distance = null, + float? certainty = null, uint? limit = null, string[]? fields = null, + string[]? metadata = null) { - return await Task.FromResult(new object { }); + var results = + await _client.GrpcClient.SearchNearTextWithGroupBy( + _collectionClient.Name, + text, + groupBy, + distance: distance, + certainty: certainty, + limit: limit + ); + + return results; } public async IAsyncEnumerable NearVector(float[] vector, float? distance = null, float? certainty = null, uint? limit = null, string[]? fields = null, string[]? metadata = null) diff --git a/src/Weaviate.Client/gRPC/Search.cs b/src/Weaviate.Client/gRPC/Search.cs index 151ada26..91232c47 100644 --- a/src/Weaviate.Client/gRPC/Search.cs +++ b/src/Weaviate.Client/gRPC/Search.cs @@ -58,11 +58,6 @@ internal async Task> FetchObjects(string collection, Filte }).ToList(); } - public string SearchNearText(string text, int? limit = null) - { - return ""; - } - // TODO Find a way to make IntelliSense know that it's either Distance or Certainty, but not both. public async Task> SearchNearVector(string collection, float[] vector, float? distance = null, float? certainty = null, uint? limit = null) { @@ -188,6 +183,67 @@ public async Task> SearchNearVector(string collectio var objects = groups.Values.SelectMany(g => g.Objects).ToList(); + return (objects, groups); + } + + internal async Task<(IEnumerable, IDictionary)> SearchNearTextWithGroupBy(string collection, string query, GroupByConstraint groupBy, float? distance, float? certainty, uint? limit) + { + var request = BaseSearchRequest(collection, filter: null, limit: limit); + + request.GroupBy = new GroupBy() + { + Path = { groupBy.PropertyName }, + NumberOfGroups = Convert.ToInt32(groupBy.NumberOfGroups), + ObjectsPerGroup = Convert.ToInt32(groupBy.ObjectsPerGroup), + }; + + request.NearText = new NearTextSearch + { + Query = { query }, + // Targets = null, + // VectorForTargets = { }, + }; + + if (distance.HasValue) + { + request.NearText.Distance = distance.Value; + } + + if (certainty.HasValue) + { + request.NearText.Certainty = certainty.Value; + } + + + SearchReply? reply = await _grpcClient.SearchAsync(request); + + if (!reply.GroupByResults.Any()) + { + return (new List(), new Dictionary()); + } + + var groupsEnum = reply.GroupByResults.Select(v => new WeaviateGroup() + { + Name = v.Name, + Objects = v.Objects.Select(obj => new WeaviateGroupByObject + { + ID = Guid.Parse(obj.Metadata.Id), + Vector = obj.Metadata.Vector, + Vectors = obj.Metadata.Vectors.ToDictionary(v => v.Name, v => + { + using (var ms = new MemoryStream(v.VectorBytes.ToByteArray())) + { + return ms.FromStream().ToList().AsEnumerable(); + } + }), + Properties = buildObjectFromProperties(obj.Properties.NonRefProps), + BelongsToGroup = v.Name, + }).ToArray() + }); + + var groups = groupsEnum.ToDictionary(k => k.Name, v => v); + var objects = groupsEnum.SelectMany(g => g.Objects); + return (objects, groups); } } \ No newline at end of file From 1624810fb6cc1c257d31100820bc781cc272474c Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Tue, 29 Apr 2025 17:26:28 +0200 Subject: [PATCH 2/8] Fix "And" filter --- src/Weaviate.Client/gRPC/Filter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Weaviate.Client/gRPC/Filter.cs b/src/Weaviate.Client/gRPC/Filter.cs index c3a67580..dd58eb4e 100644 --- a/src/Weaviate.Client/gRPC/Filter.cs +++ b/src/Weaviate.Client/gRPC/Filter.cs @@ -24,7 +24,7 @@ public static class Filter internal static Filters And(IEnumerable filters) => new Filters { - Operator = Filters.Types.Operator.Or, + Operator = Filters.Types.Operator.And, Filters_ = { filters } }; } \ No newline at end of file From 5518e4a0e9ab5f2cb9cdb444cf7f571f6f01f736 Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Tue, 29 Apr 2025 17:28:38 +0200 Subject: [PATCH 3/8] Unused namespace --- src/Weaviate.Client.Tests/Tests.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Weaviate.Client.Tests/Tests.cs b/src/Weaviate.Client.Tests/Tests.cs index f1d5c935..dc3606ab 100644 --- a/src/Weaviate.Client.Tests/Tests.cs +++ b/src/Weaviate.Client.Tests/Tests.cs @@ -1,4 +1,3 @@ -using System.Numerics; using Weaviate.Client.Grpc; using Weaviate.Client.Models; From 62e65f77dd175fa5840d45ac93861730f030dbda Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Wed, 30 Apr 2025 11:37:23 +0200 Subject: [PATCH 4/8] Update Weaviate version used in tests --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 4585d7ea..c18006fb 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -15,7 +15,7 @@ on: pull_request: env: - WEAVIATE: 1.30.1 + WEAVIATE: 1.30.2 jobs: integration-tests: From c95ca77c6f8f1fc7839575e962ad34670a464d4a Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Thu, 1 May 2025 16:42:39 +0200 Subject: [PATCH 5/8] Add reference to System.Linq.Async --- src/Weaviate.Client.Tests/Weaviate.Client.Tests.csproj | 1 + src/Weaviate.Client.Tests/packages.lock.json | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/src/Weaviate.Client.Tests/Weaviate.Client.Tests.csproj b/src/Weaviate.Client.Tests/Weaviate.Client.Tests.csproj index 9412148d..8424c5c6 100644 --- a/src/Weaviate.Client.Tests/Weaviate.Client.Tests.csproj +++ b/src/Weaviate.Client.Tests/Weaviate.Client.Tests.csproj @@ -23,6 +23,7 @@ + diff --git a/src/Weaviate.Client.Tests/packages.lock.json b/src/Weaviate.Client.Tests/packages.lock.json index c6e2005b..07ccff1a 100644 --- a/src/Weaviate.Client.Tests/packages.lock.json +++ b/src/Weaviate.Client.Tests/packages.lock.json @@ -12,6 +12,15 @@ "Microsoft.TestPlatform.TestHost": "17.13.0" } }, + "System.Linq.Async": { + "type": "Direct", + "requested": "[6.0.1, )", + "resolved": "6.0.1", + "contentHash": "0YhHcaroWpQ9UCot3Pizah7ryAzQhNvobLMSxeDIGmnXfkQn8u5owvpOH0K6EVB+z9L7u6Cc4W17Br/+jyttEQ==", + "dependencies": { + "Microsoft.Bcl.AsyncInterfaces": "6.0.0" + } + }, "xunit.runner.visualstudio": { "type": "Direct", "requested": "[3.0.2, )", From fdae334bd48a10c5ea1d984119c20bb7b37dbe89 Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Fri, 2 May 2025 10:00:26 +0200 Subject: [PATCH 6/8] Fix example program --- src/Example/Program.cs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Example/Program.cs b/src/Example/Program.cs index 06cb6ece..4b0be5ea 100644 --- a/src/Example/Program.cs +++ b/src/Example/Program.cs @@ -101,13 +101,15 @@ static async Task Main() { "default", vectorizerConfigNone } }; - collection = await weaviate.Collections.Create(c => + var catCollection = new Collection() { - c.Description = "Lots of Cats of multiple breeds"; - c.Name = "Cat"; - c.Properties = [Property.Text("Name"), Property.Text("Color"), Property.Text("Breed"), Property.Int("Counter")]; - c.VectorConfig = VectorConfigs; - }); + Name = "Cat", + Description = "Lots of Cats of multiple breeds", + Properties = [Property.Text("Name"), Property.Text("Color"), Property.Text("Breed"), Property.Int("Counter")], + VectorConfig = VectorConfigs + }; + + collection = await weaviate.Collections.Create(catCollection); await foreach (var c in weaviate.Collections.List()) { @@ -121,7 +123,7 @@ static async Task Main() Console.WriteLine("Cats to store: " + cats.Count()); foreach (var cat in cats) { - cat.Vectors = new Dictionary> + cat.Vectors = new Dictionary> { { "default", cat.Vector } }; From 14223a195c87646a30e1a1ac1d1742b87bb4ce28 Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Fri, 2 May 2025 11:40:22 +0200 Subject: [PATCH 7/8] Types improvements --- src/Weaviate.Client.Tests/Tests.cs | 7 ++- src/Weaviate.Client/DataClient.cs | 1 - src/Weaviate.Client/Extensions.cs | 50 +++++++++++++------ src/Weaviate.Client/Models/GroupBy.cs | 8 +++ src/Weaviate.Client/Models/NearText.cs | 20 ++++++++ src/Weaviate.Client/Models/WeaviateGroupBy.cs | 26 ++++++++++ src/Weaviate.Client/Models/WeaviateObject.cs | 24 +++------ src/Weaviate.Client/QueryClient.cs | 6 +-- .../Rest/Dto/WeaviateGroupBy.cs | 12 ----- .../Rest/Dto/WeaviateObject.cs | 2 +- src/Weaviate.Client/gRPC/Client.cs | 7 +-- src/Weaviate.Client/gRPC/Search.cs | 34 ++++++------- 12 files changed, 125 insertions(+), 72 deletions(-) create mode 100644 src/Weaviate.Client/Models/GroupBy.cs create mode 100644 src/Weaviate.Client/Models/NearText.cs create mode 100644 src/Weaviate.Client/Models/WeaviateGroupBy.cs delete mode 100644 src/Weaviate.Client/Rest/Dto/WeaviateGroupBy.cs diff --git a/src/Weaviate.Client.Tests/Tests.cs b/src/Weaviate.Client.Tests/Tests.cs index dc3606ab..4dcc104b 100644 --- a/src/Weaviate.Client.Tests/Tests.cs +++ b/src/Weaviate.Client.Tests/Tests.cs @@ -1,4 +1,3 @@ -using Weaviate.Client.Grpc; using Weaviate.Client.Models; namespace Weaviate.Client.Tests; @@ -119,7 +118,7 @@ public async Task TestBasicNearVectorSearch() await collectionClient.Data.Insert(new WeaviateObject() { Data = new { Name = "TestObject1" }, - Vectors = new Dictionary> + Vectors = new Dictionary> { { "default", new float[] { 0.1f, 0.2f, 0.3f } } } @@ -128,7 +127,7 @@ await collectionClient.Data.Insert(new WeaviateObject() await collectionClient.Data.Insert(new WeaviateObject() { Data = new TestData { Name = "TestObject2" }, - Vectors = new Dictionary> + Vectors = new Dictionary> { { "default", new float[] { 0.3f, 0.4f, 0.5f } } } @@ -137,7 +136,7 @@ await collectionClient.Data.Insert(new WeaviateObject() await collectionClient.Data.Insert(new WeaviateObject() { Data = new TestData { Name = "TestObject3" }, - Vectors = new Dictionary> + Vectors = new Dictionary> { { "default", new float[] { 0.5f, 0.6f, 0.7f } } } diff --git a/src/Weaviate.Client/DataClient.cs b/src/Weaviate.Client/DataClient.cs index 285c1ab2..9f5b23b8 100644 --- a/src/Weaviate.Client/DataClient.cs +++ b/src/Weaviate.Client/DataClient.cs @@ -1,4 +1,3 @@ -using Weaviate.Client.Grpc; using Weaviate.Client.Models; namespace Weaviate.Client; diff --git a/src/Weaviate.Client/Extensions.cs b/src/Weaviate.Client/Extensions.cs index 36140261..6acc897d 100644 --- a/src/Weaviate.Client/Extensions.cs +++ b/src/Weaviate.Client/Extensions.cs @@ -12,6 +12,23 @@ public static class WeaviateExtensions WriteIndented = true, // For readability }; + public static WeaviateObject ToWeaviateObject(this WeaviateObject data) + { + var obj = (T)BuildConcreteTypeObjectFromProperties(data.Data); + + return new WeaviateObject(data.CollectionName ?? string.Empty) + { + Data = obj, + ID = data.ID, + Additional = data.Additional, + CreationTime = data.CreationTime, + LastUpdateTime = data.LastUpdateTime, + Tenant = data.Tenant, + Vector = data.Vector, + Vectors = data.Vectors, + }; + } + public static WeaviateObject ToWeaviateObject(this Rest.Dto.WeaviateObject data) { return new WeaviateObject(data.Class ?? string.Empty) @@ -86,12 +103,17 @@ public static WeaviateObject ToWeaviateObject(this Rest.Dto.WeaviateObject return props; } + internal static IEnumerable> ToObjects(this IEnumerable> list) + { + return list.Select(ToWeaviateObject); + } + internal static IEnumerable> ToObjects(this IEnumerable list) { return list.Select(ToWeaviateObject); } - internal static Rest.Dto.CollectionGeneric ToDto(this Models.Collection collection) + internal static Rest.Dto.CollectionGeneric ToDto(this Collection collection) { var data = new Rest.Dto.CollectionGeneric() { @@ -124,7 +146,7 @@ internal static Rest.Dto.CollectionGeneric ToDto(this Models.Collection collecti }); } - if (collection.ReplicationConfig is Models.ReplicationConfig rc) + if (collection.ReplicationConfig is ReplicationConfig rc) { data.ReplicationConfig = new Rest.Dto.ReplicationConfig() { @@ -134,7 +156,7 @@ internal static Rest.Dto.CollectionGeneric ToDto(this Models.Collection collecti }; } - if (collection.MultiTenancyConfig is Models.MultiTenancyConfig mtc) + if (collection.MultiTenancyConfig is MultiTenancyConfig mtc) { data.MultiTenancyConfig = new Rest.Dto.MultiTenancyConfig() { @@ -169,27 +191,27 @@ internal static Rest.Dto.CollectionGeneric ToDto(this Models.Collection collecti return data; } - internal static Models.Collection ToModel(this Rest.Dto.CollectionGeneric collection) + internal static Collection ToModel(this Rest.Dto.CollectionGeneric collection) { - return new Models.Collection() + return new Collection() { Name = collection.Class, Description = collection.Description, - Properties = collection.Properties.Select(p => new Models.Property() + Properties = collection.Properties.Select(p => new Property() { Name = p.Name, DataType = p.DataType.ToList() }).ToList(), InvertedIndexConfig = (collection.InvertedIndexConfig is Rest.Dto.InvertedIndexConfig iic) - ? new Models.InvertedIndexConfig() + ? new InvertedIndexConfig() { - Bm25 = iic.Bm25 == null ? null : new Models.BM25Config + Bm25 = iic.Bm25 == null ? null : new BM25Config { B = iic.Bm25.B, K1 = iic.Bm25.K1, }, Stopwords = (iic.Stopwords is Rest.Dto.StopwordConfig swc) - ? new Models.StopwordConfig + ? new StopwordConfig { Additions = swc.Additions, Preset = swc.Preset, @@ -203,14 +225,14 @@ internal static Models.Collection ToModel(this Rest.Dto.CollectionGeneric collec ShardingConfig = collection.ShardingConfig, ModuleConfig = collection.ModuleConfig, ReplicationConfig = (collection.ReplicationConfig is Rest.Dto.ReplicationConfig rc) - ? new Models.ReplicationConfig + ? new ReplicationConfig { AsyncEnabled = rc.AsyncEnabled, Factor = rc.Factor, - DeletionStrategy = (Models.DeletionStrategy?)rc.DeletionStrategy, + DeletionStrategy = (DeletionStrategy?)rc.DeletionStrategy, } : null, MultiTenancyConfig = (collection.MultiTenancyConfig is Rest.Dto.MultiTenancyConfig mtc) - ? new Models.MultiTenancyConfig + ? new MultiTenancyConfig { Enabled = mtc.Enabled, AutoTenantActivation = mtc.AutoTenantActivation, @@ -220,13 +242,13 @@ internal static Models.Collection ToModel(this Rest.Dto.CollectionGeneric collec collection.VectorConfig?.ToList() .ToDictionary( e => e.Key, - e => new Models.VectorConfig + e => new VectorConfig { VectorIndexConfig = e.Value.VectorIndexConfig, VectorIndexType = e.Value.VectorIndexType, Vectorizer = e.Value.Vectorizer, } - ) ?? new Dictionary(), + ) ?? new Dictionary(), Vectorizer = collection.Vectorizer, VectorIndexType = collection.VectorIndexType, VectorIndexConfig = collection.VectorIndexConfig, diff --git a/src/Weaviate.Client/Models/GroupBy.cs b/src/Weaviate.Client/Models/GroupBy.cs new file mode 100644 index 00000000..4993d4a0 --- /dev/null +++ b/src/Weaviate.Client/Models/GroupBy.cs @@ -0,0 +1,8 @@ +namespace Weaviate.Client.Models; + +public record GroupByConstraint +{ + public required string PropertyName { get; set; } + public uint NumberOfGroups { get; set; } + public uint ObjectsPerGroup { get; set; } +} diff --git a/src/Weaviate.Client/Models/NearText.cs b/src/Weaviate.Client/Models/NearText.cs new file mode 100644 index 00000000..e2e5d268 --- /dev/null +++ b/src/Weaviate.Client/Models/NearText.cs @@ -0,0 +1,20 @@ +namespace Weaviate.Client.Models; + +public record Move +{ + public float Force { get; } + public Guid? Objects { get; } + public string? Concepts { get; } + + public Move(float force, Guid? objects = null, string? concepts = null) + { + if (objects is null && string.IsNullOrEmpty(concepts)) + { + throw new ArgumentException("Either objects or concepts need to be given"); + } + + Force = force; + Objects = objects; + Concepts = concepts; + } +} \ No newline at end of file diff --git a/src/Weaviate.Client/Models/WeaviateGroupBy.cs b/src/Weaviate.Client/Models/WeaviateGroupBy.cs new file mode 100644 index 00000000..ca33f8ba --- /dev/null +++ b/src/Weaviate.Client/Models/WeaviateGroupBy.cs @@ -0,0 +1,26 @@ +namespace Weaviate.Client.Models; + +public record WeaviateGroup +{ + public required string Name { get; init; } + public required WeaviateGroupByObject[] Objects { get; init; } = Array.Empty(); +} + +public record WeaviateGroupByObject : WeaviateObject +{ + public required string BelongsToGroup { get; init; } + public WeaviateGroupByObject(string collectionName) : base(collectionName) { } +} + +public record GroupByResult(IEnumerable Objects, IDictionary Groups) +{ + public static implicit operator (IEnumerable Objects, IDictionary Groups)(GroupByResult value) + { + return (value.Objects, value.Groups); + } + + public static implicit operator GroupByResult((IEnumerable Objects, IDictionary Groups) value) + { + return new GroupByResult(value.Objects, value.Groups); + } +} \ No newline at end of file diff --git a/src/Weaviate.Client/Models/WeaviateObject.cs b/src/Weaviate.Client/Models/WeaviateObject.cs index 002678b5..7b239459 100644 --- a/src/Weaviate.Client/Models/WeaviateObject.cs +++ b/src/Weaviate.Client/Models/WeaviateObject.cs @@ -1,6 +1,6 @@ namespace Weaviate.Client.Models; -public class WeaviateObject +public record WeaviateObject { public CollectionClient? Collection { get; } @@ -18,7 +18,7 @@ public class WeaviateObject public string? Tenant { get; set; } - public IDictionary> Vectors { get; set; } = new Dictionary>(); + public IDictionary> Vectors { get; set; } = new Dictionary>(); public WeaviateObject(CollectionClient? collection = null) : this(collection?.Name ?? typeof(TData).Name) { @@ -33,19 +33,6 @@ public WeaviateObject(string collectionName) /// [Obsolete("Use Vectors instead.")] public IList? Vector { get; set; } = new List(); - // { - // get - // { - // return Vectors.ContainsKey("default") ? Vectors["default"] : Vectors["default"] = []; - // } - // set - // { - // if (value != null) - // { - // Vectors["default"] = value; - // } - // } - // } public static IList EmptyVector() @@ -54,7 +41,7 @@ public static IList EmptyVector() } } -public class WeaviateObject : WeaviateObject +public record WeaviateObject : WeaviateObject { [System.Text.Json.Serialization.JsonConstructor] public WeaviateObject(CollectionClient? collection = null) : base(collection) { } @@ -62,4 +49,7 @@ public WeaviateObject(CollectionClient? collection = null) : base(collect public WeaviateObject(string collectionName) : base(collectionName) { } } -public class WeaviateObject : WeaviateObject { } +public record WeaviateObject : WeaviateObject +{ + public WeaviateObject(string collectionName) : base(collectionName) { } +} diff --git a/src/Weaviate.Client/QueryClient.cs b/src/Weaviate.Client/QueryClient.cs index 5f22f72c..7d66e235 100644 --- a/src/Weaviate.Client/QueryClient.cs +++ b/src/Weaviate.Client/QueryClient.cs @@ -55,7 +55,7 @@ public async IAsyncEnumerable> FetchObjectsByIDs(ISet, IDictionary)> NearText(string text, GroupByConstraint groupBy, float? distance = null, + public async Task<(IEnumerable, IDictionary)> NearText(string text, Models.GroupByConstraint groupBy, float? distance = null, float? certainty = null, uint? limit = null, string[]? fields = null, string[]? metadata = null) { @@ -72,7 +72,7 @@ await _client.GrpcClient.SearchNearTextWithGroupBy( return results; } - public async IAsyncEnumerable NearVector(float[] vector, float? distance = null, float? certainty = null, uint? limit = null, string[]? fields = null, string[]? metadata = null) + public async IAsyncEnumerable NearVector(float[] vector, float? distance = null, float? certainty = null, uint? limit = null, string[]? fields = null, string[]? metadata = null) { var results = await _client.GrpcClient.SearchNearVector( @@ -88,7 +88,7 @@ await _client.GrpcClient.SearchNearVector( } } - public async Task<(IEnumerable, IDictionary)> NearVector(float[] vector, GroupByConstraint groupBy, float? distance = null, + public async Task<(IEnumerable, IDictionary)> NearVector(float[] vector, Models.GroupByConstraint groupBy, float? distance = null, float? certainty = null, uint? limit = null, string[]? fields = null, string[]? metadata = null) { diff --git a/src/Weaviate.Client/Rest/Dto/WeaviateGroupBy.cs b/src/Weaviate.Client/Rest/Dto/WeaviateGroupBy.cs deleted file mode 100644 index 1dc03806..00000000 --- a/src/Weaviate.Client/Rest/Dto/WeaviateGroupBy.cs +++ /dev/null @@ -1,12 +0,0 @@ -namespace Weaviate.Client.Rest.Dto; - -public class WeaviateGroup -{ - public string Name { get; set; } - public WeaviateGroupByObject[] Objects { get; set; } -} - -public class WeaviateGroupByObject : WeaviateObject -{ - public string BelongsToGroup { get; set; } -} diff --git a/src/Weaviate.Client/Rest/Dto/WeaviateObject.cs b/src/Weaviate.Client/Rest/Dto/WeaviateObject.cs index 13c8ad89..c1d20568 100644 --- a/src/Weaviate.Client/Rest/Dto/WeaviateObject.cs +++ b/src/Weaviate.Client/Rest/Dto/WeaviateObject.cs @@ -81,5 +81,5 @@ public class WeaviateObject /// Vectors associated with the Object. /// [JsonPropertyName("vectors")] - public IDictionary>? Vectors { get; set; } = new Dictionary>(); + public IDictionary>? Vectors { get; set; } = new Dictionary>(); } \ No newline at end of file diff --git a/src/Weaviate.Client/gRPC/Client.cs b/src/Weaviate.Client/gRPC/Client.cs index 5da9ece4..d85aa480 100644 --- a/src/Weaviate.Client/gRPC/Client.cs +++ b/src/Weaviate.Client/gRPC/Client.cs @@ -45,9 +45,10 @@ private static IList buildListFromListValue(ListValue list) } } - private static object? buildObjectFromProperties(Properties result) + private static ExpandoObject buildObjectFromProperties(Properties result) { - var eo = new ExpandoObject() as IDictionary; + var eoBase = new ExpandoObject(); + var eo = eoBase as IDictionary; foreach (var r in result.Fields) { @@ -96,7 +97,7 @@ private static IList buildListFromListValue(ListValue list) } } - return eo; + return eoBase; } public void Dispose() diff --git a/src/Weaviate.Client/gRPC/Search.cs b/src/Weaviate.Client/gRPC/Search.cs index 91232c47..23db4988 100644 --- a/src/Weaviate.Client/gRPC/Search.cs +++ b/src/Weaviate.Client/gRPC/Search.cs @@ -1,5 +1,5 @@ using Google.Protobuf; -using Weaviate.Client.Rest.Dto; +using Weaviate.Client.Models; using Weaviate.V1; namespace Weaviate.Client.Grpc; @@ -32,7 +32,7 @@ internal SearchRequest BaseSearchRequest(string collection, Filters? filter = nu }; } - internal async Task> FetchObjects(string collection, Filters? filter = null, uint? limit = null) + internal async Task> FetchObjects(string collection, Filters? filter = null, uint? limit = null) { var req = BaseSearchRequest(collection, filter, limit); @@ -43,7 +43,7 @@ internal async Task> FetchObjects(string collection, Filte return []; } - return reply.Results.Select(result => new WeaviateObject + return reply.Results.Select(result => new Models.WeaviateObject(collection) { ID = Guid.Parse(result.Metadata.Id), Vector = result.Metadata.Vector, @@ -51,15 +51,15 @@ internal async Task> FetchObjects(string collection, Filte { using (var ms = new MemoryStream(v.VectorBytes.ToByteArray())) { - return ms.FromStream().ToList().AsEnumerable(); + return (IList)ms.FromStream().ToList(); } }), - Properties = buildObjectFromProperties(result.Properties.NonRefProps), + Data = buildObjectFromProperties(result.Properties.NonRefProps), }).ToList(); } // TODO Find a way to make IntelliSense know that it's either Distance or Certainty, but not both. - public async Task> SearchNearVector(string collection, float[] vector, float? distance = null, float? certainty = null, uint? limit = null) + public async Task> SearchNearVector(string collection, float[] vector, float? distance = null, float? certainty = null, uint? limit = null) { var request = BaseSearchRequest(collection, filter: null, limit: limit); @@ -99,7 +99,7 @@ public async Task> SearchNearVector(string collectio return []; } - return reply.Results.Select(result => new WeaviateObject + return reply.Results.Select(result => new Models.WeaviateObject(collection) { ID = Guid.Parse(result.Metadata.Id), Vector = result.Metadata.Vector, @@ -107,14 +107,14 @@ public async Task> SearchNearVector(string collectio { using (var ms = new MemoryStream(v.VectorBytes.ToByteArray())) { - return ms.FromStream().ToList().AsEnumerable(); + return (IList)ms.FromStream().ToList(); } }), - Properties = buildObjectFromProperties(result.Properties.NonRefProps), + Data = buildObjectFromProperties(result.Properties.NonRefProps), }).ToList(); } - public async Task<(IEnumerable, IDictionary)> SearchNearVectorWithGroupBy(string collection, float[] vector, GroupByConstraint groupBy, float? distance = null, float? certainty = null, uint? limit = null) + public async Task<(IEnumerable, IDictionary)> SearchNearVectorWithGroupBy(string collection, float[] vector, Models.GroupByConstraint groupBy, float? distance = null, float? certainty = null, uint? limit = null) { var request = BaseSearchRequest(collection, filter: null, limit: limit); @@ -164,7 +164,7 @@ public async Task> SearchNearVector(string collectio var groups = reply.GroupByResults.ToDictionary(k => k.Name, v => new WeaviateGroup() { Name = v.Name, - Objects = v.Objects.Select(obj => new WeaviateGroupByObject + Objects = v.Objects.Select(obj => new WeaviateGroupByObject(collection) { ID = Guid.Parse(obj.Metadata.Id), Vector = obj.Metadata.Vector, @@ -172,10 +172,10 @@ public async Task> SearchNearVector(string collectio { using (var ms = new MemoryStream(v.VectorBytes.ToByteArray())) { - return ms.FromStream().ToList().AsEnumerable(); + return (IList)ms.FromStream().ToList(); } }), - Properties = buildObjectFromProperties(obj.Properties.NonRefProps), + Data = buildObjectFromProperties(obj.Properties.NonRefProps), BelongsToGroup = v.Name, }).ToArray() }); @@ -186,7 +186,7 @@ public async Task> SearchNearVector(string collectio return (objects, groups); } - internal async Task<(IEnumerable, IDictionary)> SearchNearTextWithGroupBy(string collection, string query, GroupByConstraint groupBy, float? distance, float? certainty, uint? limit) + internal async Task<(IEnumerable, IDictionary)> SearchNearTextWithGroupBy(string collection, string query, Models.GroupByConstraint groupBy, float? distance, float? certainty, uint? limit) { var request = BaseSearchRequest(collection, filter: null, limit: limit); @@ -225,7 +225,7 @@ public async Task> SearchNearVector(string collectio var groupsEnum = reply.GroupByResults.Select(v => new WeaviateGroup() { Name = v.Name, - Objects = v.Objects.Select(obj => new WeaviateGroupByObject + Objects = v.Objects.Select(obj => new WeaviateGroupByObject(collection) { ID = Guid.Parse(obj.Metadata.Id), Vector = obj.Metadata.Vector, @@ -233,10 +233,10 @@ public async Task> SearchNearVector(string collectio { using (var ms = new MemoryStream(v.VectorBytes.ToByteArray())) { - return ms.FromStream().ToList().AsEnumerable(); + return (IList)ms.FromStream().ToList(); } }), - Properties = buildObjectFromProperties(obj.Properties.NonRefProps), + Data = buildObjectFromProperties(obj.Properties.NonRefProps), BelongsToGroup = v.Name, }).ToArray() }); From 847b9861c5cee245a7aec821c07e90f49b76720a Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Fri, 2 May 2025 16:31:25 +0200 Subject: [PATCH 8/8] NearText search --- src/Weaviate.Client.Tests/Tests.cs | 132 ++++++---- src/Weaviate.Client/Extensions.cs | 74 ++++-- src/Weaviate.Client/QueryClient.cs | 37 ++- src/Weaviate.Client/Rest/Client.cs | 9 +- .../Rest/Dto/WeaviateObject.cs | 2 +- src/Weaviate.Client/gRPC/Search.cs | 240 +++++++----------- 6 files changed, 270 insertions(+), 224 deletions(-) diff --git a/src/Weaviate.Client.Tests/Tests.cs b/src/Weaviate.Client.Tests/Tests.cs index 4dcc104b..7b2fcb42 100644 --- a/src/Weaviate.Client.Tests/Tests.cs +++ b/src/Weaviate.Client.Tests/Tests.cs @@ -7,6 +7,11 @@ internal class TestData public string Name { get; set; } = string.Empty; } +internal class TestDataValue +{ + public string Value { get; set; } = string.Empty; +} + [Collection("BasicTests")] public class WeaviateClientTest : IDisposable { @@ -17,6 +22,11 @@ public WeaviateClientTest() _weaviate = new WeaviateClient(); } + public void Dispose() + { + _weaviate.Dispose(); + } + async Task> CollectionFactory(string name, string description, IList properties, IDictionary? vectorConfig = null) { if (string.IsNullOrEmpty(name)) @@ -59,6 +69,14 @@ async Task> CollectionFactory(string name, string desc return await CollectionFactory(name, description, properties, vectorConfig); } + WeaviateObject DataFactory(TData value) + { + return new WeaviateObject() + { + Data = value + }; + } + [Fact] public async Task TestBasicCollectionCreation() { @@ -110,21 +128,21 @@ public async Task TestBasicObjectCreation() public async Task TestBasicNearVectorSearch() { // Arrange - var collectionClient = await CollectionFactory("", "Test collection description", [ + var collectionClient = await CollectionFactory("", "Test collection description", [ Property.Text("Name") ]); // Act - await collectionClient.Data.Insert(new WeaviateObject() + await collectionClient.Data.Insert(new WeaviateObject() { - Data = new { Name = "TestObject1" }, + Data = new TestData { Name = "TestObject1" }, Vectors = new Dictionary> { { "default", new float[] { 0.1f, 0.2f, 0.3f } } } }); - await collectionClient.Data.Insert(new WeaviateObject() + await collectionClient.Data.Insert(new WeaviateObject() { Data = new TestData { Name = "TestObject2" }, Vectors = new Dictionary> @@ -133,7 +151,7 @@ await collectionClient.Data.Insert(new WeaviateObject() } }); - await collectionClient.Data.Insert(new WeaviateObject() + await collectionClient.Data.Insert(new WeaviateObject() { Data = new TestData { Name = "TestObject3" }, Vectors = new Dictionary> @@ -148,52 +166,83 @@ await collectionClient.Data.Insert(new WeaviateObject() await foreach (var obj in retrieved) { - var lobj = obj.ToWeaviateObject(); - Assert.Equal("TestObject1", lobj.Data!.Name); + Assert.Equal("TestObject1", obj.Data!.Name); break; } } [Fact] - public async Task TestBasicNearTextGroupBySearch() + public async Task TestBasicNearTextSearch() { // Arrange - CollectionClient? collectionClient = await CollectionFactory("", "Test collection description", [ + var collectionClient = await CollectionFactory("", "Test collection description", [ Property.Text("value") ], new Dictionary { { "default", new VectorConfig { - Vectorizer = new Dictionary { { "text2vec-contextionary", new { - vectorizeClassName = false - } } }, + Vectorizer = new Dictionary { + { + "text2vec-contextionary", new { + vectorizeClassName = false + } + } + }, VectorIndexType = "hnsw" } } }); - Guid[] objects = [ - await collectionClient.Data.Insert(new WeaviateObject() - { - Data = new { Value = "Apple" }, - }), + string[] values = ["Apple", "Mountain climbing", "apple cake", "cake"]; + var tasks = values.Select(s => new TestDataValue { Value = s }).Select(DataFactory).Select(collectionClient.Data.Insert); + Guid[] guids = await Task.WhenAll(tasks); + var concepts = "hiking"; - await collectionClient.Data.Insert(new WeaviateObject() - { - Data = new { Value = "Mountain climbing" }, - }), + // Act + var retriever = collectionClient.Query.NearText( + "cake", + moveTo: new Move(1.0f, objects: guids[0]), + moveAway: new Move(0.5f, concepts: concepts), + fields: ["value"] + ); + var retrieved = await retriever.ToListAsync(TestContext.Current.CancellationToken); - await collectionClient.Data.Insert(new WeaviateObject() - { - Data = new { Value = "apple cake" }, - }), + // Assert + Assert.NotNull(retrieved); + Assert.Equal(4, retrieved.Count()); + + Assert.Equal(retrieved[0].ID, guids[2]); + Assert.Contains("default", retrieved[0].Vectors.Keys); + Assert.Equal("apple cake", retrieved[0].Data?.Value); + } - await collectionClient.Data.Insert(new WeaviateObject() + [Fact] + public async Task TestBasicNearTextGroupBySearch() + { + // Arrange + CollectionClient? collectionClient = await CollectionFactory("", "Test collection description", [ + Property.Text("value") + ], new Dictionary + { { - Data = new { Value = "cake" }, - }) - ]; + "default", new VectorConfig + { + Vectorizer = new Dictionary { + { + "text2vec-contextionary", new { + vectorizeClassName = false + } + } + }, + VectorIndexType = "hnsw" + } + } + }); + + string[] values = ["Apple", "Mountain climbing", "apple cake", "cake"]; + var tasks = values.Select(s => new { Value = s }).Select(DataFactory).Select(collectionClient.Data.Insert); + Guid[] guids = await Task.WhenAll(tasks); // Act var retrieved = await collectionClient.Query.NearText( @@ -207,29 +256,24 @@ await collectionClient.Data.Insert(new WeaviateObject() ); // Assert - Assert.NotNull(retrieved.Item1); - Assert.NotNull(retrieved.Item2); + Assert.NotNull(retrieved.Objects); + Assert.NotNull(retrieved.Groups); - var retrievedObjects = retrieved.Item1.ToArray(); + var retrievedObjects = retrieved.Objects.ToArray(); - Assert.Equal(2, retrieved.Item1.Count()); - Assert.Equal(2, retrieved.Item2.Count()); + Assert.Equal(2, retrieved.Objects.Count()); + Assert.Equal(2, retrieved.Groups.Count()); - var obj = await collectionClient.Query.FetchObjectByID(objects[3]); + var obj = await collectionClient.Query.FetchObjectByID(guids[3]); Assert.NotNull(obj); - Assert.Equal(objects[3], obj.ID); + Assert.Equal(guids[3], obj.ID); Assert.Contains("default", obj.Vectors.Keys); - Assert.Equal(objects[3], retrievedObjects[0].ID); + Assert.Equal(guids[3], retrievedObjects[0].ID); Assert.Contains("default", retrievedObjects[0].Vectors.Keys); Assert.Equal("cake", retrievedObjects[0].BelongsToGroup); - Assert.Equal(objects[2], retrievedObjects[1].ID); + Assert.Equal(guids[2], retrievedObjects[1].ID); Assert.Contains("default", retrievedObjects[1].Vectors.Keys); Assert.Equal("apple cake", retrievedObjects[1].BelongsToGroup); } - - public void Dispose() - { - _weaviate.Dispose(); - } -} \ No newline at end of file +} diff --git a/src/Weaviate.Client/Extensions.cs b/src/Weaviate.Client/Extensions.cs index 6acc897d..4f656862 100644 --- a/src/Weaviate.Client/Extensions.cs +++ b/src/Weaviate.Client/Extensions.cs @@ -44,28 +44,39 @@ public static WeaviateObject ToWeaviateObject(this Rest.Dto.WeaviateObject }; } - internal static T? BuildConcreteTypeObjectFromProperties(object? data) + public static WeaviateObject ToWeaviateObject(this Models.WeaviateObject data) { - T? props = default; + var obj = (T)BuildConcreteTypeObjectFromProperties(data.Data); + + return new WeaviateObject(data.CollectionName ?? string.Empty) + { + Data = obj, + ID = data.ID, + Additional = data.Additional, + CreationTime = data.CreationTime, + LastUpdateTime = data.LastUpdateTime, + Tenant = data.Tenant, + Vector = data.Vector, + Vectors = data.Vectors, + }; + } + internal static T? BuildConcreteTypeObjectFromProperties(object? data) + { switch (data) { case JsonElement properties: - props = properties.Deserialize(_defaultJsonSerializationOptions); - break; - case IDictionary dict: - props = UnmarshallProperties(dict); - break; + return properties.Deserialize(_defaultJsonSerializationOptions); + case IDictionary dict: + return UnmarshallProperties(dict); case null: - return props; + return default; default: throw new NotSupportedException($"Unsupported type for properties: {data?.GetType()}"); } - - return props; } - private static T? UnmarshallProperties(IDictionary dict) + private static T? UnmarshallProperties(IDictionary dict) { if (dict == null) throw new ArgumentNullException(nameof(dict)); @@ -73,6 +84,26 @@ public static WeaviateObject ToWeaviateObject(this Rest.Dto.WeaviateObject // Create an instance of T using the default constructor var props = Activator.CreateInstance(); + if (typeof(T) == typeof(IDictionary)) + { + var target = (IDictionary)props; + + foreach (var kvp in dict) + { + if (kvp.Value is IDictionary subDict) + { + dynamic? v = UnmarshallProperties(subDict); + + target[Capitalize(kvp.Key)] = v ?? subDict; + } + else + { + target[Capitalize(kvp.Key)] = kvp.Value; + } + } + return props; + } + var type = typeof(T); var properties = type.GetProperties(); @@ -255,15 +286,13 @@ internal static Collection ToModel(this Rest.Dto.CollectionGeneric collection) }; } - internal static IEnumerable FromStream(this Stream stream) where T : struct + internal static IEnumerable FromByteString(this Google.Protobuf.ByteString byteString) where T : struct { - // Ensure the stream is readable and seekable for the Length check - if (!stream.CanRead) - throw new ArgumentException("Stream must be readable.", nameof(stream)); - if (!stream.CanSeek) - throw new ArgumentException("Stream must be seekable to check Length.", nameof(stream)); + using var stream = new MemoryStream(); + + byteString.WriteTo(stream); + stream.Seek(0, SeekOrigin.Begin); // Reset the stream position to the beginning - // Keep the stream open after the reader is disposed using var reader = new BinaryReader(stream); while (stream.Position < stream.Length) @@ -308,12 +337,11 @@ internal static MemoryStream ToStream(this IEnumerable items) where T : st return stream; } - internal static byte[] ToByteArray(this IEnumerable items) where T : struct + internal static Google.Protobuf.ByteString ToByteString(this IEnumerable items) where T : struct { - using (var stream = items.ToStream()) - { - return stream.ToArray(); - } + using var stream = items.ToStream(); + + return Google.Protobuf.ByteString.FromStream(stream); } public static string Capitalize(this string str) diff --git a/src/Weaviate.Client/QueryClient.cs b/src/Weaviate.Client/QueryClient.cs index 7d66e235..09ffccf8 100644 --- a/src/Weaviate.Client/QueryClient.cs +++ b/src/Weaviate.Client/QueryClient.cs @@ -46,21 +46,41 @@ public async IAsyncEnumerable> FetchObjectsByIDs(ISet()) + foreach (var r in list.Select(x => x.ToWeaviateObject())) { - yield return data; + yield return r; } } #endregion #region Search - public async Task<(IEnumerable, IDictionary)> NearText(string text, Models.GroupByConstraint groupBy, float? distance = null, + public async IAsyncEnumerable> NearText(string text, float? distance = null, float? certainty = null, uint? limit = null, string[]? fields = null, + string[]? metadata = null, Move? moveTo = null, Move? moveAway = null) + { + var results = + await _client.GrpcClient.SearchNearText( + _collectionClient.Name, + text, + distance: distance, + certainty: certainty, + limit: limit, + moveTo: moveTo, + moveAway: moveAway + ); + + foreach (var r in results.Select(x => x.ToWeaviateObject())) + { + yield return r; + } + } + + public async Task NearText(string text, Models.GroupByConstraint groupBy, float? distance = null, float? certainty = null, uint? limit = null, string[]? fields = null, string[]? metadata = null) { var results = - await _client.GrpcClient.SearchNearTextWithGroupBy( + await _client.GrpcClient.SearchNearText( _collectionClient.Name, text, groupBy, @@ -72,7 +92,7 @@ await _client.GrpcClient.SearchNearTextWithGroupBy( return results; } - public async IAsyncEnumerable NearVector(float[] vector, float? distance = null, float? certainty = null, uint? limit = null, string[]? fields = null, string[]? metadata = null) + public async IAsyncEnumerable> NearVector(float[] vector, float? distance = null, float? certainty = null, uint? limit = null, string[]? fields = null, string[]? metadata = null) { var results = await _client.GrpcClient.SearchNearVector( @@ -82,18 +102,19 @@ await _client.GrpcClient.SearchNearVector( certainty: certainty, limit: limit ); + foreach (var r in results) { - yield return r; + yield return r.ToWeaviateObject(); } } - public async Task<(IEnumerable, IDictionary)> NearVector(float[] vector, Models.GroupByConstraint groupBy, float? distance = null, + public async Task NearVector(float[] vector, GroupByConstraint groupBy, float? distance = null, float? certainty = null, uint? limit = null, string[]? fields = null, string[]? metadata = null) { var results = - await _client.GrpcClient.SearchNearVectorWithGroupBy( + await _client.GrpcClient.SearchNearVector( _collectionClient.Name, vector, groupBy, diff --git a/src/Weaviate.Client/Rest/Client.cs b/src/Weaviate.Client/Rest/Client.cs index df562410..70523391 100644 --- a/src/Weaviate.Client/Rest/Client.cs +++ b/src/Weaviate.Client/Rest/Client.cs @@ -124,7 +124,8 @@ private void ValidateResponseStatusCode(HttpResponseMessage response, ExpectedSt } } - internal async Task GetAsync(string requestUri, ExpectedStatusCodes expectedStatusCodes){ + internal async Task GetAsync(string requestUri, ExpectedStatusCodes expectedStatusCodes) + { var response = await _httpClient.GetAsync(requestUri); ValidateResponseStatusCode(response, expectedStatusCodes); @@ -132,7 +133,8 @@ internal async Task GetAsync(string requestUri, ExpectedSta return response; } - internal async Task DeleteAsync(string requestUri, ExpectedStatusCodes expectedStatusCodes){ + internal async Task DeleteAsync(string requestUri, ExpectedStatusCodes expectedStatusCodes) + { var response = await _httpClient.DeleteAsync(requestUri); ValidateResponseStatusCode(response, expectedStatusCodes); @@ -140,7 +142,8 @@ internal async Task DeleteAsync(string requestUri, Expected return response; } - internal async Task PostAsJsonAsync(string? requestUri, TValue value, ExpectedStatusCodes expectedStatusCodes){ + internal async Task PostAsJsonAsync(string? requestUri, TValue value, ExpectedStatusCodes expectedStatusCodes) + { var response = await _httpClient.PostAsJsonAsync(requestUri, value); ValidateResponseStatusCode(response, expectedStatusCodes); diff --git a/src/Weaviate.Client/Rest/Dto/WeaviateObject.cs b/src/Weaviate.Client/Rest/Dto/WeaviateObject.cs index c1d20568..316d0ce4 100644 --- a/src/Weaviate.Client/Rest/Dto/WeaviateObject.cs +++ b/src/Weaviate.Client/Rest/Dto/WeaviateObject.cs @@ -6,7 +6,7 @@ namespace Weaviate.Client.Rest.Dto; /// /// Represents a Weaviate object with its associated properties and metadata. /// -public class WeaviateObject +public record WeaviateObject { /// /// Additional properties associated with the object. diff --git a/src/Weaviate.Client/gRPC/Search.cs b/src/Weaviate.Client/gRPC/Search.cs index 23db4988..266e8886 100644 --- a/src/Weaviate.Client/gRPC/Search.cs +++ b/src/Weaviate.Client/gRPC/Search.cs @@ -1,19 +1,13 @@ -using Google.Protobuf; using Weaviate.Client.Models; using Weaviate.V1; -namespace Weaviate.Client.Grpc; +using WeaviateObject = Weaviate.Client.Models.WeaviateObject; -public class GroupByConstraint -{ - public required string PropertyName { get; set; } - public uint NumberOfGroups { get; set; } - public uint ObjectsPerGroup { get; set; } -} +namespace Weaviate.Client.Grpc; public partial class WeaviateGrpcClient { - internal SearchRequest BaseSearchRequest(string collection, Filters? filter = null, uint? limit = null) + internal SearchRequest BaseSearchRequest(string collection, Filters? filter = null, uint? limit = null, GroupByConstraint? groupBy = null) { return new SearchRequest() { @@ -23,6 +17,12 @@ internal SearchRequest BaseSearchRequest(string collection, Filters? filter = nu Uses125Api = true, Uses127Api = true, Limit = limit ?? 0, + GroupBy = groupBy is not null ? new GroupBy() + { + Path = { groupBy.PropertyName }, + NumberOfGroups = Convert.ToInt32(groupBy.NumberOfGroups), + ObjectsPerGroup = Convert.ToInt32(groupBy.ObjectsPerGroup), + } : null, Metadata = new MetadataRequest() { Uuid = true, @@ -32,12 +32,8 @@ internal SearchRequest BaseSearchRequest(string collection, Filters? filter = nu }; } - internal async Task> FetchObjects(string collection, Filters? filter = null, uint? limit = null) + private static IEnumerable BuildResult(string collection, SearchReply reply) { - var req = BaseSearchRequest(collection, filter, limit); - - SearchReply? reply = await _grpcClient.SearchAsync(req); - if (!reply.Results.Any()) { return []; @@ -47,88 +43,82 @@ internal SearchRequest BaseSearchRequest(string collection, Filters? filter = nu { ID = Guid.Parse(result.Metadata.Id), Vector = result.Metadata.Vector, - Vectors = result.Metadata.Vectors.ToDictionary(v => v.Name, v => - { - using (var ms = new MemoryStream(v.VectorBytes.ToByteArray())) - { - return (IList)ms.FromStream().ToList(); - } - }), + Vectors = result.Metadata.Vectors.ToDictionary(v => v.Name, v => (IList)v.VectorBytes.FromByteString().ToList()), Data = buildObjectFromProperties(result.Properties.NonRefProps), - }).ToList(); + }); } - // TODO Find a way to make IntelliSense know that it's either Distance or Certainty, but not both. - public async Task> SearchNearVector(string collection, float[] vector, float? distance = null, float? certainty = null, uint? limit = null) + private static Models.GroupByResult BuildGroupByResult(string collection, SearchReply reply) { - var request = BaseSearchRequest(collection, filter: null, limit: limit); + if (!reply.GroupByResults.Any()) + { + return (new List(), new Dictionary()); + } + + var groups = reply.GroupByResults.ToDictionary(k => k.Name, v => new WeaviateGroup() + { + Name = v.Name, + Objects = v.Objects.Select(obj => new WeaviateGroupByObject(collection) + { + ID = Guid.Parse(obj.Metadata.Id), + Vector = obj.Metadata.Vector, + Vectors = obj.Metadata.Vectors.ToDictionary(v => v.Name, v => (IList)v.VectorBytes.FromByteString().ToList()), + Data = buildObjectFromProperties(obj.Properties.NonRefProps), + BelongsToGroup = v.Name, + }).ToArray() + }); - var vectorStream = vector.ToStream(); - var vectorBytes = ByteString.FromStream(stream: vectorStream); - vectorStream.Dispose(); + var objects = groups.Values.SelectMany(g => g.Objects).ToList(); - request.NearVector = new NearVector + return (objects, groups); + } + + private static void BuildNearText(string query, double? distance, double? certainty, SearchRequest request, Move? moveTo, Move? moveAway) + { + request.NearText = new NearTextSearch { - Vector = { vector }, - Vectors = { - new Vectors { - Name = "default", - Type = Vectors.Types.VectorType.SingleFp32, - VectorBytes = vectorBytes, - } - }, + Query = { query }, // Targets = null, // VectorForTargets = { }, }; - if (distance.HasValue) + if (moveTo is not null) { - request.NearVector.Distance = distance.Value; + var uuids = moveTo.Objects is null ? [] : (new Guid?[] { moveTo.Objects }).Select(x => x.ToString()); + var concepts = moveTo.Concepts is null ? new string[] { } : [moveTo.Concepts]; + request.NearText.MoveTo = new NearTextSearch.Types.Move + { + Uuids = { uuids }, + Concepts = { concepts }, + Force = moveTo.Force, + }; } - if (certainty.HasValue) + if (moveAway is not null) { - request.NearVector.Certainty = certainty.Value; + var uuids = moveAway.Objects is null ? [] : (new Guid?[] { moveAway.Objects }).Select(x => x.ToString()); + var concepts = moveAway.Concepts is null ? new string[] { } : [moveAway.Concepts]; + request.NearText.MoveAway = new NearTextSearch.Types.Move + { + Uuids = { uuids }, + Concepts = { concepts }, + Force = moveAway.Force, + }; } - - SearchReply? reply = await _grpcClient.SearchAsync(request); - - if (!reply.Results.Any()) + if (distance is not null) { - return []; + request.NearText.Distance = distance.Value; } - return reply.Results.Select(result => new Models.WeaviateObject(collection) + if (certainty.HasValue) { - ID = Guid.Parse(result.Metadata.Id), - Vector = result.Metadata.Vector, - Vectors = result.Metadata.Vectors.ToDictionary(v => v.Name, v => - { - using (var ms = new MemoryStream(v.VectorBytes.ToByteArray())) - { - return (IList)ms.FromStream().ToList(); - } - }), - Data = buildObjectFromProperties(result.Properties.NonRefProps), - }).ToList(); + request.NearText.Certainty = certainty.Value; + } } - public async Task<(IEnumerable, IDictionary)> SearchNearVectorWithGroupBy(string collection, float[] vector, Models.GroupByConstraint groupBy, float? distance = null, float? certainty = null, uint? limit = null) + private static void BuildNearVector(float[] vector, float? distance, float? certainty, SearchRequest request) { - var request = BaseSearchRequest(collection, filter: null, limit: limit); - - var vectorStream = vector.ToStream(); - var vectorBytes = ByteString.FromStream(stream: vectorStream); - vectorStream.Dispose(); - - request.GroupBy = new GroupBy() - { - Path = { groupBy.PropertyName }, - NumberOfGroups = Convert.ToInt32(groupBy.NumberOfGroups), - ObjectsPerGroup = Convert.ToInt32(groupBy.ObjectsPerGroup), - }; - request.NearVector = new NearVector { Vector = { vector }, @@ -136,7 +126,7 @@ internal SearchRequest BaseSearchRequest(string collection, Filters? filter = nu new Vectors { Name = "default", Type = Vectors.Types.VectorType.SingleFp32, - VectorBytes = vectorBytes, + VectorBytes = vector.ToByteString(), } }, // Targets = null, @@ -152,98 +142,58 @@ internal SearchRequest BaseSearchRequest(string collection, Filters? filter = nu { request.NearVector.Certainty = certainty.Value; } + } + internal async Task> FetchObjects(string collection, Filters? filter = null, uint? limit = null) + { + var req = BaseSearchRequest(collection, filter, limit); - SearchReply? reply = await _grpcClient.SearchAsync(request); + SearchReply? reply = await _grpcClient.SearchAsync(req); - if (!reply.GroupByResults.Any()) - { - return (new List(), new Dictionary()); - } + return BuildResult(collection, reply); + } - var groups = reply.GroupByResults.ToDictionary(k => k.Name, v => new WeaviateGroup() - { - Name = v.Name, - Objects = v.Objects.Select(obj => new WeaviateGroupByObject(collection) - { - ID = Guid.Parse(obj.Metadata.Id), - Vector = obj.Metadata.Vector, - Vectors = obj.Metadata.Vectors.ToDictionary(v => v.Name, v => - { - using (var ms = new MemoryStream(v.VectorBytes.ToByteArray())) - { - return (IList)ms.FromStream().ToList(); - } - }), - Data = buildObjectFromProperties(obj.Properties.NonRefProps), - BelongsToGroup = v.Name, - }).ToArray() - }); + public async Task> SearchNearVector(string collection, float[] vector, float? distance = null, float? certainty = null, uint? limit = null) + { + var request = BaseSearchRequest(collection, filter: null, limit: limit); - var objects = groups.Values.SelectMany(g => g.Objects).ToList(); + BuildNearVector(vector, distance, certainty, request); + SearchReply? reply = await _grpcClient.SearchAsync(request); - return (objects, groups); + return BuildResult(collection, reply); } - internal async Task<(IEnumerable, IDictionary)> SearchNearTextWithGroupBy(string collection, string query, Models.GroupByConstraint groupBy, float? distance, float? certainty, uint? limit) + internal async Task> SearchNearText(string collection, string query, float? distance, float? certainty, uint? limit, Move? moveTo, Move? moveAway) { var request = BaseSearchRequest(collection, filter: null, limit: limit); - request.GroupBy = new GroupBy() - { - Path = { groupBy.PropertyName }, - NumberOfGroups = Convert.ToInt32(groupBy.NumberOfGroups), - ObjectsPerGroup = Convert.ToInt32(groupBy.ObjectsPerGroup), - }; + BuildNearText(query, distance, certainty, request, moveTo, moveAway); - request.NearText = new NearTextSearch - { - Query = { query }, - // Targets = null, - // VectorForTargets = { }, - }; + SearchReply? reply = await _grpcClient.SearchAsync(request); - if (distance.HasValue) - { - request.NearText.Distance = distance.Value; - } + return BuildResult(collection, reply); + } - if (certainty.HasValue) - { - request.NearText.Certainty = certainty.Value; - } + public async Task SearchNearVector(string collection, float[] vector, GroupByConstraint groupBy, float? distance = null, float? certainty = null, uint? limit = null) + { + var request = BaseSearchRequest(collection, filter: null, limit: limit, groupBy: groupBy); + BuildNearVector(vector, distance, certainty, request); SearchReply? reply = await _grpcClient.SearchAsync(request); - if (!reply.GroupByResults.Any()) - { - return (new List(), new Dictionary()); - } + return BuildGroupByResult(collection, reply); + } - var groupsEnum = reply.GroupByResults.Select(v => new WeaviateGroup() - { - Name = v.Name, - Objects = v.Objects.Select(obj => new WeaviateGroupByObject(collection) - { - ID = Guid.Parse(obj.Metadata.Id), - Vector = obj.Metadata.Vector, - Vectors = obj.Metadata.Vectors.ToDictionary(v => v.Name, v => - { - using (var ms = new MemoryStream(v.VectorBytes.ToByteArray())) - { - return (IList)ms.FromStream().ToList(); - } - }), - Data = buildObjectFromProperties(obj.Properties.NonRefProps), - BelongsToGroup = v.Name, - }).ToArray() - }); + internal async Task SearchNearText(string collection, string query, GroupByConstraint groupBy, float? distance, float? certainty, uint? limit) + { + var request = BaseSearchRequest(collection, filter: null, limit: limit, groupBy: groupBy); - var groups = groupsEnum.ToDictionary(k => k.Name, v => v); - var objects = groupsEnum.SelectMany(g => g.Objects); + BuildNearText(query, distance, certainty, request, moveTo: null, moveAway: null); - return (objects, groups); + SearchReply? reply = await _grpcClient.SearchAsync(request); + + return BuildGroupByResult(collection, reply); } -} \ No newline at end of file +}