diff --git a/src/it/java/io/weaviate/integration/AggregationITest.java b/src/it/java/io/weaviate/integration/AggregationITest.java index e29da810f..19d8f3460 100644 --- a/src/it/java/io/weaviate/integration/AggregationITest.java +++ b/src/it/java/io/weaviate/integration/AggregationITest.java @@ -35,7 +35,7 @@ public static void beforeAll() throws IOException { .properties( Property.text("category"), Property.integer("price")) - .vectors(Vectorizers.none())); + .vectors(Vectorizers.selfProvided())); var things = client.collections.use(COLLECTION); for (var category : List.of("Shoes", "Hat", "Jacket")) { diff --git a/src/it/java/io/weaviate/integration/CollectionsITest.java b/src/it/java/io/weaviate/integration/CollectionsITest.java index 072bb30cb..732f63c95 100644 --- a/src/it/java/io/weaviate/integration/CollectionsITest.java +++ b/src/it/java/io/weaviate/integration/CollectionsITest.java @@ -18,7 +18,7 @@ import io.weaviate.client6.v1.api.collections.config.Shard; import io.weaviate.client6.v1.api.collections.config.ShardStatus; import io.weaviate.client6.v1.api.collections.vectorindex.Hnsw; -import io.weaviate.client6.v1.api.collections.vectorizers.NoneVectorizer; +import io.weaviate.client6.v1.api.collections.vectorizers.SelfProvidedVectorizer; import io.weaviate.containers.Container; public class CollectionsITest extends ConcurrentTest { @@ -30,7 +30,7 @@ public void testCreateGetDelete() throws IOException { client.collections.create(collectionName, col -> col .properties(Property.text("username"), Property.integer("age")) - .vectors(Vectorizers.none())); + .vectors(Vectorizers.selfProvided())); var thingsCollection = client.collections.getConfig(collectionName); @@ -40,7 +40,7 @@ public void testCreateGetDelete() throws IOException { .as("default vector").extractingByKey("default") .satisfies(defaultVector -> { Assertions.assertThat(defaultVector) - .as("has none vectorizer").isInstanceOf(NoneVectorizer.class); + .as("has none 
vectorizer").isInstanceOf(SelfProvidedVectorizer.class); Assertions.assertThat(defaultVector).extracting(Vectorizer::vectorIndex) .isInstanceOf(Hnsw.class); }); diff --git a/src/it/java/io/weaviate/integration/DataITest.java b/src/it/java/io/weaviate/integration/DataITest.java index cac817c17..3bc1921c0 100644 --- a/src/it/java/io/weaviate/integration/DataITest.java +++ b/src/it/java/io/weaviate/integration/DataITest.java @@ -117,7 +117,7 @@ private static void createTestCollections() throws IOException { Property.integer("age")) .references( Property.reference("hasAwards", awardsGrammy, awardsOscar)) - .vectors(Vectorizers.none(VECTOR_INDEX))); + .vectors(Vectorizers.selfProvided(VECTOR_INDEX))); } @Test @@ -233,7 +233,7 @@ public void testUpdate() throws IOException { collection -> collection .properties(Property.text("title"), Property.integer("year")) .references(Property.reference("writtenBy", nsAuthors)) - .vectors(Vectorizers.none())); + .vectors(Vectorizers.selfProvided())); var authors = client.collections.use(nsAuthors); var walter = authors.data.insert(Map.of("name", "walter scott")); diff --git a/src/it/java/io/weaviate/integration/SearchITest.java b/src/it/java/io/weaviate/integration/SearchITest.java index 2d5d78bb9..057501844 100644 --- a/src/it/java/io/weaviate/integration/SearchITest.java +++ b/src/it/java/io/weaviate/integration/SearchITest.java @@ -132,7 +132,7 @@ private static Map populateTest(int n) throws IOException { private static void createTestCollection() throws IOException { client.collections.create(COLLECTION, cfg -> cfg .properties(Property.text("category")) - .vectors(Vectorizers.none(VECTOR_INDEX))); + .vectors(Vectorizers.selfProvided(VECTOR_INDEX))); } @Test diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/ObjectMetadata.java b/src/main/java/io/weaviate/client6/v1/api/collections/ObjectMetadata.java index 6732b5256..db21e34a5 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/ObjectMetadata.java 
+++ b/src/main/java/io/weaviate/client6/v1/api/collections/ObjectMetadata.java @@ -25,17 +25,20 @@ public static class Builder implements ObjectBuilder { private String uuid; private Vectors vectors; + /** Assign a custom UUID for the object. */ public Builder uuid(UUID uuid) { return uuid(uuid.toString()); } + /** Assign a custom UUID for the object. */ public Builder uuid(String uuid) { this.uuid = uuid; return this; } - public Builder vectors(Vectors vectors) { - this.vectors = vectors; + /** Attach custom vectors to the object. */ + public Builder vectors(Vectors... vectors) { + this.vectors = new Vectors(vectors); return this; } diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/Vectorizer.java b/src/main/java/io/weaviate/client6/v1/api/collections/Vectorizer.java index b0cc27d2e..fed6d21d3 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/Vectorizer.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/Vectorizer.java @@ -16,7 +16,7 @@ import io.weaviate.client6.v1.api.collections.vectorizers.Img2VecNeuralVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Multi2VecClipVectorizer; -import io.weaviate.client6.v1.api.collections.vectorizers.NoneVectorizer; +import io.weaviate.client6.v1.api.collections.vectorizers.SelfProvidedVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecContextionaryVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecWeaviateVectorizer; import io.weaviate.client6.v1.internal.json.JsonEnum; @@ -63,7 +63,7 @@ private final void addAdapter(Gson gson, Vectorizer.Kind kind, Class none() { - return none(VectorIndex.DEFAULT_VECTOR_NAME); + /** Create a bring-your-own-vector vector index. 
*/ + public static Map.Entry selfProvided() { + return selfProvided(VectorIndex.DEFAULT_VECTOR_NAME); } - public static Map.Entry none( - Function> fn) { - return none(VectorIndex.DEFAULT_VECTOR_NAME, fn); + /** + * Create a bring-your-own-vector vector index. + * + * @param fn Lambda expression for optional parameters. + */ + public static Map.Entry selfProvided( + Function> fn) { + return selfProvided(VectorIndex.DEFAULT_VECTOR_NAME, fn); } - public static Map.Entry none(String vectorName) { - return Map.entry(vectorName, NoneVectorizer.of()); + /** + * Create a named bring-your-own-vector vector index. + * + * @param vectorName Vector name. + */ + public static Map.Entry selfProvided(String vectorName) { + return Map.entry(vectorName, SelfProvidedVectorizer.of()); } - public static Map.Entry none(String vectorName, - Function> fn) { - return Map.entry(vectorName, NoneVectorizer.of(fn)); + /** + * Create a named bring-your-own-vector vector index. + * + * @param vectorName Vector name. + * @param fn Lambda expression for optional parameters. + */ + public static Map.Entry selfProvided(String vectorName, + Function> fn) { + return Map.entry(vectorName, SelfProvidedVectorizer.of(fn)); } + /** Create a vector index with an {@code img2vec-neural} vectorizer. */ public static Map.Entry img2vecNeural() { return img2vecNeural(VectorIndex.DEFAULT_VECTOR_NAME); } + /** + * Create a vector index with an {@code img2vec-neural} vectorizer. + * + * @param fn Lambda expression for optional parameters. + */ public static Map.Entry img2vecNeural( Function> fn) { return img2vecNeural(VectorIndex.DEFAULT_VECTOR_NAME, fn); } + /** + * Create a named vector index with an {@code img2vec-neural} vectorizer. + * + * @param vectorName Vector name. + */ public static Map.Entry img2vecNeural(String vectorName) { return Map.entry(vectorName, Img2VecNeuralVectorizer.of()); } + /** + * Create a named vector index with an {@code img2vec-neural} vectorizer. + * + * @param vectorName Vector name. 
+ * @param fn Lambda expression for optional parameters. + */ public static Map.Entry img2vecNeural(String vectorName, Function> fn) { return Map.entry(vectorName, Img2VecNeuralVectorizer.of(fn)); } + /** Create a vector index with a {@code multi2vec-clip} vectorizer. */ public static Map.Entry multi2vecClip() { return multi2vecClip(VectorIndex.DEFAULT_VECTOR_NAME); } + /** + * Create a vector index with a {@code multi2vec-clip} vectorizer. + * + * @param fn Lambda expression for optional parameters. + */ public static Map.Entry multi2vecClip( Function> fn) { return multi2vecClip(VectorIndex.DEFAULT_VECTOR_NAME, fn); } + /** + * Create a named vector index with a {@code multi2vec-clip} vectorizer. + * + * @param vectorName Vector name. + */ public static Map.Entry multi2vecClip(String vectorName) { return Map.entry(vectorName, Multi2VecClipVectorizer.of()); } + /** + * Create a named vector index with a {@code multi2vec-clip} vectorizer. + * + * @param vectorName Vector name. + * @param fn Lambda expression for optional parameters. + */ public static Map.Entry multi2vecClip(String vectorName, Function> fn) { return Map.entry(vectorName, Multi2VecClipVectorizer.of(fn)); } + /** Create a vector index with a {@code text2vec-contextionary} vectorizer. */ public static Map.Entry text2vecContextionary() { return text2vecContextionary(VectorIndex.DEFAULT_VECTOR_NAME); } + /** + * Create a vector index with a {@code text2vec-contextionary} vectorizer. + * + * @param fn Lambda expression for optional parameters. + */ public static Map.Entry text2vecContextionary( Function> fn) { return text2vecContextionary(VectorIndex.DEFAULT_VECTOR_NAME, fn); } + /** + * Create a named vector index with a {@code text2vec-contextionary} + * vectorizer. + * + * @param vectorName Vector name. 
+ */ public static Map.Entry text2vecContextionary(String vectorName) { return Map.entry(vectorName, Text2VecContextionaryVectorizer.of()); } + /** + * Create a named vector index with a {@code text2vec-contextionary} + * vectorizer. + * + * @param vectorName Vector name. + * @param fn Lambda expression for optional parameters. + */ public static Map.Entry text2vecContextionary(String vectorName, Function> fn) { return Map.entry(vectorName, Text2VecContextionaryVectorizer.of(fn)); } + /** Create a vector index with a {@code text2vec-weaviate} vectorizer. */ public static Map.Entry text2VecWeaviate() { return text2VecWeaviate(VectorIndex.DEFAULT_VECTOR_NAME); } + /** + * Create a vector index with a {@code text2vec-weaviate} vectorizer. + * + * @param fn Lambda expression for optional parameters. + */ public static Map.Entry text2VecWeaviate( Function> fn) { return text2VecWeaviate(VectorIndex.DEFAULT_VECTOR_NAME, fn); } + /** + * Create a named vector index with a {@code text2vec-weaviate} vectorizer. + * + * @param vectorName Vector name. + */ public static Map.Entry text2VecWeaviate(String vectorName) { return Map.entry(vectorName, Text2VecWeaviateVectorizer.of()); } + /** + * Create a named vector index with a {@code text2vec-weaviate} vectorizer. + * + * @param vectorName Vector name. + * @param fn Lambda expression for optional parameters. 
+ */ public static Map.Entry text2VecWeaviate(String vectorName, Function> fn) { return Map.entry(vectorName, Text2VecWeaviateVectorizer.of(fn)); diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/Vectors.java b/src/main/java/io/weaviate/client6/v1/api/collections/Vectors.java index 5c3a6a778..97551e6e4 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/Vectors.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/Vectors.java @@ -4,7 +4,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; -import java.util.function.Function; import com.google.gson.Gson; import com.google.gson.JsonArray; @@ -28,30 +27,26 @@ public class Vectors { /** Elements of this map must only be {@code float[]} or {@code float[][]}. */ private final Map namedVectors; + /** Create a 1-dimensional vector. */ public static Vectors of(float[] vector) { return of(VectorIndex.DEFAULT_VECTOR_NAME, vector); } + /** Create a named 1-dimensional vector. */ public static Vectors of(String name, float[] vector) { return new Vectors(name, vector); } + /** Create a 2-dimensional vector. */ public static Vectors of(float[][] vector) { return of(VectorIndex.DEFAULT_VECTOR_NAME, vector); } + /** Create a named 2-dimensional vector. */ public static Vectors of(String name, float[][] vector) { return new Vectors(name, vector); } - public static Vectors of(Function> fn) { - return fn.apply(new Builder()).build(); - } - - public Vectors(Builder builder) { - this.namedVectors = builder.namedVectors; - } - /** * Create a single named vector. 
* @@ -80,23 +75,13 @@ private Vectors(Map namedVectors) { this.namedVectors = namedVectors; } - public static class Builder implements ObjectBuilder { - private final Map namedVectors = new HashMap<>(); - - public Builder vector(String name, float[] vector) { - this.namedVectors.put(name, vector); - return this; - } - - public Builder vector(String name, float[][] vector) { - this.namedVectors.put(name, vector); - return this; - } - - @Override - public Vectors build() { - return new Vectors(this); + /** Merge all vectors in a single vector map. */ + public Vectors(Vectors... vectors) { + var namedVectors = new HashMap(); + for (var vec : vectors) { + namedVectors.putAll(vec.asMap()); } + this.namedVectors = namedVectors; } /** diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/data/InsertObjectRequest.java b/src/main/java/io/weaviate/client6/v1/api/collections/data/InsertObjectRequest.java index 972be0183..fc57c2882 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/data/InsertObjectRequest.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/data/InsertObjectRequest.java @@ -63,7 +63,7 @@ public Builder uuid(String uuid) { return this; } - public Builder vectors(Vectors vectors) { + public Builder vectors(Vectors... vectors) { this.metadata.vectors(vectors); return this; } diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/data/ReplaceObjectRequest.java b/src/main/java/io/weaviate/client6/v1/api/collections/data/ReplaceObjectRequest.java index e327c4999..0a786e1e2 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/data/ReplaceObjectRequest.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/data/ReplaceObjectRequest.java @@ -54,7 +54,7 @@ public Builder properties(T properties) { return this; } - public Builder vectors(Vectors vectors) { + public Builder vectors(Vectors... 
vectors) { this.metadata.vectors(vectors); return this; } diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/data/UpdateObjectRequest.java b/src/main/java/io/weaviate/client6/v1/api/collections/data/UpdateObjectRequest.java index f50a76ad2..2d0a76d78 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/data/UpdateObjectRequest.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/data/UpdateObjectRequest.java @@ -54,7 +54,7 @@ public Builder properties(T properties) { return this; } - public Builder vectors(Vectors vectors) { + public Builder vectors(Vectors... vectors) { this.metadata.vectors(vectors); return this; } diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/query/QueryRequest.java b/src/main/java/io/weaviate/client6/v1/api/collections/query/QueryRequest.java index 80918fd43..f38ef3b26 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/query/QueryRequest.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/query/QueryRequest.java @@ -177,22 +177,23 @@ private static WeaviateObject unmarshalWithRefere var metadataBuilder = new ObjectMetadata.Builder() .uuid(metadataResult.getId()); - var vectors = new Vectors.Builder(); + var vectors = new Vectors[metadataResult.getVectorsList().size()]; + var i = 0; for (final var vector : metadataResult.getVectorsList()) { var vectorName = vector.getName(); var vbytes = vector.getVectorBytes(); switch (vector.getType()) { case VECTOR_TYPE_SINGLE_FP32: - vectors.vector(vectorName, ByteStringUtil.decodeVectorSingle(vbytes)); + vectors[i++] = Vectors.of(vectorName, ByteStringUtil.decodeVectorSingle(vbytes)); break; case VECTOR_TYPE_MULTI_FP32: - vectors.vector(vectorName, ByteStringUtil.decodeVectorMulti(vbytes)); + vectors[i++] = Vectors.of(vectorName, ByteStringUtil.decodeVectorMulti(vbytes)); break; default: continue; } } - metadataBuilder.vectors(vectors.build()); + metadataBuilder.vectors(vectors); metadata = metadataBuilder.build(); } diff 
--git a/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Img2VecNeuralVectorizer.java b/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Img2VecNeuralVectorizer.java index 7f5a28a8e..79cf57867 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Img2VecNeuralVectorizer.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Img2VecNeuralVectorizer.java @@ -12,7 +12,9 @@ import io.weaviate.client6.v1.internal.ObjectBuilder; public record Img2VecNeuralVectorizer( + /** BLOB properties included in the embedding. */ @SerializedName("imageFields") List imageFields, + /** Vector index configuration. */ VectorIndex vectorIndex) implements Vectorizer { @Override @@ -41,15 +43,24 @@ public static class Builder implements ObjectBuilder { private VectorIndex vectorIndex = VectorIndex.DEFAULT_VECTOR_INDEX; private List imageFields = new ArrayList<>(); + /** Add BLOB properties to include in the embedding. */ public Builder imageFields(List fields) { this.imageFields = fields; return this; } + /** Add BLOB properties to include in the embedding. */ public Builder imageFields(String... fields) { return imageFields(Arrays.asList(fields)); } + /** + * Override default vector index configuration. + * + * HNSW + * is the default vector index. 
+ */ public Builder vectorIndex(VectorIndex vectorIndex) { this.vectorIndex = vectorIndex; return this; diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Multi2VecClipVectorizer.java b/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Multi2VecClipVectorizer.java index 60fc5b87b..6b7fb57a3 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Multi2VecClipVectorizer.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Multi2VecClipVectorizer.java @@ -13,14 +13,27 @@ import io.weaviate.client6.v1.internal.ObjectBuilder; public record Multi2VecClipVectorizer( + /** Base URL of the embedding service. */ @SerializedName("inferenceUrl") String inferenceUrl, + /** BLOB properties included in the embedding. */ @SerializedName("imageFields") List imageFields, + /** TEXT properties included in the embedding. */ @SerializedName("textFields") List textFields, + /** Weights of the included properties. */ @SerializedName("weights") Weights weights, + /** Vector index configuration. */ VectorIndex vectorIndex) implements Vectorizer { private static record Weights( + /** + * Weights of the BLOB properties. Values appear in the same order as the + * corresponding property names in {@code imageFields}. + */ @SerializedName("imageWeights") List imageWeights, + /** + * Weights of the TEXT properties. Values appear in the same order as the + * corresponding property names in {@code textFields}. + */ @SerializedName("textWeights") List textWeights) { } @@ -59,39 +72,63 @@ public static class Builder implements ObjectBuilder { private Map imageFields = new HashMap<>(); private Map textFields = new HashMap<>(); + /** Set base URL of the embedding service. */ public Builder inferenceUrl(String inferenceUrl) { this.inferenceUrl = inferenceUrl; return this; } + /** Add BLOB properties to include in the embedding. 
*/ public Builder imageFields(List fields) { fields.forEach(field -> imageFields.put(field, null)); return this; } + /** Add BLOB properties to include in the embedding. */ public Builder imageFields(String... fields) { return imageFields(Arrays.asList(fields)); } + /** + * Add BLOB property to include in the embedding. + * + * @param field Property name. + * @param weight Custom weight between 0.0 and 1.0. + */ public Builder imageField(String field, float weight) { imageFields.put(field, weight); return this; } + /** Add TEXT properties to include in the embedding. */ public Builder textFields(List fields) { fields.forEach(field -> textFields.put(field, null)); return this; } + /** Add TEXT properties to include in the embedding. */ public Builder textFields(String... fields) { return textFields(Arrays.asList(fields)); } + /** + * Add TEXT property to include in the embedding. + * + * @param field Property name. + * @param weight Custom weight between 0.0 and 1.0. + */ public Builder textField(String field, float weight) { textFields.put(field, weight); return this; } + /** + * Override default vector index configuration. + * + * HNSW + * is the default vector index. 
+ */ public Builder vectorIndex(VectorIndex vectorIndex) { this.vectorIndex = vectorIndex; return this; diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/NoneVectorizer.java b/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/SelfProvidedVectorizer.java similarity index 63% rename from src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/NoneVectorizer.java rename to src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/SelfProvidedVectorizer.java index c75f1c8dd..146557227 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/NoneVectorizer.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/SelfProvidedVectorizer.java @@ -7,7 +7,7 @@ import io.weaviate.client6.v1.api.collections.vectorindex.Hnsw; import io.weaviate.client6.v1.internal.ObjectBuilder; -public record NoneVectorizer(VectorIndex vectorIndex) implements Vectorizer { +public record SelfProvidedVectorizer(VectorIndex vectorIndex) implements Vectorizer { @Override public Kind _kind() { return Vectorizer.Kind.NONE; @@ -18,19 +18,19 @@ public Object _self() { return this; } - public static NoneVectorizer of() { + public static SelfProvidedVectorizer of() { return of(ObjectBuilder.identity()); } - public static NoneVectorizer of(Function> fn) { + public static SelfProvidedVectorizer of(Function> fn) { return fn.apply(new Builder()).build(); } - public NoneVectorizer(Builder builder) { + public SelfProvidedVectorizer(Builder builder) { this(builder.vectorIndex); } - public static class Builder implements ObjectBuilder { + public static class Builder implements ObjectBuilder { private VectorIndex vectorIndex = Hnsw.of(); public Builder vectorIndex(VectorIndex vectorIndex) { @@ -39,8 +39,8 @@ public Builder vectorIndex(VectorIndex vectorIndex) { } @Override - public NoneVectorizer build() { - return new NoneVectorizer(this); + public SelfProvidedVectorizer build() { + return new 
SelfProvidedVectorizer(this); } } } diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Text2VecContextionaryVectorizer.java b/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Text2VecContextionaryVectorizer.java index aa53dc085..ad7360ea9 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Text2VecContextionaryVectorizer.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Text2VecContextionaryVectorizer.java @@ -1,5 +1,9 @@ package io.weaviate.client6.v1.api.collections.vectorizers; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; import java.util.function.Function; import com.google.gson.annotations.SerializedName; @@ -11,13 +15,16 @@ public record Text2VecContextionaryVectorizer( /** * Weaviate defaults to {@code true} if the value is not provided. - * Because text2vec-contextionary cannot handle understores in collection names, + * Because text2vec-contextionary cannot handle underscores in collection names, * this quickly becomes inconvenient. * * To avoid that we send "vectorizeClassName": false all the time * and make it impossible to enable this feature, as it is deprecated. */ @Deprecated @SerializedName("vectorizeClassName") boolean vectorizeCollectionName, + /** Properties included in the embedding. */ + @SerializedName("sourceProperties") List sourceProperties, + /** Vector index configuration. */ VectorIndex vectorIndex) implements Vectorizer { @Override @@ -42,20 +49,41 @@ public static Text2VecContextionaryVectorizer of( /** * Canonical constructor always sets {@link #vectorizeCollectionName} to false. 
*/ - public Text2VecContextionaryVectorizer(boolean vectorizeCollectionName, VectorIndex vectorIndex) { + public Text2VecContextionaryVectorizer(boolean vectorizeCollectionName, List sourceProperties, + VectorIndex vectorIndex) { this.vectorizeCollectionName = false; this.vectorIndex = vectorIndex; + this.sourceProperties = Collections.emptyList(); } public Text2VecContextionaryVectorizer(Builder builder) { - this(builder.vectorizeCollectionName, builder.vectorIndex); + this(builder.vectorizeCollectionName, builder.sourceProperties, builder.vectorIndex); } public static class Builder implements ObjectBuilder { private final boolean vectorizeCollectionName = false; + private List sourceProperties = new ArrayList<>(); private VectorIndex vectorIndex = VectorIndex.DEFAULT_VECTOR_INDEX; + /** Add properties to include in the embedding. */ + public Builder sourceProperties(String... properties) { + return sourceProperties(Arrays.asList(properties)); + } + + /** Add properties to include in the embedding. */ + public Builder sourceProperties(List properties) { + this.sourceProperties.addAll(properties); + return this; + } + + /** + * Override default vector index configuration. + * + * HNSW + * is the default vector index. 
+ */ public Builder vectorIndex(VectorIndex vectorIndex) { this.vectorIndex = vectorIndex; return this; diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Text2VecWeaviateVectorizer.java b/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Text2VecWeaviateVectorizer.java index a8c9e7bd3..00ff9db7d 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Text2VecWeaviateVectorizer.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Text2VecWeaviateVectorizer.java @@ -1,5 +1,8 @@ package io.weaviate.client6.v1.api.collections.vectorizers; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.function.Function; import com.google.gson.annotations.SerializedName; @@ -9,9 +12,15 @@ import io.weaviate.client6.v1.internal.ObjectBuilder; public record Text2VecWeaviateVectorizer( + /** Weaviate Embeddings Service base URL. */ @SerializedName("baseUrl") String inferenceUrl, + /** Dimensionality of the generated vectors. */ @SerializedName("dimensions") Integer dimensions, + /** Embedding model. */ @SerializedName("model") String model, + /** Properties included in the embedding. */ + @SerializedName("sourceProperties") List sourceProperties, + /** Vector index configuration. 
*/ VectorIndex vectorIndex) implements Vectorizer { @Override @@ -37,33 +46,65 @@ public Text2VecWeaviateVectorizer(Builder builder) { builder.inferenceUrl, builder.dimensions, builder.model, + builder.sourceProperties, builder.vectorIndex); } - public static final String SNOWFLAKE_ARCTIC_EMBED_L_20 = "Snowflake/snowflake-arctic-embed-l-v2.0"; public static final String SNOWFLAKE_ARCTIC_EMBED_M_15 = "Snowflake/snowflake-arctic-embed-m-v1.5"; + public static final String SNOWFLAKE_ARCTIC_EMBED_L_20 = "Snowflake/snowflake-arctic-embed-l-v2.0"; public static class Builder implements ObjectBuilder { private VectorIndex vectorIndex = VectorIndex.DEFAULT_VECTOR_INDEX; private String inferenceUrl; private Integer dimensions; private String model; + private List sourceProperties = new ArrayList<>(); + /** + * Base URL for Weaviate Embeddings Service. This can be omitted when connecting + * to a Weaviate Cloud instance: the client will automatically set the necessary + * headers. + */ public Builder inferenceUrl(String inferenceUrl) { this.inferenceUrl = inferenceUrl; return this; } + /** Set target dimensionality for generated embeddings. */ public Builder dimensions(int dimensions) { this.dimensions = dimensions; return this; } + /** + * Select the embedding model. + * + * @see Text2VecWeaviateVectorizer#SNOWFLAKE_ARCTIC_EMBED_M_15 + * @see Text2VecWeaviateVectorizer#SNOWFLAKE_ARCTIC_EMBED_L_20 + */ public Builder model(String model) { this.model = model; return this; } + /** Add properties to include in the embedding. */ + public Builder sourceProperties(String... properties) { + return sourceProperties(Arrays.asList(properties)); + } + + /** Add properties to include in the embedding. */ + public Builder sourceProperties(List properties) { + this.sourceProperties.addAll(properties); + return this; + } + + /** + * Override default vector index configuration. + * + * HNSW + * is the default vector index. 
+ */ public Builder vectorIndex(VectorIndex vectorIndex) { this.vectorIndex = vectorIndex; return this; diff --git a/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java b/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java index 45607c4f4..1491f4828 100644 --- a/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java +++ b/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java @@ -32,7 +32,7 @@ import io.weaviate.client6.v1.api.collections.vectorindex.Hnsw; import io.weaviate.client6.v1.api.collections.vectorizers.Img2VecNeuralVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Multi2VecClipVectorizer; -import io.weaviate.client6.v1.api.collections.vectorizers.NoneVectorizer; +import io.weaviate.client6.v1.api.collections.vectorizers.SelfProvidedVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecContextionaryVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecWeaviateVectorizer; @@ -44,7 +44,7 @@ public static Object[][] testCases() { // Vectorizer.CustomTypeAdapterFactory { Vectorizer.class, - NoneVectorizer.of(), + SelfProvidedVectorizer.of(), """ { "vectorIndexType": "hnsw", @@ -101,7 +101,8 @@ public static Object[][] testCases() { "vectorIndexConfig": {}, "vectorizer": { "text2vec-contextionary": { - "vectorizeClassName": false + "vectorizeClassName": false, + "sourceProperties": [] } } } @@ -121,7 +122,9 @@ public static Object[][] testCases() { "text2vec-weaviate": { "baseUrl": "http://example.com", "dimensions": 4, - "model": "very-good-model" + "model": "very-good-model", + "sourceProperties": [] + } } } @@ -131,7 +134,7 @@ public static Object[][] testCases() { // VectorIndex.CustomTypeAdapterFactory { Vectorizer.class, - NoneVectorizer.of(none -> none + SelfProvidedVectorizer.of(none -> none .vectorIndex(Flat.of(flat -> flat .vectorCacheMaxObjects(100)))), """ @@ -144,7 +147,7 @@ public static Object[][] testCases() { }, { Vectorizer.class, 
- NoneVectorizer.of(none -> none + SelfProvidedVectorizer.of(none -> none .vectorIndex(Hnsw.of(hnsw -> hnsw .distance(Distance.DOT) .ef(1) @@ -207,9 +210,9 @@ public static Object[][] testCases() { }, { Vectors.class, - Vectors.of(named -> named - .vector("1d", new float[] { 1f, 2f }) - .vector("2d", new float[][] { { 1f, 2f }, { 3f, 4f } })), + new Vectors( + Vectors.of("1d", new float[] { 1f, 2f }), + Vectors.of("2d", new float[][] { { 1f, 2f }, { 3f, 4f } })), "{\"1d\": [1.0, 2.0], \"2d\": [[1.0, 2.0], [3.0, 4.0]]}", (CustomAssert) JSONTest::compareVectors, },