From b8766f1a246e85847797986de8f387b441c15652 Mon Sep 17 00:00:00 2001 From: dyma solovei Date: Fri, 12 Sep 2025 14:50:28 +0200 Subject: [PATCH 1/4] feat: support per-property tokenization config --- .../client6/v1/api/collections/Property.java | 9 ++++++++ .../v1/api/collections/Tokenization.java | 22 +++++++++++++++++++ .../client6/v1/internal/json/JSONTest.java | 3 +++ 3 files changed, 34 insertions(+) create mode 100644 src/main/java/io/weaviate/client6/v1/api/collections/Tokenization.java diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/Property.java b/src/main/java/io/weaviate/client6/v1/api/collections/Property.java index 13cde953..848a575f 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/Property.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/Property.java @@ -16,6 +16,7 @@ public record Property( @SerializedName("indexFilterable") Boolean indexFilterable, @SerializedName("indexRangeFilters") Boolean indexRangeFilters, @SerializedName("indexSearchable") Boolean indexSearchable, + @SerializedName("tokenization") Tokenization tokenization, @SerializedName("skipVectorization") Boolean skipVectorization, @SerializedName("vectorizePropertyName") Boolean vectorizePropertyName) { @@ -142,6 +143,7 @@ public Builder edit() { .indexFilterable(indexFilterable) .indexRangeFilters(indexRangeFilters) .indexSearchable(indexSearchable) + .tokenization(tokenization) .skipVectorization(skipVectorization) .vectorizePropertyName(vectorizePropertyName); } @@ -159,6 +161,7 @@ public Property(Builder builder) { builder.indexFilterable, builder.indexRangeFilters, builder.indexSearchable, + builder.tokenization, builder.skipVectorization, builder.vectorizePropertyName); } @@ -174,6 +177,7 @@ public static class Builder implements ObjectBuilder { private Boolean indexFilterable; private Boolean indexRangeFilters; private Boolean indexSearchable; + private Tokenization tokenization; private Boolean skipVectorization; private Boolean 
vectorizePropertyName; @@ -221,6 +225,11 @@ public Builder indexSearchable(Boolean indexSearchable) { return this; } + public Builder tokenization(Tokenization tokenization) { + this.tokenization = tokenization; + return this; + } + public Builder skipVectorization(Boolean skipVectorization) { this.skipVectorization = skipVectorization; return this; diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/Tokenization.java b/src/main/java/io/weaviate/client6/v1/api/collections/Tokenization.java new file mode 100644 index 00000000..185c2efa --- /dev/null +++ b/src/main/java/io/weaviate/client6/v1/api/collections/Tokenization.java @@ -0,0 +1,22 @@ +package io.weaviate.client6.v1.api.collections; + +import com.google.gson.annotations.SerializedName; + +public enum Tokenization { + @SerializedName("word") + WORD, + @SerializedName("whitespace") + WHITESPACE, + @SerializedName("lowercase") + LOWERCASE, + @SerializedName("field") + FIELD, + @SerializedName("gse") + GSE, + @SerializedName("trigram") + TRIGRAM, + @SerializedName("kagome_ja") + KAGOME_JA, + @SerializedName("kagome_kr") + KAGOME_KR; +} diff --git a/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java b/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java index 1491f482..e045ad16 100644 --- a/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java +++ b/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java @@ -19,6 +19,7 @@ import io.weaviate.client6.v1.api.collections.ObjectMetadata; import io.weaviate.client6.v1.api.collections.Property; import io.weaviate.client6.v1.api.collections.Reranker; +import io.weaviate.client6.v1.api.collections.Tokenization; import io.weaviate.client6.v1.api.collections.Vectorizer; import io.weaviate.client6.v1.api.collections.Vectorizers; import io.weaviate.client6.v1.api.collections.Vectors; @@ -224,6 +225,7 @@ public static Object[][] testCases() { .description("A collection of things") .properties( Property.text("shape"), + 
+ Property.text("custom_id", p -> p.tokenization(Tokenization.WORD)), Property.integer("size")) .references( Property.reference("owner", "Person", "Company")) @@ -237,6 +239,7 @@ public static Object[][] testCases() { "properties": [ {"name": "shape", "dataType": ["text"]}, {"name": "size", "dataType": ["int"]}, + {"name": "custom_id", "dataType": ["text"], tokenization: "word"}, {"name": "owner", "dataType": ["Person", "Company"]} ], "vectorConfig": { From e709973e6b221aa5a2c6107230275e831e0e64e1 Mon Sep 17 00:00:00 2001 From: dyma solovei Date: Fri, 12 Sep 2025 15:28:43 +0200 Subject: [PATCH 2/4] chore: write javadoc --- .../client6/v1/api/collections/Property.java | 249 ++++++++++++++++-- .../v1/api/collections/Tokenization.java | 1 + 2 files changed, 235 insertions(+), 15 deletions(-) diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/Property.java b/src/main/java/io/weaviate/client6/v1/api/collections/Property.java index 848a575f..e87079ff 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/Property.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/Property.java @@ -20,106 +20,250 @@ public record Property( @SerializedName("skipVectorization") Boolean skipVectorization, @SerializedName("vectorizePropertyName") Boolean vectorizePropertyName) { + /** + * Create a {@code text} property. + * + * @param name Property name. + */ public static Property text(String name) { return text(name, ObjectBuilder.identity()); } + /** + * Create a {@code text} property with additional configuration. + * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ public static Property text(String name, Function> fn) { return newProperty(name, DataType.TEXT, fn); } + /** + * Create a {@code text[]} property. + * + * @param name Property name. 
+ */ public static Property textArray(String name) { return textArray(name, ObjectBuilder.identity()); } + /** + * Create a {@code text[]} property with additional configuration. + * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ public static Property textArray(String name, Function> fn) { return newProperty(name, DataType.TEXT_ARRAY, fn); } + /** + * Create a {@code int} property. + * + * @param name Property name. + */ public static Property integer(String name) { return integer(name, ObjectBuilder.identity()); } + /** + * Create a {@code int} property with additional configuration. + * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ public static Property integer(String name, Function> fn) { return newProperty(name, DataType.INT, fn); } + /** + * Create a {@code int[]} property. + * + * @param name Property name. + */ public static Property integerArray(String name) { return integerArray(name, ObjectBuilder.identity()); } + /** + * Create a {@code int[]} property with additional configuration. + * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ public static Property integerArray(String name, Function> fn) { return newProperty(name, DataType.INT_ARRAY, fn); } + /** + * Create a {@code blob} property. + * + * @param name Property name. + */ public static Property blob(String name) { return blob(name, ObjectBuilder.identity()); } + /** + * Create a {@code blob} property with additional configuration. + * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ public static Property blob(String name, Function> fn) { return newProperty(name, DataType.BLOB, fn); } + /** + * Create a {@code bool} property. + * + * @param name Property name. 
+ */ public static Property bool(String name) { return bool(name, ObjectBuilder.identity()); } + /** + * Create a {@code bool} property with additional configuration. + * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ + public static Property bool(String name, Function> fn) { return newProperty(name, DataType.BOOL, fn); } + /** + * Create a {@code bool[]} property. + * + * @param name Property name. + */ public static Property boolArray(String name) { return boolArray(name, ObjectBuilder.identity()); } + /** + * Create a {@code bool[]} property with additional configuration. + * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ public static Property boolArray(String name, Function> fn) { return newProperty(name, DataType.BOOL_ARRAY, fn); } + /** + * Create a {@code date} property. + * + * @param name Property name. + */ public static Property date(String name) { return date(name, ObjectBuilder.identity()); } + /** + * Create a {@code date} property with additional configuration. + * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ public static Property date(String name, Function> fn) { return newProperty(name, DataType.DATE, fn); } + /** + * Create a {@code date[]} property. + * + * @param name Property name. + */ public static Property dateArray(String name) { return dateArray(name, ObjectBuilder.identity()); } + /** + * Create a {@code date[]} property with additional configuration. + * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ public static Property dateArray(String name, Function> fn) { return newProperty(name, DataType.DATE_ARRAY, fn); } + /** + * Create a {@code uuid} property. + * + * @param name Property name. + */ public static Property uuid(String name) { return uuid(name, ObjectBuilder.identity()); } + /** + * Create a {@code uuid} property with additional configuration. 
+ * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ public static Property uuid(String name, Function> fn) { return newProperty(name, DataType.UUID, fn); } + /** + * Create a {@code uuid[]} property. + * + * @param name Property name. + */ public static Property uuidArray(String name) { return uuidArray(name, ObjectBuilder.identity()); } + /** + * Create a {@code uuid[]} property with additional configuration. + * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ public static Property uuidArray(String name, Function> fn) { return newProperty(name, DataType.UUID_ARRAY, fn); } + /** + * Create a {@code number} property. + * + * @param name Property name. + */ public static Property number(String name) { return number(name, ObjectBuilder.identity()); } + /** + * Create a {@code number} property with additional configuration. + * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ public static Property number(String name, Function> fn) { return newProperty(name, DataType.NUMBER, fn); } + /** + * Create a {@code number[]} property. + * + * @param name Property name. + */ public static Property numberArray(String name) { return numberArray(name, ObjectBuilder.identity()); } + /** + * Create a {@code number[]} property with additional configuration. + * + * @param name Property name. + * @param fn Lambda expression for optional parameters. + */ public static Property numberArray(String name, Function> fn) { return newProperty(name, DataType.NUMBER_ARRAY, fn); } @@ -136,6 +280,25 @@ public static ReferenceProperty reference(String name, List collections) return new ReferenceProperty(name, collections); } + /** + * Create a new "edit" builder from the property configuration. Consult the documentation + * to see which configuration is mutable before updating it. + * + * Example: Update property description. + * + *
+   * {@code
+   *  Property updated = propertyHeight.edit()
+   *    .description("How tall this building is.")
+   *    .build();
+   * }
+   *
+   * 
+   *
+   * @see #edit(Function)
+   */
   public Builder edit() {
     return new Builder(propertyName, dataTypes)
         .description(description)
@@ -148,6 +311,25 @@ public Builder edit() {
         .vectorizePropertyName(vectorizePropertyName);
   }
 
+  /**
+   * Pass a lambda expression to update property configuration. Consult the documentation
+   * to see which configuration is mutable before updating it.
+   *
+   * Example: Update property description.
+   *
+   * 
+   * {@code
+   *  Property updated = propertyHeight.edit(
+   *    p -> p.description("How tall this building is.")
+   *  );
+   * }
+   *
+   * 
+   *
+   * @see #edit()
+   */
   public Property edit(Function> fn) {
     return fn.apply(edit()).build();
   }
@@ -181,61 +363,98 @@ public static class Builder implements ObjectBuilder {
     private Boolean skipVectorization;
     private Boolean vectorizePropertyName;
 
+    /**
+     * Create a scalar / array type property.
+     *
+     * @param dataType Property data type, see {@link DataType}.
+     */
     public Builder(String propertyName, String dataType) {
       this.propertyName = propertyName;
       this.dataTypes = List.of(dataType);
     }
 
-    public Builder(String propertyName, String... dataTypes) {
-      this(propertyName, Arrays.asList(dataTypes));
-    }
-
+    /**
+     * Create a cross-reference property.
+     *
+     * @param dataTypes List of collection names this property can reference.
+     */
     public Builder(String propertyName, List dataTypes) {
       this.propertyName = propertyName;
       this.dataTypes = List.copyOf(dataTypes);
     }
 
-    public Builder dataTypes(List dataTypes) {
-      this.dataTypes = dataTypes;
-      return this;
-    }
-
+    /** Add property description. */
     public Builder description(String description) {
       this.description = description;
       return this;
     }
 
-    public Builder indexInverted(Boolean indexInverted) {
+    public Builder indexInverted(boolean indexInverted) {
       this.indexInverted = indexInverted;
       return this;
     }
 
-    public Builder indexFilterable(Boolean indexFilterable) {
+    /**
+     * Set to true to create a filtering index for this property.
+     *
+     * <p>
+ * Filterable indices are not applicable to {@code blob}, {@code object}, + * {@code geoCoordinates}, and {@code phoneNumber} properties or arrays thereof. + * + * @see https://docs.weaviate.io/weaviate/concepts/indexing/inverted-index#configure-inverted-indexes + */ + public Builder indexFilterable(boolean indexFilterable) { this.indexFilterable = indexFilterable; return this; } - public Builder indexRangeFilters(Boolean indexRangeFilters) { + /** + * Set to true to create a range-based filter for filtering + * by numerical ranges for this property. + * + *

+ * Applicable to {code int}, {@code number}, and {@code date} properties. + * + * @see https://docs.weaviate.io/weaviate/concepts/indexing/inverted-index#configure-inverted-indexes + */ + public Builder indexRangeFilters(boolean indexRangeFilters) { this.indexRangeFilters = indexRangeFilters; return this; } - public Builder indexSearchable(Boolean indexSearchable) { + /** + * Set to true to create a searchable index for this property. + * + *

+ * This index type enables BM25/hybrid search and is only applicable to + * {@code text}/{@code text[]} fields. For those it is also created + * by default; you should set {@code indexInverted(false)} if you + * do not plan to run BM25/hybrid queries on this property. + * + * @see https://docs.weaviate.io/weaviate/concepts/indexing/inverted-index#configure-inverted-indexes + */ + public Builder indexSearchable(boolean indexSearchable) { this.indexSearchable = indexSearchable; return this; } + /** + * Change tokenization method for this property. + * + * @see https://docs.weaviate.io/academy/py/tokenization/options + */ public Builder tokenization(Tokenization tokenization) { this.tokenization = tokenization; return this; } - public Builder skipVectorization(Boolean skipVectorization) { + public Builder skipVectorization(boolean skipVectorization) { this.skipVectorization = skipVectorization; return this; } - public Builder vectorizePropertyName(Boolean vectorizePropertyName) { + /** Include property name into the input for the vectorizer module. */ + public Builder vectorizePropertyName(boolean vectorizePropertyName) { this.vectorizePropertyName = vectorizePropertyName; return this; } diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/Tokenization.java b/src/main/java/io/weaviate/client6/v1/api/collections/Tokenization.java index 185c2efa..5ae79057 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/Tokenization.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/Tokenization.java @@ -2,6 +2,7 @@ import com.google.gson.annotations.SerializedName; +/** Tokenization methods available within Weaviate. */ public enum Tokenization { @SerializedName("word") WORD, From dc9e377796e940e544aba6c2920f79d932537219 Mon Sep 17 00:00:00 2001 From: dyma solovei Date: Fri, 12 Sep 2025 15:35:45 +0200 Subject: [PATCH 3/4] chore: close
 <pre> tag in javadoc and insert links via
 <a href>

---
 .../client6/v1/api/collections/Property.java  | 40 +++++++++----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/Property.java b/src/main/java/io/weaviate/client6/v1/api/collections/Property.java
index e87079ff..bb78b901 100644
--- a/src/main/java/io/weaviate/client6/v1/api/collections/Property.java
+++ b/src/main/java/io/weaviate/client6/v1/api/collections/Property.java
@@ -288,14 +288,11 @@ public static ReferenceProperty reference(String name, List collections)
    *
    * Example: Update property description.
    *
-   * 
-   * {@code
-   *  Property updated = propertyHeight.edit()
-   *    .description("How tall this building is.")
-   *    .build();
-   * }
-   *
-   * 
+   * 
{@code
+   * Property updated = propertyHeight.edit()
+   *     .description("How tall this building is.")
+   *     .build();
+   * }
* * @see #edit(Function) */ @@ -319,14 +316,10 @@ public Builder edit() { * * Example: Update property description. * - *
-   * {@code
-   *  Property updated = propertyHeight.edit(
-   *    p -> p.description("How tall this building is.")
-   *  );
-   * }
-   *
-   * 
+   * 
{@code
+   * Property updated = propertyHeight.edit(
+   *     p -> p.description("How tall this building is."));
+   * }
* * @see #edit() */ @@ -401,7 +394,9 @@ public Builder indexInverted(boolean indexInverted) { * Filterable indices are not applicable to {@code blob}, {@code object}, * {@code geoCoordinates}, and {@code phoneNumber} properties or arrays thereof. * - * @see https://docs.weaviate.io/weaviate/concepts/indexing/inverted-index#configure-inverted-indexes + * @see
Inverted + * Indexes */ public Builder indexFilterable(boolean indexFilterable) { this.indexFilterable = indexFilterable; @@ -415,7 +410,9 @@ public Builder indexFilterable(boolean indexFilterable) { *

* Applicable to {code int}, {@code number}, and {@code date} properties. * - * @see https://docs.weaviate.io/weaviate/concepts/indexing/inverted-index#configure-inverted-indexes + * @see Inverted + * Indexes */ public Builder indexRangeFilters(boolean indexRangeFilters) { this.indexRangeFilters = indexRangeFilters; @@ -431,7 +428,9 @@ public Builder indexRangeFilters(boolean indexRangeFilters) { * by default; you should set {@code indexInverted(false)} if you * do not plan to run BM25/hybrid queries on this property. * - * @see https://docs.weaviate.io/weaviate/concepts/indexing/inverted-index#configure-inverted-indexes + * @see Inverted + * Indexes */ public Builder indexSearchable(boolean indexSearchable) { this.indexSearchable = indexSearchable; @@ -441,7 +440,8 @@ public Builder indexSearchable(boolean indexSearchable) { /** * Change tokenization method for this property. * - * @see https://docs.weaviate.io/academy/py/tokenization/options + * @see Tokenization */ public Builder tokenization(Tokenization tokenization) { this.tokenization = tokenization; From dd968b4ef58c77dec9020153c586632d011ec6d4 Mon Sep 17 00:00:00 2001 From: dyma solovei Date: Fri, 12 Sep 2025 17:34:18 +0200 Subject: [PATCH 4/4] fix: avoid accidentally updating immutable property settings --- .../client6/v1/api/collections/Property.java | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/Property.java b/src/main/java/io/weaviate/client6/v1/api/collections/Property.java index bb78b901..fb597650 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/Property.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/Property.java @@ -341,6 +341,17 @@ public Property(Builder builder) { builder.vectorizePropertyName); } + // All methods accepting a `boolean` should have a boxed overload + // to be used by Property::edit. + // + // There we can't just do: + // .indexInverted(indexInverted == null ? 
false : indexInverted) + // because that may change the value from `null` to Boolean.FALSE, + // effectively updating this setting. In the context of PUT /schema/{collection} + // call this becomes a problem because we're not allowed to update anything + // except for the description. + // + // The alternative (wrapping each call in an if-block) seems too verbose. public static class Builder implements ObjectBuilder { // Required parameters. private final String propertyName; @@ -387,6 +398,12 @@ public Builder indexInverted(boolean indexInverted) { return this; } + /** Convenience method to be used by {@link Property#edit}. */ + Builder indexInverted(Boolean indexInverted) { + this.indexInverted = indexInverted; + return this; + } + /** * Set to true to create a filtering index for this property. * @@ -403,6 +420,12 @@ public Builder indexFilterable(boolean indexFilterable) { return this; } + /** Convenience method to be used by {@link Property#edit}. */ + Builder indexFilterable(Boolean indexFilterable) { + this.indexFilterable = indexFilterable; + return this; + } + /** * Set to true to create a range-based filter for filtering * by numerical ranges for this property. @@ -419,6 +442,12 @@ public Builder indexRangeFilters(boolean indexRangeFilters) { return this; } + /** Convenience method to be used by {@link Property#edit}. */ + Builder indexRangeFilters(Boolean indexRangeFilters) { + this.indexRangeFilters = indexRangeFilters; + return this; + } + /** * Set to true to create a searchable index for this property. * @@ -437,6 +466,12 @@ public Builder indexSearchable(boolean indexSearchable) { return this; } + /** Convenience method to be used by {@link Property#edit}. */ + Builder indexSearchable(Boolean indexSearchable) { + this.indexSearchable = indexSearchable; + return this; + } + /** * Change tokenization method for this property. 
* @@ -453,12 +488,24 @@ public Builder skipVectorization(boolean skipVectorization) { return this; } + /** Convenience method to be used by {@link Property#edit}. */ + Builder skipVectorization(Boolean skipVectorization) { + this.skipVectorization = skipVectorization; + return this; + } + /** Include property name into the input for the vectorizer module. */ public Builder vectorizePropertyName(boolean vectorizePropertyName) { this.vectorizePropertyName = vectorizePropertyName; return this; } + /** Convenience method to be used by {@link Property#edit}. */ + Builder vectorizePropertyName(Boolean vectorizePropertyName) { + this.vectorizePropertyName = vectorizePropertyName; + return this; + } + @Override public Property build() { return new Property(this);