Skip to content

Commit

Permalink
Merge branch 'main' into convert-security-trial-rest-test
Browse files Browse the repository at this point in the history
  • Loading branch information
tvernum committed Aug 31, 2023
2 parents 22b9dcd + 25bc615 commit 6627ee9
Show file tree
Hide file tree
Showing 180 changed files with 5,566 additions and 757 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.elasticsearch.compute.operator.Operator;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xpack.esql.evaluator.EvalMapper;
import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc;
import org.elasticsearch.xpack.esql.expression.function.scalar.math.Abs;
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvMin;
Expand All @@ -27,7 +28,6 @@
import org.elasticsearch.xpack.ql.expression.FieldAttribute;
import org.elasticsearch.xpack.ql.expression.Literal;
import org.elasticsearch.xpack.ql.expression.predicate.operator.arithmetic.Add;
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.type.DataTypes;
import org.elasticsearch.xpack.ql.type.EsField;
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/98470.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 98470
summary: Reduce verbosity of the bulk indexing audit log
area: Audit
type: enhancement
issues: []
6 changes: 6 additions & 0 deletions docs/changelog/98528.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 98528
summary: "ESQL: Add support for TEXT fields in comparison operators and SORT"
area: ES|QL
type: enhancement
issues:
- 98642
6 changes: 6 additions & 0 deletions docs/changelog/98870.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 98870
summary: "ESQL: Add ability to perform date math"
area: ES|QL
type: enhancement
issues:
- 98402
5 changes: 5 additions & 0 deletions docs/changelog/98944.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 98944
summary: Auto-normalize `dot_product` vectors at index & query
area: Vector Search
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/98961.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 98961
summary: Fix NPE when `GetUser` with profile uid before profile index exists
area: Security
type: bug
issues: []
6 changes: 6 additions & 0 deletions docs/changelog/98987.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 98987
summary: EQL and ESQL to use only the necessary fields in the internal `field_caps`
calls
area: EQL
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ can change the length of a token, the `trim` filter does _not_ change a token's
offsets.

The `trim` filter uses Lucene's
https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/miscellaneous/TrimFilter.html[TrimFilter].
https://lucene.apache.org/core/{lucene_version_path}/analysis/common/org/apache/lucene/analysis/miscellaneous/TrimFilter.html[TrimFilter].

[TIP]
====
Expand Down Expand Up @@ -110,4 +110,4 @@ PUT trim_example
}
}
}
----
----
2 changes: 1 addition & 1 deletion docs/reference/docs/reindex.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ conflict.

IMPORTANT: Because data streams are <<data-streams-append-only,append-only>>,
any reindex request to a destination data stream must have an `op_type`
of`create`. A reindex can only add new documents to a destination data stream.
of `create`. A reindex can only add new documents to a destination data stream.
It cannot update existing documents in a destination data stream.

By default, version conflicts abort the `_reindex` process.
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/esql/functions/signature/ceil.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions docs/reference/esql/functions/signature/left.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion docs/reference/esql/functions/types/ceil.asciidoc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[%header.monospaced.styled,format=dsv,separator=|]
|===
arg1 | result
n | result
double | double
integer | integer
long | long
Expand Down
5 changes: 5 additions & 0 deletions docs/reference/esql/functions/types/left.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[%header.monospaced.styled,format=dsv,separator=|]
|===
arg1 | arg2 | result
keyword | integer | keyword
|===
7 changes: 4 additions & 3 deletions docs/reference/esql/index.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,8 @@ POST /_query?format=txt
[discrete]
==== {kib}

{esql} can be used in Discover to explore a data set, and in Lens to visualize it.
First, enable the `enableTextBased` setting in *Advanced Settings*. Next, in
Discover or Lens, from the data view dropdown, select *{esql}*.
Use {esql} in Discover to explore a data set. From the data view dropdown,
select *Try {esql}* to get started.

NOTE: {esql} queries in Discover and Lens are subject to the time range selected
with the time filter.
Expand Down Expand Up @@ -136,6 +135,8 @@ include::aggregation-functions.asciidoc[]

include::multivalued-fields.asciidoc[]

include::metadata-fields.asciidoc[]

include::task-management.asciidoc[]

:esql-tests!:
Expand Down
55 changes: 55 additions & 0 deletions docs/reference/esql/metadata-fields.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
[[esql-metadata-fields]]
== {esql} metadata fields

++++
<titleabbrev>Metadata fields</titleabbrev>
++++

{esql} can access <<mapping-fields, metadata fields>>. The currently
supported ones are:

* <<mapping-index-field,`_index`>>: the index to which the document belongs.
The field is of the type <<keyword, keyword>>.

* <<mapping-id-field,`_id`>>: the source document's ID. The field is of the
type <<keyword, keyword>>.

* `_version`: the source document's version. The field is of the type
<<number,long>>.

To enable access to these fields, the <<esql-from,`FROM`>> source command needs
to be provided with a dedicated directive:

[source,esql]
----
FROM index [METADATA _index, _id]
----

Metadata fields are only available if the source of the data is an index.
Consequently, `FROM` is the only source command that supports the `METADATA`
directive.

Once enabled, the fields are then available to subsequent processing commands, just
like the other index fields:

[source.merge.styled,esql]
----
include::{esql-specs}/metadata-ignoreCsvTests.csv-spec[tag=multipleIndices]
----
[%header.monospaced.styled,format=dsv,separator=|]
|===
include::{esql-specs}/metadata-ignoreCsvTests.csv-spec[tag=multipleIndices-result]
|===

Also, similar to the index fields, once an aggregation is performed, a
metadata field will no longer be accessible to subsequent commands, unless
used as a grouping field:

[source.merge.styled,esql]
----
include::{esql-specs}/metadata-ignoreCsvTests.csv-spec[tag=metaIndexInAggs]
----
[%header.monospaced.styled,format=dsv,separator=|]
|===
include::{esql-specs}/metadata-ignoreCsvTests.csv-spec[tag=metaIndexInAggs-result]
|===
7 changes: 7 additions & 0 deletions docs/reference/esql/source-commands/from.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,10 @@ or aliases:
----
FROM employees-00001,employees-*
----

Use the `METADATA` directive to enable <<esql-metadata-fields,metadata fields>>:

[source,esql]
----
FROM employees [METADATA _id]
----
2 changes: 1 addition & 1 deletion docs/reference/graph/explore.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ To spider out, you need to specify two things:
* The set of vertices you already know about that you want to exclude from the
results of the spidering operation.

You specify this information using `include`and `exclude` clauses. For example,
You specify this information using `include` and `exclude` clauses. For example,
the following request starts with the product `1854873` and spiders
out to find additional search terms associated with that product. The terms
"midi", "midi keyboard", and "synth" are excluded from the results.
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/health/health.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ watermark threshold>>.

`unhealthy_policies`::
(map) A detailed view on the policies that are considered unhealthy due to having
several consecutive unssuccesful invocations.
several consecutive unsuccessful invocations.
The `count` key represents the number of unhealthy policies (int).
The `invocations_since_last_success` key will report a map where the unhealthy policy
name is the key and it's corresponding number of failed invocations is the value.
Expand Down
4 changes: 2 additions & 2 deletions docs/reference/how-to/knn-search.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ options.
The `cosine` option accepts any float vector and computes the cosine
similarity. While this is convenient for testing, it's not the most efficient
approach. Instead, we recommend using the `dot_product` option to compute the
similarity. To use `dot_product`, all vectors need to be normalized in advance
to have length 1. The `dot_product` option is significantly faster, since it
similarity. When using `dot_product`, all vectors are normalized at index time to have
a magnitude of 1. The `dot_product` option is significantly faster, since it
avoids performing extra vector length computations during the search.

[discrete]
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/ilm/error-handling.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ You can use the <<ilm-explain-lifecycle,{ilm-init} Explain API>> to monitor the
[discrete]
==== How `min_age` is calculated

When setting up an <<set-up-lifecycle-policy,{ilm-init} policy>> or <<getting-started-index-lifecycle-management,automating rollover with {ilm-init}>>, be aware that`min_age` can be relative to either the rollover time or the index creation time.
When setting up an <<set-up-lifecycle-policy,{ilm-init} policy>> or <<getting-started-index-lifecycle-management,automating rollover with {ilm-init}>>, be aware that `min_age` can be relative to either the rollover time or the index creation time.

If you use <<ilm-rollover,{ilm-init} rollover>>, `min_age` is calculated relative to the time the index was rolled over. This is because the <<indices-rollover-index,rollover API>> generates a new index. The `creation_date` of the new index (retrievable via <<indices-get-settings>>) is used in the calculation. If you do not use rollover in the {ilm-init} policy, `min_age` is calculated relative to the `creation_date` of the original index.

Expand Down
10 changes: 5 additions & 5 deletions docs/reference/mapping/types/dense-vector.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,9 @@ Computes the dot product of two vectors. This option provides an optimized way
to perform cosine similarity. The constraints and computed score are defined
by `element_type`.
+
When `element_type` is `float`, all vectors must be unit length, including both
document and query vectors. The document `_score` is computed as
`(1 + dot_product(query, vector)) / 2`.
When `element_type` is `float`, all vectors are automatically converted to unit length, including both
document and query vectors. Consequently, `dot_product` does not allow vectors with a zero magnitude.
The document `_score` is computed as `(1 + dot_product(query, vector)) / 2`.
+
When `element_type` is `byte`, all vectors must have the same
length including both document and query vectors or results will be inaccurate.
Expand All @@ -171,9 +171,9 @@ where `dims` is the number of dimensions per vector.
`cosine`:::
Computes the cosine similarity. Note that the most efficient way to perform
cosine similarity is to normalize all vectors to unit length, and instead use
cosine similarity is to have all vectors normalized to unit length, and instead use
`dot_product`. You should only use `cosine` if you need to preserve the
original vectors and cannot normalize them in advance. The document `_score`
original vectors and cannot allow Elasticsearch to normalize them. The document `_score`
is computed as `(1 + cosine(query, vector)) / 2`. The `cosine` similarity does
not allow vectors with zero magnitude, since cosine is not defined in this
case.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ Remove the `http.content_type.required` setting from `elasticsearch.yml`. Specif
[%collapsible]
====
*Details* +
The `http.tcp_no_delay` setting was deprecated in 7.x and has been removed in 8.0. Use`http.tcp.no_delay` instead.
The `http.tcp_no_delay` setting was deprecated in 7.x and has been removed in 8.0. Use `http.tcp.no_delay` instead.
*Impact* +
Replace the `http.tcp_no_delay` setting with `http.tcp.no_delay`.
Expand All @@ -246,7 +246,7 @@ The `network.tcp.connect_timeout` setting was deprecated in 7.x and has been rem
was a fallback setting for `transport.connect_timeout`.
*Impact* +
Remove the`network.tcp.connect_timeout` setting.
Remove the `network.tcp.connect_timeout` setting.
Use the `transport.connect_timeout` setting to change the default connection
timeout for client connections. Specifying
`network.tcp.connect_timeout` in `elasticsearch.yml` will result in an
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Update your workflow and applications to use the `ilm` package in place of
To create `Fuzziness` instances, use the `fromString` and `fromEdits` method
instead of the `build` method that used to accept both Strings and numeric
values. Several fuzziness setters on query builders (e.g.
MatchQueryBuilder#fuzziness) now accept only a `Fuzziness`instance instead of
MatchQueryBuilder#fuzziness) now accept only a `Fuzziness` instance instead of
an Object.
Fuzziness used to be lenient when it comes to parsing arbitrary numeric values
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,5 +131,7 @@ include::{es-repo-dir}/tab-widgets/semantic-search/hybrid-search-widget.asciidoc
*** {blog-ref}improving-information-retrieval-elastic-stack-benchmarking-passage-retrieval[Part 2: Benchmarking passage retrieval]
*** {blog-ref}may-2023-launch-information-retrieval-elasticsearch-ai-model[Part 3: Introducing Elastic Learned Sparse Encoder, our new retrieval model]
*** {blog-ref}improving-information-retrieval-elastic-stack-hybrid[Part 4: Hybrid retrieval]
* Interactive examples:
** The https://github.com/elastic/elasticsearch-labs[`elasticsearch-labs`] repo contains a number of interactive semantic search examples in the form of executable Python notebooks, using the {es} Python client

include::semantic-search-elser.asciidoc[]
include::semantic-search-elser.asciidoc[]
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,31 @@ public void writeString(String value) throws IOException {
}
}

@Override
public void writeStringArray(String[] array) throws IOException {
    try {
        if (isFiltered() == false) {
            // Fast path: delegate the whole array to Jackson's bulk serializer.
            generator.writeArray(array, 0, array.length);
            return;
        }
        // Filtered serialization does not work correctly with the bulk array
        // serializer, so fall back to writing the elements one at a time.
        // TODO: this can probably be removed after upgrading Jackson to 2.15.1 or later, see
        // https://github.com/FasterXML/jackson-core/issues/1023
        writeStringArrayFiltered(array);
    } catch (JsonGenerationException e) {
        throw new XContentGenerationException(e);
    }
}

/**
 * Writes {@code array} as a JSON array one element at a time, so that each
 * string passes through the (filtered) single-value {@code writeString} path
 * instead of Jackson's bulk array serializer.
 */
private void writeStringArrayFiltered(String[] array) throws IOException {
    writeStartArray();
    for (int i = 0; i < array.length; i++) {
        writeString(array[i]);
    }
    writeEndArray();
}

@Override
public void writeString(char[] value, int offset, int len) throws IOException {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -740,11 +740,7 @@ private XContentBuilder values(String[] values) throws IOException {
if (values == null) {
return nullValue();
}
startArray();
for (String s : values) {
value(s);
}
endArray();
generator.writeStringArray(values);
return this;
}

Expand Down Expand Up @@ -1055,8 +1051,7 @@ public XContentBuilder stringStringMap(String name, Map<String, String> values)
}
startObject();
for (Map.Entry<String, String> value : values.entrySet()) {
field(value.getKey());
value(value.getValue());
generator.writeStringField(value.getKey(), value.getValue());
}
return endObject();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ public interface XContentGenerator extends Closeable, Flushable {

void writeString(String value) throws IOException;

/**
 * Writes the given strings as a single array value.
 *
 * @param array the strings to serialize as an array
 * @throws IOException if writing to the underlying output fails
 */
void writeStringArray(String[] array) throws IOException;

void writeString(char[] text, int offset, int len) throws IOException;

void writeUTF8String(byte[] value, int offset, int length) throws IOException;
Expand Down
Loading

0 comments on commit 6627ee9

Please sign in to comment.