diff --git a/docs/changelog/127949.yaml b/docs/changelog/127949.yaml new file mode 100644 index 0000000000000..82a8b65fc9ec4 --- /dev/null +++ b/docs/changelog/127949.yaml @@ -0,0 +1,5 @@ +pr: 127949 +summary: Ensure ordinal builder emit ordinal blocks +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java index 87b33b3b0893d..9dfc0055415d7 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java @@ -54,7 +54,11 @@ void writeOrdinalBlock(StreamOutput out) throws IOException { * Returns true if this ordinal block is dense enough to enable optimizations using its ordinals */ public boolean isDense() { - return ordinals.getTotalValueCount() * 2 / 3 >= bytes.getPositionCount(); + return isDense(ordinals.getTotalValueCount(), bytes.getPositionCount()); + } + + public static boolean isDense(long totalPositions, long dictionarySize) { + return totalPositions >= 10 && totalPositions >= dictionarySize * 2L; } public IntBlock getOrdinalsBlock() { @@ -251,4 +255,17 @@ public long ramBytesUsed() { public String toString() { return getClass().getSimpleName() + "[ordinals=" + ordinals + ", bytes=" + bytes + "]"; } + + @Override + public boolean equals(Object o) { + if (o instanceof BytesRefBlock that) { + return BytesRefBlock.equals(this, that); + } + return false; + } + + @Override + public int hashCode() { + return BytesRefBlock.hash(this); + } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java index 576bde5cdf676..cfcc75c7c396a 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java @@ -22,6 +22,8 @@ public class SingletonOrdinalsBuilder implements BlockLoader.SingletonOrdinalsBuilder, Releasable, Block.Builder { private final BlockFactory blockFactory; private final SortedDocValues docValues; + private int minOrd = Integer.MAX_VALUE; + private int maxOrd = Integer.MIN_VALUE; private final int[] ords; private int count; @@ -39,8 +41,10 @@ public SingletonOrdinalsBuilder appendNull() { } @Override - public SingletonOrdinalsBuilder appendOrd(int value) { - ords[count++] = value; + public SingletonOrdinalsBuilder appendOrd(int ord) { + ords[count++] = ord; + minOrd = Math.min(minOrd, ord); + maxOrd = Math.max(maxOrd, ord); return this; } @@ -55,7 +59,7 @@ public SingletonOrdinalsBuilder endPositionEntry() { } BytesRefBlock buildOrdinal() { - int valueCount = docValues.getValueCount(); + int valueCount = maxOrd - minOrd + 1; long breakerSize = ordsSize(valueCount); blockFactory.adjustBreaker(breakerSize); BytesRefVector bytesVector = null; @@ -65,7 +69,7 @@ BytesRefBlock buildOrdinal() { Arrays.fill(newOrds, -1); for (int ord : ords) { if (ord != -1) { - newOrds[ord] = 0; + newOrds[ord - minOrd] = 0; } } // resolve the ordinals and remaps the ordinals @@ -74,7 +78,7 @@ BytesRefBlock buildOrdinal() { for (int i = 0; i < newOrds.length; i++) { if (newOrds[i] != -1) { newOrds[i] = ++nextOrd; - bytesBuilder.appendBytesRef(docValues.lookupOrd(i)); + bytesBuilder.appendBytesRef(docValues.lookupOrd(i + minOrd)); } } bytesVector = bytesBuilder.build(); @@ -86,7 +90,7 @@ BytesRefBlock buildOrdinal() { if (ord == -1) { ordinalsBuilder.appendNull(); } else { - ordinalsBuilder.appendInt(newOrds[ord]); + ordinalsBuilder.appendInt(newOrds[ord - minOrd]); } } ordinalBlock = ordinalsBuilder.build(); @@ -107,7 +111,6 @@ BytesRefBlock buildRegularBlock() { blockFactory.adjustBreaker(breakerSize); try { int[] sortedOrds = ords.clone(); - Arrays.sort(sortedOrds); int uniqueCount = compactToUnique(sortedOrds); try (BreakingBytesRefBuilder copies = new BreakingBytesRefBuilder(blockFactory.breaker(), "ords")) { @@ -167,7 +170,12 @@ public BytesRefBlock build() { } boolean shouldBuildOrdinalsBlock() { - return ords.length >= 2 * docValues.getValueCount() && ords.length >= 32; + if (minOrd <= maxOrd) { + int numOrds = maxOrd - minOrd + 1; + return OrdinalBytesRefBlock.isDense(ords.length, numOrds); + } else { + return false; + } } @Override diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java index 87f3210dc12df..9a338506c00d1 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java @@ -8,7 +8,10 @@ package org.elasticsearch.compute.data; import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.store.Directory; @@ -37,6 +40,7 @@ import static org.elasticsearch.test.MapMatcher.assertMap; import static org.elasticsearch.test.MapMatcher.matchesMap; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; public class SingletonOrdinalsBuilderTests extends ESTestCase { public void testReader() throws IOException { @@ -154,6 +158,39 @@ public void testAllNull() throws IOException { } } + public void testEmitOrdinalForHighCardinality() throws IOException { + BlockFactory factory = breakingDriverContext().blockFactory(); + int numOrds = between(50, 100); + try (Directory directory = newDirectory(); IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig())) { + for (int o = 0; o < numOrds; o++) { + int docPerOrds = between(10, 15); + for (int d = 0; d < docPerOrds; d++) { + indexWriter.addDocument(List.of(new SortedDocValuesField("f", new BytesRef("value-" + o)))); + } + } + try (IndexReader reader = DirectoryReader.open(indexWriter)) { + assertThat(reader.leaves(), hasSize(1)); + for (LeafReaderContext ctx : reader.leaves()) { + int batchSize = between(40, 100); + int ord = random().nextInt(numOrds); + try ( + var b1 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize); + var b2 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize) + ) { + for (int i = 0; i < batchSize; i++) { + b1.appendOrd(ord); + b2.appendOrd(ord); + } + try (BytesRefBlock block1 = b1.build(); BytesRefBlock block2 = b2.buildRegularBlock()) { + assertThat(block1, equalTo(block2)); + assertNotNull(block1.asOrdinals()); + } + } + } + } + } + } + static BytesRefBlock buildOrdinalsBuilder(SingletonOrdinalsBuilder builder) { if (randomBoolean()) { return builder.buildRegularBlock();