Skip to content

[8.18] Ensure ordinal builder emit ordinal blocks (#127949) #127980

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 20, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/127949.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 127949
summary: Ensure ordinal builder emit ordinal blocks
area: ES|QL
type: bug
issues: []
Original file line number Diff line number Diff line change
@@ -54,7 +54,11 @@ void writeOrdinalBlock(StreamOutput out) throws IOException {
* Returns true if this ordinal block is dense enough to enable optimizations using its ordinals
*/
public boolean isDense() {
return ordinals.getTotalValueCount() * 2 / 3 >= bytes.getPositionCount();
return isDense(ordinals.getTotalValueCount(), bytes.getPositionCount());
}

public static boolean isDense(long totalPositions, long dictionarySize) {
return totalPositions >= 10 && totalPositions >= dictionarySize * 2L;
}

public IntBlock getOrdinalsBlock() {
@@ -251,4 +255,17 @@ public long ramBytesUsed() {
public String toString() {
return getClass().getSimpleName() + "[ordinals=" + ordinals + ", bytes=" + bytes + "]";
}

@Override
public boolean equals(Object o) {
if (o instanceof BytesRefBlock that) {
return BytesRefBlock.equals(this, that);
}
return false;
}

@Override
public int hashCode() {
return BytesRefBlock.hash(this);
}
}
Original file line number Diff line number Diff line change
@@ -22,6 +22,8 @@
public class SingletonOrdinalsBuilder implements BlockLoader.SingletonOrdinalsBuilder, Releasable, Block.Builder {
private final BlockFactory blockFactory;
private final SortedDocValues docValues;
private int minOrd = Integer.MAX_VALUE;
private int maxOrd = Integer.MIN_VALUE;
private final int[] ords;
private int count;

@@ -39,8 +41,10 @@ public SingletonOrdinalsBuilder appendNull() {
}

@Override
public SingletonOrdinalsBuilder appendOrd(int value) {
ords[count++] = value;
public SingletonOrdinalsBuilder appendOrd(int ord) {
ords[count++] = ord;
minOrd = Math.min(minOrd, ord);
maxOrd = Math.max(maxOrd, ord);
return this;
}

@@ -55,7 +59,7 @@ public SingletonOrdinalsBuilder endPositionEntry() {
}

BytesRefBlock buildOrdinal() {
int valueCount = docValues.getValueCount();
int valueCount = maxOrd - minOrd + 1;
long breakerSize = ordsSize(valueCount);
blockFactory.adjustBreaker(breakerSize);
BytesRefVector bytesVector = null;
@@ -65,7 +69,7 @@ BytesRefBlock buildOrdinal() {
Arrays.fill(newOrds, -1);
for (int ord : ords) {
if (ord != -1) {
newOrds[ord] = 0;
newOrds[ord - minOrd] = 0;
}
}
// resolve the ordinals and remaps the ordinals
@@ -74,7 +78,7 @@ BytesRefBlock buildOrdinal() {
for (int i = 0; i < newOrds.length; i++) {
if (newOrds[i] != -1) {
newOrds[i] = ++nextOrd;
bytesBuilder.appendBytesRef(docValues.lookupOrd(i));
bytesBuilder.appendBytesRef(docValues.lookupOrd(i + minOrd));
}
}
bytesVector = bytesBuilder.build();
@@ -86,7 +90,7 @@ BytesRefBlock buildOrdinal() {
if (ord == -1) {
ordinalsBuilder.appendNull();
} else {
ordinalsBuilder.appendInt(newOrds[ord]);
ordinalsBuilder.appendInt(newOrds[ord - minOrd]);
}
}
ordinalBlock = ordinalsBuilder.build();
@@ -107,7 +111,6 @@ BytesRefBlock buildRegularBlock() {
blockFactory.adjustBreaker(breakerSize);
try {
int[] sortedOrds = ords.clone();
Arrays.sort(sortedOrds);
int uniqueCount = compactToUnique(sortedOrds);

try (BreakingBytesRefBuilder copies = new BreakingBytesRefBuilder(blockFactory.breaker(), "ords")) {
@@ -167,7 +170,12 @@ public BytesRefBlock build() {
}

boolean shouldBuildOrdinalsBlock() {
return ords.length >= 2 * docValues.getValueCount() && ords.length >= 32;
if (minOrd <= maxOrd) {
int numOrds = maxOrd - minOrd + 1;
return OrdinalBytesRefBlock.isDense(ords.length, numOrds);
} else {
return false;
}
}

@Override
Original file line number Diff line number Diff line change
@@ -8,7 +8,10 @@
package org.elasticsearch.compute.data;

import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.store.Directory;
@@ -37,6 +40,7 @@
import static org.elasticsearch.test.MapMatcher.assertMap;
import static org.elasticsearch.test.MapMatcher.matchesMap;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;

public class SingletonOrdinalsBuilderTests extends ESTestCase {
public void testReader() throws IOException {
@@ -154,6 +158,39 @@ public void testAllNull() throws IOException {
}
}

public void testEmitOrdinalForHighCardinality() throws IOException {
BlockFactory factory = breakingDriverContext().blockFactory();
int numOrds = between(50, 100);
try (Directory directory = newDirectory(); IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig())) {
for (int o = 0; o < numOrds; o++) {
int docPerOrds = between(10, 15);
for (int d = 0; d < docPerOrds; d++) {
indexWriter.addDocument(List.of(new SortedDocValuesField("f", new BytesRef("value-" + o))));
}
}
try (IndexReader reader = DirectoryReader.open(indexWriter)) {
assertThat(reader.leaves(), hasSize(1));
for (LeafReaderContext ctx : reader.leaves()) {
int batchSize = between(40, 100);
int ord = random().nextInt(numOrds);
try (
var b1 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize);
var b2 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize)
) {
for (int i = 0; i < batchSize; i++) {
b1.appendOrd(ord);
b2.appendOrd(ord);
}
try (BytesRefBlock block1 = b1.build(); BytesRefBlock block2 = b2.buildRegularBlock()) {
assertThat(block1, equalTo(block2));
assertNotNull(block1.asOrdinals());
}
}
}
}
}
}

static BytesRefBlock buildOrdinalsBuilder(SingletonOrdinalsBuilder builder) {
if (randomBoolean()) {
return builder.buildRegularBlock();