-
Notifications
You must be signed in to change notification settings - Fork 25.3k
ES|QL - Allow full text functions to be used in STATS ... WHERE #125479
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
5f4eff5
748e576
dc1dbe6
3117d50
3a753d2
2a8e987
c99d373
2cc6898
425f146
c842f14
8a0f48f
dfce035
3963fd3
85a2374
4f3228a
351d1ff
52dd7b9
02f8748
f9bf170
e98ad43
40eaeb8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -755,3 +755,29 @@ book_no:keyword | |
7140 | ||
2714 | ||
; | ||
|
||
testMatchInStatsNonPushable | ||
required_capability: match_function | ||
required_capability: full_text_functions_in_stats_where | ||
|
||
from books | ||
| where length(title) > 40 | ||
| stats c = count(*) where match(title, "Lord") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here are some suggestions on the test coverages that I can think of, these also apply to the other full text functions and operators:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I added testing in 3963fd3, hopefully that works!
It affects scoring as well. I'm thinking that
is the same as
so using a FTF in a STATS WHERE clause affects scoring as well. I think it's better for consistency, but happy to discuss with the team. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The example above makes sense to me. I wonder how the
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
@fang-xing-esql good catch. That doesn't work, as the filters are not pushed down to Lucene and thus there's no scoring available 😓 It could be doable to calculate the scoring on the individual WHERE clauses in the aggregation, but I'm inclined not to do it. We're looking into having separate scores for individual queries as a separate effort, and this kind of replicates some of those efforts. What I've done is to disallow the usage of
but does not allow to use WHERE in STATS:
Change done in e98ad43 I think this is a good compromise, and we can work to lift that limitation later if needed be. LMKWYT |
||
; | ||
|
||
c:long | ||
3 | ||
; | ||
|
||
testMatchInStatsPushable | ||
required_capability: match_function | ||
required_capability: full_text_functions_in_stats_where | ||
|
||
from books | ||
| stats c = count(*) where match(author, "tolkien") | ||
; | ||
|
||
c:long | ||
22 | ||
; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
import org.elasticsearch.xpack.esql.core.tree.Source; | ||
import org.elasticsearch.xpack.esql.core.type.DataType; | ||
import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; | ||
import org.elasticsearch.xpack.esql.expression.function.aggregate.FilteredExpression; | ||
import org.elasticsearch.xpack.esql.expression.predicate.logical.BinaryLogic; | ||
import org.elasticsearch.xpack.esql.expression.predicate.logical.Not; | ||
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; | ||
|
@@ -209,13 +210,32 @@ private static void checkFullTextQueryFunctions(LogicalPlan plan, Failures failu | |
failures | ||
); | ||
checkFullTextFunctionsParents(condition, failures); | ||
} else if (plan instanceof Aggregate agg) { | ||
agg.forEachExpression(exp -> checkFullTextFunctionsInAggs(exp, failures)); | ||
} else { | ||
plan.forEachExpression(FullTextFunction.class, ftf -> { | ||
failures.add(fail(ftf, "[{}] {} is only supported in WHERE commands", ftf.functionName(), ftf.functionType())); | ||
failures.add( | ||
fail(ftf, "[{}] {} is only supported in WHERE and STATS ... WHERE commands", ftf.functionName(), ftf.functionType()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
); | ||
}); | ||
} | ||
} | ||
|
||
private static void checkFullTextFunctionsInAggs(Expression expression, Failures failures) { | ||
if (expression instanceof FilteredExpression) { | ||
return; | ||
} | ||
for (Expression child : expression.children()) { | ||
if (child instanceof FullTextFunction ftf) { | ||
failures.add( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: we could maybe add a method that builds the failure message and reuse it above as well. |
||
fail(ftf, "[{}] {} is only supported in WHERE and STATS ... WHERE commands", ftf.functionName(), ftf.functionType()) | ||
); | ||
return; | ||
} | ||
checkFullTextFunctionsInAggs(child, failures); | ||
} | ||
} | ||
|
||
/** | ||
* Checks all commands that exist before a specific type satisfy conditions. | ||
* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,8 @@ | |
import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner.LocalExecutionPlannerContext; | ||
import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner.PhysicalOperation; | ||
|
||
import java.util.List; | ||
|
||
interface PhysicalOperationProviders { | ||
PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fieldExtractExec, PhysicalOperation source); | ||
|
||
|
@@ -21,6 +23,7 @@ interface PhysicalOperationProviders { | |
PhysicalOperation groupingPhysicalOperation( | ||
AggregateExec aggregateExec, | ||
PhysicalOperation source, | ||
LocalExecutionPlannerContext context | ||
LocalExecutionPlannerContext context, | ||
List<EsPhysicalOperationProviders.ShardContext> shardContexts | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's feels a bit odd to provide an implementation piece of EsPhysicalOperationProviders (in the list) to the interface, given that actually There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will help detect non-pushable expressions that need to be created with ShardConfig information