Skip to content

Commit

Permalink
Support filtering on bool/scalar fields without evaluator
Browse files Browse the repository at this point in the history
Fixes the bugs in apache#8444 and apache#8487.
Added unit tests.
  • Loading branch information
Vivek Iyer Vaidyanathan committed May 2, 2022
1 parent b5690a7 commit a229fc7
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,13 @@ public boolean isRange() {
return this == GREATER_THAN || this == GREATER_THAN_OR_EQUAL || this == LESS_THAN || this == LESS_THAN_OR_EQUAL
|| this == BETWEEN || this == RANGE;
}

/**
* Returns true if the filter is a predicate. Returns false otherwise. This logic should mimic FilterContext.Type.
*
* @return where the filter is a predicate.
*/
public boolean isPredicate() {
return this != AND && this != OR && this != NOT;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,24 @@
*/
package org.apache.pinot.sql.parsers.rewriter;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.EnumUtils;
import org.apache.pinot.common.request.Expression;
import org.apache.pinot.common.request.ExpressionType;
import org.apache.pinot.common.request.Function;
import org.apache.pinot.common.request.PinotQuery;
import org.apache.pinot.common.utils.request.RequestUtils;
import org.apache.pinot.pql.parsers.pql2.ast.FilterKind;
import org.apache.pinot.sql.parsers.SqlCompilationException;


public class PredicateComparisonRewriter implements QueryRewriter {
@Override
public PinotQuery rewrite(PinotQuery pinotQuery) {
Expression filterExpression = pinotQuery.getFilterExpression();
if (filterExpression != null) {
pinotQuery.setFilterExpression(updateComparisonPredicate(filterExpression));
if (pinotQuery.getFilterExpression() != null) {
pinotQuery.setFilterExpression(updateBooleanPredicates(pinotQuery.getFilterExpression(), null));
pinotQuery.setFilterExpression(updateComparisonPredicate(pinotQuery.getFilterExpression()));
}
Expression havingExpression = pinotQuery.getHavingExpression();
if (havingExpression != null) {
Expand All @@ -42,6 +44,73 @@ public PinotQuery rewrite(PinotQuery pinotQuery) {
return pinotQuery;
}

/**
* Updates boolean predicates that are missing an EQUALS filter.
* E.g. 1: 'WHERE a' will be updated to 'WHERE a = true'
* E.g. 2: "WHERE startsWith(col, 'str')" will be updated to "WHERE startsWith(col, 'str') = true"
*
* @param expression current expression in the expression tree
* @param parentFunction parent expression
* @return re-written expression.
*/
private static Expression updateBooleanPredicates(Expression expression, Function parentFunction) {
ExpressionType type = expression.getType();
if (type == ExpressionType.FUNCTION) {
Function function = expression.getFunctionCall();

// If the function is not of FilterKind, we might have to rewrite the function if parentFunction is AND, OR or
// NOT. Example: A query like "select col1 from table where startsWith(col1, 'myStr') AND col2 > 10;" should be
// rewritten to "select col1 from table where startsWith(col1, 'myStr') = true AND col2 > 10;".
if (!EnumUtils.isValidEnum(FilterKind.class, function.getOperator())) {

// Rewrite non FilterKind functions if one of the two conditions are satified:
// 1. parent function is empty. Example: "select * from table where startsWith(col, 'myStr');
// 2. Separator is not a predicate (non predicates are AND, OR, NOT).
// Example: "select * from table where startsWith(col, 'myStr') AND col2 > 10";
if (parentFunction == null || (EnumUtils.isValidEnum(FilterKind.class, parentFunction.getOperator())
&& !FilterKind.valueOf(parentFunction.getOperator()).isPredicate())) {
Expression currExpression = expression.deepCopy();
expression = RequestUtils.getFunctionExpression(FilterKind.EQUALS.name());
List<Expression> operands = new ArrayList<>();
operands.add(currExpression);
operands.add(RequestUtils.getLiteralExpression(true));
expression.getFunctionCall().setOperands(operands);

return expression;
}
}

List<Expression> operands = expression.getFunctionCall().getOperands();

if (operands.size() > 0) {
// unset operands only within the "if" condition because 'no argument' functions expect operand list to
// be initialized.
expression.getFunctionCall().unsetOperands();

for (Expression exp : operands) {
expression.getFunctionCall().addToOperands(updateBooleanPredicates(exp, expression.getFunctionCall()));
}
}
} else if (type == ExpressionType.IDENTIFIER) {

// Rewrite identifiers if one of the two conditions are satified:
// 1. parent function is empty. Example: "select * from table where col1";
// 2. Separator is not a predicate (non predicates are AND, OR, NOT).
// Example: "select * from table where col1 AND col2 > 10";
if (parentFunction == null || (EnumUtils.isValidEnum(FilterKind.class, parentFunction.getOperator())
&& !FilterKind.valueOf(parentFunction.getOperator()).isPredicate())) {
Expression currExpression = expression.deepCopy();
expression = RequestUtils.getFunctionExpression(FilterKind.EQUALS.name());
List<Expression> operands = new ArrayList<>();
operands.add(currExpression);
operands.add(RequestUtils.getLiteralExpression(true));
expression.getFunctionCall().setOperands(operands);
}
}

return expression;
}

// This method converts a predicate expression to the what Pinot could evaluate.
// For comparison expression, left operand could be any expression, but right operand only
// supports literal.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2205,6 +2205,19 @@ public void testFlattenAndOr() {
}
}

{
String query = "SELECT * FROM foo WHERE col1 > 0 AND (col2 AND col3 > 0) AND startsWith(col4, 'myStr')";
PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
Function functionCall = pinotQuery.getFilterExpression().getFunctionCall();
Assert.assertEquals(functionCall.getOperator(), FilterKind.AND.name());
List<Expression> operands = functionCall.getOperands();
Assert.assertEquals(operands.size(), 4);
Assert.assertEquals(operands.get(0).getFunctionCall().getOperator(), FilterKind.GREATER_THAN.name());
Assert.assertEquals(operands.get(1).getFunctionCall().getOperator(), FilterKind.EQUALS.name());
Assert.assertEquals(operands.get(2).getFunctionCall().getOperator(), FilterKind.GREATER_THAN.name());
Assert.assertEquals(operands.get(3).getFunctionCall().getOperator(), FilterKind.EQUALS.name());
}

{
String query = "SELECT * FROM foo WHERE col1 > 0 AND (col2 AND col3 > 0) AND col4";
PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
Expand All @@ -2213,9 +2226,9 @@ public void testFlattenAndOr() {
List<Expression> operands = functionCall.getOperands();
Assert.assertEquals(operands.size(), 4);
Assert.assertEquals(operands.get(0).getFunctionCall().getOperator(), FilterKind.GREATER_THAN.name());
Assert.assertEquals(operands.get(1).getIdentifier().getName(), "col2");
Assert.assertEquals(operands.get(1).getFunctionCall().getOperator(), FilterKind.EQUALS.name());
Assert.assertEquals(operands.get(2).getFunctionCall().getOperator(), FilterKind.GREATER_THAN.name());
Assert.assertEquals(operands.get(3).getIdentifier().getName(), "col4");
Assert.assertEquals(operands.get(3).getFunctionCall().getOperator(), FilterKind.EQUALS.name());

// NOTE: PQL does not support logical identifier
}
Expand Down Expand Up @@ -2249,9 +2262,9 @@ public void testFlattenAndOr() {
List<Expression> operands = functionCall.getOperands();
Assert.assertEquals(operands.size(), 4);
Assert.assertEquals(operands.get(0).getFunctionCall().getOperator(), FilterKind.LESS_THAN_OR_EQUAL.name());
Assert.assertEquals(operands.get(1).getIdentifier().getName(), "col2");
Assert.assertEquals(operands.get(1).getFunctionCall().getOperator(), FilterKind.EQUALS.name());
Assert.assertEquals(operands.get(2).getFunctionCall().getOperator(), FilterKind.LESS_THAN_OR_EQUAL.name());
Assert.assertEquals(operands.get(3).getIdentifier().getName(), "col4");
Assert.assertEquals(operands.get(3).getFunctionCall().getOperator(), FilterKind.EQUALS.name());

// NOTE: PQL does not support logical identifier
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,21 @@ public void testQueries() {
assertEquals(row[0], false);
}
}
{
String query = "SELECT booleanColumn FROM testTable WHERE booleanColumn";
BrokerResponseNative brokerResponse = getBrokerResponseForSqlQuery(query);
ResultTable resultTable = brokerResponse.getResultTable();
DataSchema dataSchema = resultTable.getDataSchema();
assertEquals(dataSchema,
new DataSchema(new String[]{"booleanColumn"}, new ColumnDataType[]{ColumnDataType.BOOLEAN}));
List<Object[]> rows = resultTable.getRows();
assertEquals(rows.size(), 10);
for (int i = 0; i < 10; i++) {
Object[] row = rows.get(i);
assertEquals(row.length, 1);
assertEquals(row[0], true);
}
}
{
String query = "SELECT * FROM testTable ORDER BY booleanColumn DESC LIMIT 20";
BrokerResponseNative brokerResponse = getBrokerResponse(query);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ public class ExplainPlanQueriesTest extends BaseQueriesTest {
private final static String COL1_NO_INDEX = "noIndexCol1";
private final static String COL2_NO_INDEX = "noIndexCol2";
private final static String COL3_NO_INDEX = "noIndexCol3";
private final static String COL4_NO_INDEX = "noIndexCol4";
private final static String COL1_INVERTED_INDEX = "invertedIndexCol1";
private final static String COL2_INVERTED_INDEX = "invertedIndexCol2";
private final static String COL3_INVERTED_INDEX = "invertedIndexCol3";
Expand All @@ -69,6 +70,7 @@ public class ExplainPlanQueriesTest extends BaseQueriesTest {
.addSingleValueDimension(COL1_NO_INDEX, FieldSpec.DataType.INT)
.addSingleValueDimension(COL2_NO_INDEX, FieldSpec.DataType.INT)
.addSingleValueDimension(COL3_NO_INDEX, FieldSpec.DataType.INT)
.addSingleValueDimension(COL4_NO_INDEX, FieldSpec.DataType.BOOLEAN)
.addSingleValueDimension(COL1_INVERTED_INDEX, FieldSpec.DataType.DOUBLE)
.addSingleValueDimension(COL2_INVERTED_INDEX, FieldSpec.DataType.INT)
.addSingleValueDimension(COL3_INVERTED_INDEX, FieldSpec.DataType.STRING)
Expand Down Expand Up @@ -104,14 +106,16 @@ protected List<IndexSegment> getIndexSegments() {
return _indexSegments;
}

GenericRow createMockRecord(int noIndexCol1, int noIndexCol2, int noIndexCol3, double invertedIndexCol1,
int invertedIndexCol2, String intervedIndexCol3, double rangeIndexCol1, int rangeIndexCol2, int rangeIndexCol3,
double sortedIndexCol1, String jsonIndexCol1, String textIndexCol1) {
GenericRow createMockRecord(int noIndexCol1, int noIndexCol2, int noIndexCol3,
boolean noIndexCol4, double invertedIndexCol1, int invertedIndexCol2, String intervedIndexCol3,
double rangeIndexCol1, int rangeIndexCol2, int rangeIndexCol3, double sortedIndexCol1, String jsonIndexCol1,
String textIndexCol1) {

GenericRow record = new GenericRow();
record.putValue(COL1_NO_INDEX, noIndexCol1);
record.putValue(COL2_NO_INDEX, noIndexCol2);
record.putValue(COL3_NO_INDEX, noIndexCol3);
record.putValue(COL4_NO_INDEX, noIndexCol4);

record.putValue(COL1_INVERTED_INDEX, invertedIndexCol1);
record.putValue(COL2_INVERTED_INDEX, invertedIndexCol2);
Expand All @@ -135,11 +139,11 @@ public void setUp()
FileUtils.deleteDirectory(INDEX_DIR);

List<GenericRow> records = new ArrayList<>(NUM_RECORDS);
records.add(createMockRecord(1, 2, 3, 1.1, 2, "daffy", 10.1, 20, 30, 100.1,
records.add(createMockRecord(1, 2, 3, true, 1.1, 2, "daffy", 10.1, 20, 30, 100.1,
"{\"first\": \"daffy\", \"last\": " + "\"duck\"}", "daffy"));
records.add(createMockRecord(0, 1, 2, 0.1, 1, "mickey", 0.1, 10, 20, 100.2,
records.add(createMockRecord(0, 1, 2, false, 0.1, 1, "mickey", 0.1, 10, 20, 100.2,
"{\"first\": \"mickey\", \"last\": " + "\"mouse\"}", "mickey"));
records.add(createMockRecord(3, 4, 5, 2.1, 3, "mickey", 20.1, 30, 40, 100.3,
records.add(createMockRecord(3, 4, 5, true, 2.1, 3, "mickey", 20.1, 30, 40, 100.3,
"{\"first\": \"mickey\", \"last\": " + "\"mouse\"}", "mickey"));

IndexingConfig indexingConfig = TABLE_CONFIG.getIndexingConfig();
Expand Down Expand Up @@ -191,14 +195,14 @@ public void testSelect() {
result1.add(new Object[]{"COMBINE_SELECT", 1, 0});
result1.add(new Object[]{
"SELECT(selectList:invertedIndexCol1, invertedIndexCol2, invertedIndexCol3, jsonIndexCol1, "
+ "noIndexCol1, noIndexCol2, noIndexCol3, rangeIndexCol1, rangeIndexCol2, rangeIndexCol3, "
+ "noIndexCol1, noIndexCol2, noIndexCol3, noIndexCol4, rangeIndexCol1, rangeIndexCol2, rangeIndexCol3, "
+ "sortedIndexCol1, textIndexCol1)", 2, 1});
result1.add(new Object[]{"TRANSFORM_PASSTHROUGH(invertedIndexCol1, invertedIndexCol2, invertedIndexCol3, "
+ "jsonIndexCol1, noIndexCol1, noIndexCol2, noIndexCol3, rangeIndexCol1, rangeIndexCol2, rangeIndexCol3, "
+ "sortedIndexCol1, textIndexCol1)", 3, 2});
result1.add(new Object[]{"PROJECT(sortedIndexCol1, noIndexCol3, rangeIndexCol1, rangeIndexCol2, jsonIndexCol1, "
+ "invertedIndexCol1, noIndexCol2, invertedIndexCol2, noIndexCol1, invertedIndexCol3, rangeIndexCol3, "
+ "textIndexCol1)", 4, 3});
+ "jsonIndexCol1, noIndexCol1, noIndexCol2, noIndexCol3, noIndexCol4, rangeIndexCol1, rangeIndexCol2, "
+ "rangeIndexCol3, sortedIndexCol1, textIndexCol1)", 3, 2});
result1.add(new Object[]{"PROJECT(noIndexCol4, sortedIndexCol1, noIndexCol3, rangeIndexCol1, rangeIndexCol2, "
+ "invertedIndexCol1, noIndexCol2, invertedIndexCol2, noIndexCol1, rangeIndexCol3, textIndexCol1, "
+ "jsonIndexCol1, invertedIndexCol3)", 4, 3});
result1.add(new Object[]{"DOC_ID_SET", 5, 4});
result1.add(new Object[]{"FILTER_MATCH_ENTIRE_SEGMENT(docs:3)", 6, 5});
check(query1, new ResultTable(DATA_SCHEMA, result1));
Expand Down Expand Up @@ -337,6 +341,47 @@ public void testSelectColumnsUsingFilter() {
result3.add(new Object[]{"FILTER_FULL_SCAN(operator:RANGE,predicate:noIndexCol1 > '1')", 7, 6});
result3.add(new Object[]{"FILTER_FULL_SCAN(operator:RANGE,predicate:noIndexCol2 BETWEEN '2' AND '101')", 8, 6});
check(query3, new ResultTable(DATA_SCHEMA, result3));

String query4 = "EXPLAIN PLAN FOR SELECT invertedIndexCol1, noIndexCol1 FROM testTable WHERE noIndexCol1 > 1 OR "
+ "contains(textIndexCol1, 'daff') OR noIndexCol2 BETWEEN 2 AND 101 LIMIT 100";
List<Object[]> result4 = new ArrayList<>();
result4.add(new Object[]{"BROKER_REDUCE(limit:100)", 0, -1});
result4.add(new Object[]{"COMBINE_SELECT", 1, 0});
result4.add(new Object[]{"SELECT(selectList:invertedIndexCol1, noIndexCol1)", 2, 1});
result4.add(new Object[]{"TRANSFORM_PASSTHROUGH(invertedIndexCol1, noIndexCol1)", 3, 2});
result4.add(new Object[]{"PROJECT(invertedIndexCol1, noIndexCol1)", 4, 3});
result4.add(new Object[]{"DOC_ID_SET", 5, 4});
result4.add(new Object[]{"FILTER_OR", 6, 5});
result4.add(new Object[]{"FILTER_FULL_SCAN(operator:RANGE,predicate:noIndexCol1 > '1')", 7, 6});
result4.add(new Object[]{"FILTER_EXPRESSION(operator:EQ,predicate:contains(textIndexCol1,'daff') = 'true')", 8, 6});
result4.add(new Object[]{"FILTER_FULL_SCAN(operator:RANGE,predicate:noIndexCol2 BETWEEN '2' AND '101')", 9, 6});
check(query4, new ResultTable(DATA_SCHEMA, result4));

String query5 = "EXPLAIN PLAN FOR SELECT invertedIndexCol1, noIndexCol1 FROM testTable WHERE noIndexCol4 LIMIT 100";
List<Object[]> result5 = new ArrayList<>();
result5.add(new Object[]{"BROKER_REDUCE(limit:100)", 0, -1});
result5.add(new Object[]{"COMBINE_SELECT", 1, 0});
result5.add(new Object[]{"SELECT(selectList:invertedIndexCol1, noIndexCol1)", 2, 1});
result5.add(new Object[]{"TRANSFORM_PASSTHROUGH(invertedIndexCol1, noIndexCol1)", 3, 2});
result5.add(new Object[]{"PROJECT(invertedIndexCol1, noIndexCol1)", 4, 3});
result5.add(new Object[]{"DOC_ID_SET", 5, 4});
result5.add(new Object[]{"FILTER_FULL_SCAN(operator:EQ,predicate:noIndexCol4 = 'true')", 6, 5});
check(query5, new ResultTable(DATA_SCHEMA, result5));

String query6 = "EXPLAIN PLAN FOR SELECT invertedIndexCol1, noIndexCol1 FROM testTable WHERE startsWith "
+ "(textIndexCol1, 'daff') AND noIndexCol4";
List<Object[]> result6 = new ArrayList<>();
result6.add(new Object[]{"BROKER_REDUCE(limit:10)", 0, -1});
result6.add(new Object[]{"COMBINE_SELECT", 1, 0});
result6.add(new Object[]{"SELECT(selectList:invertedIndexCol1, noIndexCol1)", 2, 1});
result6.add(new Object[]{"TRANSFORM_PASSTHROUGH(invertedIndexCol1, noIndexCol1)", 3, 2});
result6.add(new Object[]{"PROJECT(invertedIndexCol1, noIndexCol1)", 4, 3});
result6.add(new Object[]{"DOC_ID_SET", 5, 4});
result6.add(new Object[]{"FILTER_AND", 6, 5});
result6.add(new Object[]{"FILTER_FULL_SCAN(operator:EQ,predicate:noIndexCol4 = 'true')", 7, 6});
result6.add(new Object[]{"FILTER_EXPRESSION(operator:EQ,predicate:startswith(textIndexCol1,'daff') = 'true')", 8,
6});
check(query6, new ResultTable(DATA_SCHEMA, result6));
}

/** Test case for SQL statements with filter that involves inverted or sorted index access. */
Expand Down

0 comments on commit a229fc7

Please sign in to comment.