Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Abort request when too many Druid filters are generated #690

Merged
merged 3 commits into from
May 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ pull request if there was one.

### Changed:

- [Abort request when too many Druid filters are generated](https://github.com/yahoo/fili/pull/690)
* In order to avoid Druid queries with too many filters on high-cardinality dimensions, Fili sets an upper limit
on the number of filters and aborts requests if the limit is exceeded.

- [Class re-organization](https://github.com/yahoo/fili/pull/694)
* Put `Granularity` interfaces and its implementations in the same package
* Put `*ApiRequest` interfaces and their implementations in the same package
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.data.filterbuilders;

import static com.yahoo.bard.webservice.web.ErrorMessageFormat.TOO_MANY_DRUID_FILTERS;

import com.yahoo.bard.webservice.config.SystemConfig;
import com.yahoo.bard.webservice.config.SystemConfigProvider;
import com.yahoo.bard.webservice.data.dimension.Dimension;
import com.yahoo.bard.webservice.data.dimension.DimensionRow;
import com.yahoo.bard.webservice.data.dimension.DimensionRowNotFoundException;
Expand All @@ -11,6 +15,7 @@
import com.yahoo.bard.webservice.druid.model.filter.ExtractionFilter;
import com.yahoo.bard.webservice.druid.model.filter.Filter;
import com.yahoo.bard.webservice.druid.model.filter.SelectorFilter;
import com.yahoo.bard.webservice.exception.TooManyDruidFiltersException;
import com.yahoo.bard.webservice.web.ApiFilter;
import com.yahoo.bard.webservice.web.ErrorMessageFormat;

Expand All @@ -30,7 +35,15 @@
* each dimension being filtered on.
*/
public abstract class ConjunctionDruidFilterBuilder implements DruidFilterBuilder {

private static final Logger LOG = LoggerFactory.getLogger(ConjunctionDruidFilterBuilder.class);
private static final SystemConfig SYSTEM_CONFIG = SystemConfigProvider.getInstance();

// Fallback limit, handed to the config lookup below as its default value.
private static final int DEFAULT_MAX_NUM_DRUID_FILTERS = 10000;
// Largest number of fields allowed in the AndFilter produced by buildFilters. Resolved once, when this class is
// initialized, from the package-scoped "max_num_druid_filters" property
// (presumably "bard__max_num_druid_filters" in the properties files -- confirm against getPackageVariableName).
private static final int MAX_NUM_DRUID_FILTERS = SYSTEM_CONFIG.getIntProperty(
SYSTEM_CONFIG.getPackageVariableName("max_num_druid_filters"),
DEFAULT_MAX_NUM_DRUID_FILTERS
);

@Override
public Filter buildFilters(Map<Dimension, Set<ApiFilter>> filterMap) throws DimensionRowNotFoundException {
Expand All @@ -52,6 +65,12 @@ public Filter buildFilters(Map<Dimension, Set<ApiFilter>> filterMap) throws Dime
}

AndFilter newFilter = new AndFilter(dimensionFilters);

if (newFilter.getFields().size() > MAX_NUM_DRUID_FILTERS) {
LOG.error(TOO_MANY_DRUID_FILTERS.logFormat());
throw new TooManyDruidFiltersException(TOO_MANY_DRUID_FILTERS.format());
}

LOG.trace("Filter: {}", newFilter);
return newFilter;
}
Expand Down Expand Up @@ -88,6 +107,7 @@ protected Set<DimensionRow> getFilteredDimensionRows(Dimension dimension, Set<Ap
LOG.debug(msg);
throw new DimensionRowNotFoundException(msg);
}

return rows;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright 2016 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.exception;

/**
 * Unchecked exception signalling that too many Druid filters are generated for a single Druid query.
 * <p>
 * A "contains" filter on a dimension with an extremely large cardinality can match a huge number of dimension
 * values. For example, a filter of dimension|id-contains[123], where 10,000 IDs match 123, could make
 * {@link com.yahoo.bard.webservice.data.filterbuilders.DefaultDruidFilterBuilder} generate 10,000 Druid filters.
 * Such a giant query would eventually time out in Druid, and the timeout error would be returned to the API user.
 */
public class TooManyDruidFiltersException extends RuntimeException {

    /**
     * Builds the exception from both a description and an underlying cause.
     *
     * @param message  Human-readable description of the failure
     * @param cause  The throwable that led to this exception
     */
    public TooManyDruidFiltersException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Builds the exception from a description only; the cause is left uninitialized.
     *
     * @param message  Human-readable description of the failure
     */
    public TooManyDruidFiltersException(String message) {
        super(message);
    }

    /**
     * Builds the exception from an underlying cause only.
     *
     * @param cause  The throwable that led to this exception
     */
    public TooManyDruidFiltersException(Throwable cause) {
        super(cause);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,12 @@ public enum ErrorMessageFormat implements MessageFormatter {
"%s operator needs %d parameters but found %d"),
HAVING_OPERATOR_IMPROPER_RANGE("Upper range cannot be less than the lower range for %s operator."),

// Reported when a request is aborted because the generated Druid filter is too large.
// Two strings are supplied: presumably the first is the user-facing message and the second the log message
// -- confirm against this enum's two-argument constructor.
// NOTE(review): the user-facing text reads "will significantly slows down or timeout"; consider rewording to
// "slow down or time out" in a follow-up, after checking whether any test pins the exact string.
TOO_MANY_DRUID_FILTERS(
"Too many filtering dimension values found. Request is aborted because it will significantly slows down " +
"or timeout Druid query.",
"DruidFilterBuilder generated too many filters. Request is aborted."
),

UNABLE_TO_CREATE_DIR("Unable to create directory %s."),
UNABLE_TO_DELETE_DIR("Unable to delete directory %s."),
FAIL_TO_WIPTE_LUCENE_INDEX_DIR("Failed to wipte Lucene index at directory: %s"),
Expand Down
3 changes: 3 additions & 0 deletions fili-core/src/main/resources/moduleConfig.properties
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ bard__lucene_index_path = [SET ME IN APPLICATION CONFIG]
# Lucene search timeout in milliseconds
bard__lucene_search_timeout_ms = 600000

# Maximum number of druid filters in a Fili-generated Druid query
bard__max_num_druid_filters = 10000

# setting for maximum allowed results without any filters - used for /dim/values endpoint
bard__max_results_without_filters = 10000

Expand Down
3 changes: 3 additions & 0 deletions fili-core/src/test/resources/testApplicationConfig.properties
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ bard__cache_partial_data = false
# Lucene index files path
bard__lucene_index_path = ./target/tmp/

# Maximum number of druid filters in a Fili-generated Druid query
bard__max_num_druid_filters = 10000

# max results without filters
# Default number of records per-page. This applies ONLY to the dimensions endpoint, NOT to the data endpoint. The
# data endpoint does not paginate by default.
Expand Down