Skip to content

Commit

Permalink
Integrate new core composite table and availability structure into Fili
Browse files Browse the repository at this point in the history
  • Loading branch information
michael-mclawhorn authored and garyluoex committed Mar 6, 2017
1 parent 33ca04b commit 540c757
Show file tree
Hide file tree
Showing 54 changed files with 681 additions and 761 deletions.
19 changes: 18 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ Current
-------

### Added:
- [CompositePhsyicalTable Core Components Refactor](https://github.com/yahoo/fili/pull/179)
* Added `ConcretePhysicalTable` and `ConcreteAvailability` to model table in druid datasource and its availabillity in the new table availability structure
* Added method `containsColumn` to `Schema`

- [Refactor DatasourceMetaDataService to fit composite table needs](https://github.com/yahoo/fili/pull/173)
* `DataSourceMetadataService` also stores interval data from segment data as intervals by column name map and provides method `getAvailableIntervalsByTable` to retrieve it

Expand All @@ -30,6 +34,10 @@ Current
- [Support timeouts for lucene search provider](https://github.com/yahoo/fili/pull/183)

### Changed:
- [CompositePhsyicalTable Core Components Refactor](https://github.com/yahoo/fili/pull/179)
* `ConfigurationLoader` now takes an additional constructor argument `DataSourceMetadataService` for creating tables
* `findMissingRequestTimeGrainIntervals` method in `PartialDataHandler` now takes `DataSourceConstraint`

- [Refactor DatasourceMetaDataService to fit composite table needs](https://github.com/yahoo/fili/pull/173)
* `BasePhysicalTable` now stores table name as the `TableName` instead of `String`
* `SegmentInfo` now stores dimension and metrics from segment data for constructing column to available interval map
Expand Down Expand Up @@ -58,6 +66,10 @@ Current

### Deprecated:

- [CompositePhsyicalTable Core Components Refactor](https://github.com/yahoo/fili/pull/179)
* `getAvailability` method on `PartialDataHandler` is now deprecated since the logic is pushed into `Availability`
* `loadTableDictionary` method only taking one `ResourceDictionaries` argument, should use the one taking an additional `DataSourceMetadataService` instead

- [`RequestLog::stopMostRecentTimer` has been deprecated](https://github.com/yahoo/fili/pull/143)
- This method is a part of the infrastructure to support the recently
deprecated `RequestLog::switchTiming`.
Expand All @@ -77,7 +89,12 @@ Current


### Removed:

- [CompositePhsyicalTable Core Components Refactor](https://github.com/yahoo/fili/pull/179)
* Removed deprecated method `findMissingRequestTimeGrainIntervals` from `PartialDataHandler`
* Removed `permissive_column_availability_enabled` feature flag support and corresponding functionality in `PartialDataHandler`, permissive availability will be a table configuration
* Removed `getIntersectSubintervalsForColumns` and `getUnionSubintervalsForColumns` from `PartialDataHandler` since the logic is pushed into `Availability` now
* Removed `getIntervalsByColumnName`, `resetColumns` and `hasLogicalMapping` methods in `PhysicalTable` since no longer needed with the availability structure
* Removed deprecated `buildTableGroup` method in `BaseTableLoader`


v0.7.36 - 2017/01/30
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,12 @@ protected void configure() {
//Initialize the metrics filter helper
FieldConverterSupplier.metricsFilterSetBuilder = initializeMetricsFilterSetBuilder();

// Build the datasource metadata service containing the data segments
DataSourceMetadataService dataSourceMetadataService = buildDataSourceMetadataService();
bind(dataSourceMetadataService).to(DataSourceMetadataService.class);

// Build the configuration loader and load configuration
ConfigurationLoader loader = buildConfigurationLoader();
ConfigurationLoader loader = buildConfigurationLoader(dataSourceMetadataService);
loader.load();

// Bind the configuration dictionaries
Expand Down Expand Up @@ -273,10 +277,6 @@ protected void configure() {
bind(PartialDataHandler.class).to(PartialDataHandler.class);
bind(getVolatileIntervalsService()).to(VolatileIntervalsService.class);

DataSourceMetadataService dataSourceMetadataService = buildDataSourceMetadataService();

bind(dataSourceMetadataService).to(DataSourceMetadataService.class);

QuerySigningService<?> querySigningService = buildQuerySigningService(
loader.getPhysicalTableDictionary(),
dataSourceMetadataService
Expand Down Expand Up @@ -833,13 +833,15 @@ protected final List<FeatureFlag> collectFeatureFlags(List<Class<? extends Featu
/**
* Build an application specific configuration loader initialized with pluggable loaders.
*
* @param metadataService datasource metadata service containing data segments for tables
*
* @return A configuration loader instance
*/
protected final ConfigurationLoader buildConfigurationLoader() {
protected final ConfigurationLoader buildConfigurationLoader(DataSourceMetadataService metadataService) {
DimensionLoader dimensionLoader = getDimensionLoader();
TableLoader tableLoader = getTableLoader();
MetricLoader metricLoader = getMetricLoader();
return buildConfigurationLoader(dimensionLoader, metricLoader, tableLoader);
return buildConfigurationLoader(dimensionLoader, metricLoader, tableLoader, metadataService);
}

/**
Expand All @@ -848,15 +850,17 @@ protected final ConfigurationLoader buildConfigurationLoader() {
* @param dimensionLoader A dimension loader
* @param metricLoader A metric loader
* @param tableLoader A table loader
* @param metadataService datasource metadata service containing data segments for tables
*
* @return A configurationLoader instance
*/
protected ConfigurationLoader buildConfigurationLoader(
DimensionLoader dimensionLoader,
MetricLoader metricLoader,
TableLoader tableLoader
TableLoader tableLoader,
DataSourceMetadataService metadataService
) {
return new ConfigurationLoader(dimensionLoader, metricLoader, tableLoader);
return new ConfigurationLoader(dimensionLoader, metricLoader, tableLoader, metadataService);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ public enum BardFeatureFlag implements FeatureFlag {
TOP_N("top_n_enabled"),
DATA_FILTER_SUBSTRING_OPERATIONS("data_filter_substring_operations_enabled"),
INTERSECTION_REPORTING("intersection_reporting_enabled"),
PERMISSIVE_COLUMN_AVAILABILITY("permissive_column_availability_enabled"),
UPDATED_METADATA_COLLECTION_NAMES("updated_metadata_collection_names_enabled"),
DRUID_COORDINATOR_METADATA("druid_coordinator_metadata_enabled"),
DRUID_DIMENSIONS_LOADER("druid_dimensions_loader_enabled"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,16 @@
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.data;

import static com.yahoo.bard.webservice.config.BardFeatureFlag.PERMISSIVE_COLUMN_AVAILABILITY;

import com.yahoo.bard.webservice.druid.model.query.AllGranularity;
import com.yahoo.bard.webservice.druid.model.query.DruidAggregationQuery;
import com.yahoo.bard.webservice.druid.model.query.Granularity;
import com.yahoo.bard.webservice.table.PhysicalTable;
import com.yahoo.bard.webservice.table.resolver.DataSourceConstraint;
import com.yahoo.bard.webservice.util.IntervalUtils;
import com.yahoo.bard.webservice.util.SimplifiedIntervalList;
import com.yahoo.bard.webservice.util.TableUtils;
import com.yahoo.bard.webservice.web.DataApiRequest;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collection;
import java.util.Set;

import javax.inject.Singleton;
Expand All @@ -31,39 +26,6 @@ public class PartialDataHandler {

private static final Logger LOG = LoggerFactory.getLogger(PartialDataHandler.class);

/**
* Find the request time grain intervals for which partial data is present for a given combination of request
* metrics and dimensions (pulled from the API request and generated druid query).
*
* @param apiRequest api request made by the end user
* @param query used to fetch data from druid
* @param physicalTables the tables whose column availabilities are checked
*
* @return list of simplified intervals with incomplete data
*
* @deprecated This function does the same thing as `findMissingTimeGrainIntervals`, except for an unnecessary
* null check.
*/
@Deprecated
public SimplifiedIntervalList findMissingRequestTimeGrainIntervals(
DataApiRequest apiRequest,
DruidAggregationQuery<?> query,
Set<PhysicalTable> physicalTables
) {
// Make sure we have a list of requested intervals
if (apiRequest.getIntervals() == null) {
String message = "Requested interval list cannot be null";
LOG.error(message);
throw new IllegalArgumentException(message);
}
return findMissingTimeGrainIntervals(
TableUtils.getColumnNames(apiRequest, query),
physicalTables,
new SimplifiedIntervalList(apiRequest.getIntervals()),
apiRequest.getGranularity()
);
}

/**
* Find the holes in the passed in intervals at a given granularity.
* <pre>
Expand All @@ -78,21 +40,21 @@ public SimplifiedIntervalList findMissingRequestTimeGrainIntervals(
* present for a given combination of request metrics and dimensions (pulled from the API request and generated
* druid query) at the specified granularity.
*
* @param columnNames all the column names the request depends on
* @param constraints all the column names the request depends on
* @param physicalTables the tables whose column availabilities are checked
* @param requestedIntervals The intervals that may not be fully satisfied
* @param granularity The granularity at which to find missing intervals
*
* @return subintervals of the requested intervals with incomplete data
*/
public SimplifiedIntervalList findMissingTimeGrainIntervals(
Set<String> columnNames,
DataSourceConstraint constraints,
Set<PhysicalTable> physicalTables,
@NotNull SimplifiedIntervalList requestedIntervals,
Granularity granularity
) {
SimplifiedIntervalList availableIntervals = physicalTables.stream()
.map(table -> getAvailability(table, columnNames))
.map(table -> table.getAvailableIntervals(constraints))
.flatMap(SimplifiedIntervalList::stream)
.collect(SimplifiedIntervalList.getCollector());

Expand All @@ -114,54 +76,14 @@ public SimplifiedIntervalList findMissingTimeGrainIntervals(
* merge into a single availability list.
*
* @param physicalTable The fact source for the columns
* @param columnNames The names of the columns whose availability is being checked
* @param constraints data constraint containing columns and api filters
*
* @return the simplified available intervals
*/
public SimplifiedIntervalList getAvailability(PhysicalTable physicalTable, Set<String> columnNames) {
return PERMISSIVE_COLUMN_AVAILABILITY.isOn() ?
getUnionSubintervalsForColumns(columnNames, physicalTable) :
getIntersectSubintervalsForColumns(columnNames, physicalTable);
}

/**
* Take a list of column names, get the intervals for those columns from the physical table then merge into a
* single availability list by intersecting subintervals.
*
* @param columnNames The names of the columns in the physical table
* @param physicalTable The physical table the columns appear in
*
* @return The set of all intervals fully satisfied on the request columns for the physical table
* @deprecated use getAvailableIntervals function on physical tables instead
*/
public SimplifiedIntervalList getIntersectSubintervalsForColumns(
Collection<String> columnNames,
PhysicalTable physicalTable
) {
return columnNames.isEmpty() ?
new SimplifiedIntervalList() :
new SimplifiedIntervalList(columnNames.stream()
.map(physicalTable::getIntervalsByColumnName)
.reduce(null, IntervalUtils::getOverlappingSubintervals));
}

/**
* Take a list of column names, get the intervals for those columns from the physical table then merge into a
* single availability list by unioning subintervals.
*
* @param columnNames The names of the columns in the physical table
* @param physicalTable The physical table the columns appear in
*
* @return The set of all intervals partially satisfied on the request columns for the physical table
*/
public SimplifiedIntervalList getUnionSubintervalsForColumns(
Collection<String> columnNames,
PhysicalTable physicalTable
) {
return columnNames.isEmpty() ?
new SimplifiedIntervalList() :
columnNames.stream()
.map(physicalTable::getIntervalsByColumnName)
.flatMap(Set::stream)
.collect(SimplifiedIntervalList.getCollector());
@Deprecated
public SimplifiedIntervalList getAvailability(PhysicalTable physicalTable, DataSourceConstraint constraints) {
return physicalTable.getAvailableIntervals(constraints);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import com.yahoo.bard.webservice.data.config.table.TableLoader;
import com.yahoo.bard.webservice.data.dimension.DimensionDictionary;
import com.yahoo.bard.webservice.data.metric.MetricDictionary;
import com.yahoo.bard.webservice.metadata.DataSourceMetadataService;
import com.yahoo.bard.webservice.table.LogicalTableDictionary;
import com.yahoo.bard.webservice.table.PhysicalTableDictionary;

Expand All @@ -33,6 +34,8 @@ public class ConfigurationLoader {
protected final TableLoader tableLoader;
protected final MetricLoader metricLoader;

protected final DataSourceMetadataService metadataService;

// Default JodaTime zone to UTC
public static final String TIMEZONE = SYSTEM_CONFIG.getStringProperty(
SYSTEM_CONFIG.getPackageVariableName("timezone"),
Expand All @@ -45,9 +48,15 @@ public class ConfigurationLoader {
* @param dimensionLoader DimensionLoader to load dimensions from
* @param metricLoader MetricLoader to load metrics from
* @param tableLoader TableLoader to load tables from
* @param metadataService datasource metadata service containing segments for building table
*/
@Inject
public ConfigurationLoader(DimensionLoader dimensionLoader, MetricLoader metricLoader, TableLoader tableLoader) {
public ConfigurationLoader(
DimensionLoader dimensionLoader,
MetricLoader metricLoader,
TableLoader tableLoader,
DataSourceMetadataService metadataService
) {
DateTimeZone.setDefault(DateTimeZone.forID(TIMEZONE));

// Set the max lucene query clauses as high as it can go
Expand All @@ -56,6 +65,7 @@ public ConfigurationLoader(DimensionLoader dimensionLoader, MetricLoader metricL
this.dimensionLoader = dimensionLoader;
this.metricLoader = metricLoader;
this.tableLoader = tableLoader;
this.metadataService = metadataService;
}

/**
Expand All @@ -64,7 +74,7 @@ public ConfigurationLoader(DimensionLoader dimensionLoader, MetricLoader metricL
public void load() {
dimensionLoader.loadDimensionDictionary(dictionaries.getDimensionDictionary());
metricLoader.loadMetricDictionary(dictionaries.getMetricDictionary());
tableLoader.loadTableDictionary(dictionaries);
tableLoader.loadTableDictionary(dictionaries, metadataService);

LOG.info("Initialized ConfigurationLoader");
LOG.info(dictionaries.toString());
Expand Down
Loading

0 comments on commit 540c757

Please sign in to comment.