Skip to content

Commit

Permalink
Fix using logiccal name instead of phsyical name to retireve availabi…
Browse files Browse the repository at this point in the history
…lity data
  • Loading branch information
garyluoex committed Apr 5, 2017
1 parent e78d748 commit e2a36f1
Show file tree
Hide file tree
Showing 19 changed files with 253 additions and 183 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ pull request if there was one.
Current
-------
### Added:
- [Fix to use physical name instead of logical name to retrieve available interval](https://github.com/yahoo/fili/pull/226)
* Added `PhysicalDataSourceConstraint` class to capture physical names of columns for retrieving availble interval

- [CompositePhysicalTable Core Components Refactor](https://github.com/yahoo/fili/pull/179)
* Added `ConcretePhysicalTable` and `ConcreteAvailability` to model table in druid datasource and its availabillity in the new table availability structure
* Added class variable for `DataSourceMetadataService` and `ConfigurationLoader` into `AbstractBinderFactory` for application to access
Expand Down Expand Up @@ -59,6 +62,13 @@ Current

### Changed:

- [Fix to use physical name instead of logical name to retrieve available interval](https://github.com/yahoo/fili/pull/226)
* `getAllAvailbleIntervals` in `ConcreteAvailability` no longer filters out unconfigured columns, instead table's `getAllAvailbleIntervals` does
* `getAvailbleIntervals` in `Availbality` now takes `PhysicalDataSourceConstraint` instead of `DataSourceConstraint`
* `Availbility` no longer takes a set of columns on the table, only table needs to know
* `getAllAvailbleIntervals` in `Availbility` now returns a map of column physical name to interval list instead of column to interval list
* `TestDataSourceMetadataService` now takes map from string to list of intervals instead of column to list of intervals for constructor

- [Allow GranularityComparator to return static instance](https://github.com/yahoo/fili/pull/225)
* Implementation of [PR #193](https://github.com/yahoo/fili/pull/193) suggests an possible improvement
on GranularityComparator: Put the static instance on the GranularityComparator class itself, so that
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import com.yahoo.bard.webservice.data.time.ZonedTimeGrain;
import com.yahoo.bard.webservice.table.availability.Availability;
import com.yahoo.bard.webservice.table.resolver.DataSourceConstraint;
import com.yahoo.bard.webservice.table.resolver.PhysicalDataSourceConstraint;
import com.yahoo.bard.webservice.util.IntervalUtils;
import com.yahoo.bard.webservice.util.SimplifiedIntervalList;

Expand All @@ -16,6 +17,8 @@

import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

import javax.validation.constraints.NotNull;

Expand Down Expand Up @@ -79,12 +82,21 @@ public DateTime getTableAlignment() {

@Override
public Map<Column, List<Interval>> getAllAvailableIntervals() {
return getAvailability().getAllAvailableIntervals();
Map<String, List<Interval>> availableIntervals = getAvailability().getAllAvailableIntervals();

return getSchema().getColumns().stream()
.collect(Collectors.toMap(
Function.identity(),
column -> availableIntervals.getOrDefault(
getSchema().getPhysicalColumnName(column.getName()),
new SimplifiedIntervalList()
)
));
}

@Override
public SimplifiedIntervalList getAvailableIntervals(DataSourceConstraint constraint) {
return getAvailability().getAvailableIntervals(constraint);
return getAvailability().getAvailableIntervals(new PhysicalDataSourceConstraint(constraint, getSchema()));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public ConcretePhysicalTable(
timeGrain,
columns,
logicalToPhysicalColumnNames,
new ConcreteAvailability(name, columns, metadataService)
new ConcreteAvailability(name, metadataService)
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public PermissiveConcretePhysicalTable(
timeGrain,
columns,
logicalToPhysicalColumnNames,
new PermissiveAvailability(name, columns, dataSourceMetadataService)
new PermissiveAvailability(name, dataSourceMetadataService)
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
package com.yahoo.bard.webservice.table.availability;

import com.yahoo.bard.webservice.data.config.names.TableName;
import com.yahoo.bard.webservice.table.Column;
import com.yahoo.bard.webservice.table.resolver.DataSourceConstraint;
import com.yahoo.bard.webservice.table.resolver.PhysicalDataSourceConstraint;
import com.yahoo.bard.webservice.util.SimplifiedIntervalList;

import org.joda.time.Interval;
Expand All @@ -30,14 +29,14 @@ public interface Availability {
*
* @return The intervals, by column, available.
*/
Map<Column, List<Interval>> getAllAvailableIntervals();
Map<String, List<Interval>> getAllAvailableIntervals();

/**
* Fetch a set of intervals given a set of column name in DataSourceConstraint.
*
* @param constraint Data constraint containing columns and api filters
* @param constraint Physical data source constraint containing column's physical name, metrics names, api filters
*
* @return A simplified list of intervals associated with all column in constraint, empty if column is missing
*/
SimplifiedIntervalList getAvailableIntervals(DataSourceConstraint constraint);
SimplifiedIntervalList getAvailableIntervals(PhysicalDataSourceConstraint constraint);
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,17 @@

import com.yahoo.bard.webservice.data.config.names.TableName;
import com.yahoo.bard.webservice.metadata.DataSourceMetadataService;
import com.yahoo.bard.webservice.table.Column;
import com.yahoo.bard.webservice.table.resolver.DataSourceConstraint;
import com.yahoo.bard.webservice.table.resolver.PhysicalDataSourceConstraint;
import com.yahoo.bard.webservice.util.IntervalUtils;
import com.yahoo.bard.webservice.util.SimplifiedIntervalList;

import com.google.common.collect.ImmutableSet;

import org.joda.time.Interval;

import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;

import javax.validation.constraints.NotNull;

Expand All @@ -29,29 +24,23 @@
public class ConcreteAvailability implements Availability {

private final TableName name;
private final Set<Column> columns;
private final DataSourceMetadataService metadataService;

private final Set<String> columnNames;
private final Set<TableName> dataSourceNames;

/**
* Constructor.
*
* @param tableName The name of the table and data source associated with this Availability
* @param columns The columns associated with the table and availability
* @param metadataService A service containing the datasource segment data
*/
public ConcreteAvailability(
@NotNull TableName tableName,
@NotNull Set<Column> columns,
@NotNull DataSourceMetadataService metadataService
) {
this.name = tableName;
this.columns = ImmutableSet.copyOf(columns);
this.metadataService = metadataService;

this.columnNames = columns.stream().map(Column::getName).collect(Collectors.toSet());
this.dataSourceNames = Collections.singleton(name);
}

Expand All @@ -61,52 +50,27 @@ public Set<TableName> getDataSourceNames() {
}

@Override
public Map<Column, List<Interval>> getAllAvailableIntervals() {

Map<String, List<Interval>> allAvailableIntervals = getAvailableIntervalsByTable();
return columns.stream()
.collect(
Collectors.toMap(
Function.identity(),
column -> new SimplifiedIntervalList(
allAvailableIntervals.getOrDefault(column.getName(), Collections.emptyList())
)
)
);
public Map<String, List<Interval>> getAllAvailableIntervals() {
return metadataService.getAvailableIntervalsByTable(name);
}

@Override
public SimplifiedIntervalList getAvailableIntervals(DataSourceConstraint constraint) {
public SimplifiedIntervalList getAvailableIntervals(PhysicalDataSourceConstraint constraint) {

Set<String> requestColumns = constraint.getAllColumnNames().stream()
.filter(columnNames::contains)
.collect(Collectors.toSet());
Set<String> requestColumns = constraint.getAllColumnPhysicalNames();

if (requestColumns.isEmpty()) {
return new SimplifiedIntervalList();
}

Map<String, List<Interval>> allAvailableIntervals = getAvailableIntervalsByTable();
Map<String, List<Interval>> allAvailableIntervals = getAllAvailableIntervals();

// Need to ensure requestColumns is not empty in order to prevent returning null by reduce operation
return new SimplifiedIntervalList(
requestColumns.stream()
.map(columnName -> allAvailableIntervals.getOrDefault(columnName, Collections.emptyList()))
.map(physicalName -> allAvailableIntervals.getOrDefault(physicalName, Collections.emptyList()))
.map(intervals -> (Collection<Interval>) intervals)
.reduce(null, IntervalUtils::getOverlappingSubintervals)
);
}

/**
* Retrieves the most up to date column to available interval map from data source metadata service.
*
* @return map of column name to list of avialable intervals
*/
protected Map<String, List<Interval>> getAvailableIntervalsByTable() {
return metadataService.getAvailableIntervalsByTable(name);
}

protected Set<String> getColumnNames() {
return columnNames;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@

import com.yahoo.bard.webservice.data.config.names.TableName;
import com.yahoo.bard.webservice.metadata.DataSourceMetadataService;
import com.yahoo.bard.webservice.table.Column;
import com.yahoo.bard.webservice.table.resolver.DataSourceConstraint;
import com.yahoo.bard.webservice.table.resolver.PhysicalDataSourceConstraint;
import com.yahoo.bard.webservice.util.SimplifiedIntervalList;

import org.joda.time.Interval;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.validation.constraints.NotNull;

Expand All @@ -26,36 +25,36 @@ public class PermissiveAvailability extends ConcreteAvailability {
/**
* Constructor.
*
* @param tableName The name of the data source
* @param columns A set of columns associated with the data source
* @param metadataService A service containing the data source segment data
* @param tableName The name of the data source
* @param metadataService A service containing the data source segment data
*/
public PermissiveAvailability(
@NotNull TableName tableName,
@NotNull Set<Column> columns,
@NotNull DataSourceMetadataService metadataService
) {
super(tableName, columns, metadataService);
super(tableName, metadataService);
}

/**
* Returns union of all available intervals.
* <p>
* This is different from its parent's
* {@link
* com.yahoo.bard.webservice.table.availability.ConcreteAvailability#getAvailableIntervals(DataSourceConstraint)};
* com.yahoo.bard.webservice.table.availability.ConcreteAvailability#getAvailableIntervals
* (PhysicalDataSourceConstraint)};
* Instead of returning the intersection of all available intervals, this method returns the union of them.
*
* @param ignoredConstraints Data constraint containing columns and api filters. Constrains are ignored, because
* @param constraint Data constraint containing columns and api filters. Constrains are ignored, because
* <tt>PermissiveAvailability</tt> returns as much available intervals as possible by, for example, allowing
* missing intervals and returning unions of available intervals
*
* @return the union of all available intervals
*/
@Override
public SimplifiedIntervalList getAvailableIntervals(DataSourceConstraint ignoredConstraints) {
Map<String, List<Interval>> allAvailableIntervals = getAvailableIntervalsByTable();
return getColumnNames().stream()
public SimplifiedIntervalList getAvailableIntervals(PhysicalDataSourceConstraint constraint) {
Map<String, List<Interval>> allAvailableIntervals = getAllAvailableIntervals();

return constraint.getAllColumnPhysicalNames().stream()
.map(columnName -> allAvailableIntervals.getOrDefault(columnName, Collections.emptyList()))
.flatMap(List::stream)
.collect(SimplifiedIntervalList.getCollector());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,22 @@ public DataSourceConstraint(DataApiRequest dataApiRequest, DruidAggregationQuery
).collect(Collectors.toSet()));
}

/**
* Copy Constructor.
*
* @param dataSourceConstraint The data source constarint to copy from
*/
protected DataSourceConstraint(DataSourceConstraint dataSourceConstraint) {
this.requestDimensions = dataSourceConstraint.getRequestDimensions();
this.filterDimensions = dataSourceConstraint.getFilterDimensions();
this.metricDimensions = dataSourceConstraint.getMetricDimensions();
this.metricNames = dataSourceConstraint.getMetricNames();
this.apiFilters = dataSourceConstraint.getApiFilters();
this.allDimensions = dataSourceConstraint.getAllDimensions();
this.allDimensionNames = this.getAllDimensionNames();
this.allColumnNames = this.getAllColumnNames();
}

public Set<Dimension> getRequestDimensions() {
return requestDimensions;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright 2017 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
package com.yahoo.bard.webservice.table.resolver;

import com.yahoo.bard.webservice.table.Column;
import com.yahoo.bard.webservice.table.PhysicalTableSchema;

import java.util.Set;
import java.util.stream.Collectors;

/**
* Data source constraint containing physical name of the columns.
*/
public class PhysicalDataSourceConstraint extends DataSourceConstraint {

private final Set<String> allColumnPhysicalNames;

/**
* Constructor.
*
* @param dataSourceConstraint Data source constraint containing all the column names as logical names
* @param physicalTableSchema A map from logical column name to physical column names
*/
public PhysicalDataSourceConstraint(
DataSourceConstraint dataSourceConstraint,
PhysicalTableSchema physicalTableSchema
) {
super(dataSourceConstraint);

Set<String> schemaColumnNames = physicalTableSchema.getColumns().stream()
.map(Column::getName)
.collect(Collectors.toSet());

this.allColumnPhysicalNames = dataSourceConstraint.getAllColumnNames().stream()
.filter(schemaColumnNames::contains)
.map(physicalTableSchema::getPhysicalColumnName)
.collect(Collectors.toSet());
}

/**
* Getter for the all column names as physical names.
*
* @return the physical name of all the columns
*/
public Set<String> getAllColumnPhysicalNames() {
return allColumnPhysicalNames;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ class PartialDataHandlerSpec extends Specification {
* dim1 is missing four days of data internally, dim2 and dim3 are complete over the period and page_views is
* starts inside the dim1 hole and goes to the end of the period.
*/
Map<Column, Set<Interval>> segmentIntervals = [
(new Column("userDeviceType")): buildIntervals(["2014-07-01/2014-07-09","2014-07-11/2014-07-29"]) as Set,
(new Column("property")): buildIntervals(["2014-07-01/2014-07-29"]) as Set,
(new Column("os")): buildIntervals(["2014-07-01/2014-07-29"]) as Set,
(new Column("page_views")): buildIntervals(["2014-07-04/2014-07-29"]) as Set
Map<String, Set<Interval>> segmentIntervals = [
'user_device_type': buildIntervals(["2014-07-01/2014-07-09","2014-07-11/2014-07-29"]) as Set,
'property': buildIntervals(["2014-07-01/2014-07-29"]) as Set,
'os': buildIntervals(["2014-07-01/2014-07-29"]) as Set,
'page_views': buildIntervals(["2014-07-04/2014-07-29"]) as Set
]

table = new ConcretePhysicalTable(
Expand Down
Loading

0 comments on commit e2a36f1

Please sign in to comment.