Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix to use physical name instead of logical name to retrieve available interval #226

Merged
merged 8 commits into from
Apr 17, 2017
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -96,6 +97,22 @@ public Map<Column, List<Interval>> getAllAvailableIntervals() {

@Override
public SimplifiedIntervalList getAvailableIntervals(DataSourceConstraint constraint) {

Set<String> tableColumnNames = getSchema().getColumnNames();
Set<String> invalidColumnNames = constraint.getAllColumnNames().stream()
.filter(name -> !tableColumnNames.contains(name))
.collect(Collectors.toSet());

if (!constraint.getAllColumnNames().stream().allMatch(tableColumnNames::contains)) {
String message = String.format(
"Received invalid request requesting for columns: %s that is not available in this table: %s",
invalidColumnNames.stream().collect(Collectors.joining(",")),
Copy link
Collaborator

@cdeszaq cdeszaq Apr 17, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than computing invalidColumnNames every time, let's replace the formatting stream here with the computation for the invalid names:

Set<String> constraintColumnNames = constraint.getAllColumnNames();
// ...
constraintColumnNames.stream().filter(name -> !tableColumnNames.contains(name)).collect(Collectors.joining(","))

getName()
);
LOG.error(message);
throw new RuntimeException(message);
}

return getAvailability().getAvailableIntervals(new PhysicalDataSourceConstraint(constraint, getSchema()));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,6 @@ public Map<String, List<Interval>> getAllAvailableIntervals() {

@Override
public SimplifiedIntervalList getAvailableIntervals(PhysicalDataSourceConstraint constraint) {

// If there are columns requested that are not configured on this table
if (constraint.getAllColumnPhysicalNames().size() != constraint.getAllColumnNames().size()) {
return new SimplifiedIntervalList();
}

return new SimplifiedIntervalList(
constructSubConstraint(constraint).entrySet().stream()
.map(entry -> entry.getKey().getAvailableIntervals(entry.getValue()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@ public PhysicalDataSourceConstraint(
) {
super(dataSourceConstraint);

Set<String> schemaColumnNames = physicalTableSchema.getColumnNames();

this.allColumnPhysicalNames = dataSourceConstraint.getAllColumnNames().stream()
.filter(schemaColumnNames::contains)
.map(physicalTableSchema::getPhysicalColumnName)
.collect(Collectors.collectingAndThen(Collectors.toSet(), Collections::unmodifiableSet));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,20 @@ class ConcretePhysicalTableSpec extends Specification {
dimensionColumn | intervalSet1
metricColumn1 | intervalSet2
metricColumn2 | intervalSet3
new Column("MissingName") | [] as Set
}

@Unroll
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for @Unroll here, since there's no where block and no templated test name.

def "Physical table getAvailableIntervals throws exception when requesting a column not on the table"() {
given:
DataSourceConstraint constraints = Mock(DataSourceConstraint)
constraints.getAllColumnNames() >> ['un_configured']

when:
physicalTable.getAvailableIntervals(constraints)

then:
RuntimeException exception = thrown()
exception.message == 'Received invalid request requesting for columns: un_configured that is not available in this table: test table'
}

def "test datasource metadata service correctly initializes"() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,38 +252,4 @@ class MetricUnionAvailabilitySpec extends Specification {
['2017-01-01/2017-02-01'] | ['2017-01-01/2017-01-15'] | ['2017-01-01/2017-01-15'] | "full back overlap (0/10, 0/5)"
['2017-01-01/2017-02-01'] | ['2017-01-15/2017-01-25'] | ['2017-01-15/2017-01-25'] | "fully contain (0/10, 3/9)"
}

def "getAvailableInterval requesting un-configured column on the table will result in empty available interval"() {
given:
Interval interval = new Interval('2000-01-01/3000-01-01')
availability1.getAllAvailableIntervals() >> [
(metric1): [interval]
]
availability2.getAllAvailableIntervals() >> [
(metric2): [interval]
]

availability1.getAvailableIntervals(_ as PhysicalDataSourceConstraint) >> new SimplifiedIntervalList([interval])
availability2.getAvailableIntervals(_ as PhysicalDataSourceConstraint) >> new SimplifiedIntervalList([interval])

metricUnionAvailability = new MetricUnionAvailability(physicalTables, [metricColumn1, metricColumn2] as Set)

DataSourceConstraint dataSourceConstraint = new DataSourceConstraint(
[] as Set,
[] as Set,
[] as Set,
[metric1, metric2, 'un_configured'] as Set,
[] as Set,
[] as Set,
[metric1, metric2, 'un_configured'] as Set,
[:]
)

PhysicalTableSchema physicalTableSchema = new PhysicalTableSchema(Mock(ZonedTimeGrain), [new Column(metric1), new Column(metric2)], [:])

PhysicalDataSourceConstraint physicalDataSourceConstraint = new PhysicalDataSourceConstraint(dataSourceConstraint, physicalTableSchema)

expect:
metricUnionAvailability.getAvailableIntervals(physicalDataSourceConstraint) == new SimplifiedIntervalList()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,30 +23,25 @@ class PhysicalDataSourceConstraintSpec extends Specification {
['columnThree', 'columnFour', 'columnFive'] as Set,
[] as Set,
[] as Set,
['columnOne', 'columnTwo', 'columnThree', 'columnFour', 'columnFive'] as Set,
['columnOne', 'columnTwo', 'columnThree', 'columnFour'] as Set,
[:]
)

physicalTableSchema = new PhysicalTableSchema(Mock(ZonedTimeGrain), [new Column('columnOne'), new Column('columnThree'), new Column('columnFour'), new Column('columnFive')], ['columnOne': 'column_one', 'columnTwo': 'column_two'])
physicalTableSchema = new PhysicalTableSchema(Mock(ZonedTimeGrain), [new Column('columnOne'), new Column('columnTwo'), new Column('columnThree'), new Column('columnFour')], ['columnOne': 'column_one', 'columnTwo': 'column_two'])
physicalDataSourceConstraint = new PhysicalDataSourceConstraint(dataSourceConstraint, physicalTableSchema)
}

def "physical data source constraint filters out columns not in the given schema"() {
expect:
!physicalDataSourceConstraint.getAllColumnPhysicalNames().contains('column_two')
}

def "physical data source constraint maps to physical name correctly even if mapping does not exist"() {
expect:
physicalDataSourceConstraint.getAllColumnPhysicalNames() == ['column_one', 'columnThree', 'columnFour', 'columnFive'] as Set
physicalDataSourceConstraint.getAllColumnPhysicalNames() == ['column_one', 'column_two', 'columnThree', 'columnFour'] as Set
}

def "withMetricIntersection correctly intersects metricNames and allPhysicalColumnNames with the provided metrics"() {
given:
PhysicalDataSourceConstraint subConstraint = physicalDataSourceConstraint.withMetricIntersection(['columnThree', 'columnFour'] as Set)
PhysicalDataSourceConstraint subConstraint = physicalDataSourceConstraint.withMetricIntersection(['columnFour'] as Set)

expect:
subConstraint.getMetricNames() == ['columnThree', 'columnFour'] as Set
subConstraint.withMetricIntersection(['columnThree', 'columnFour'] as Set).getAllColumnPhysicalNames() == ['column_one', 'columnThree', 'columnFour'] as Set
subConstraint.getMetricNames() == ['columnFour'] as Set
subConstraint.withMetricIntersection(['columnFour'] as Set).getAllColumnPhysicalNames() == ['column_one', 'column_two', 'columnFour'] as Set
}
}