Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Qtree quantileBounds off-by-one error #472

Merged
merged 11 commits into from Aug 4, 2015
65 changes: 24 additions & 41 deletions algebird-core/src/main/scala/com/twitter/algebird/QTree.scala
Expand Up @@ -268,49 +268,32 @@ class QTree[@specialized(Int, Long, Float, Double) A] private[algebird] (
def quantileBounds(p: Double): (Double, Double) = {
require(p >= 0.0 && p <= 1.0, "The given percentile must be of the form 0 <= p <= 1.0")

val rank = math.floor(_count * p).toLong
// get is safe below, because findRankLowerBound only returns
// None if rank > count, but due to construction rank <= count
(findRankLowerBound(rank), findRankUpperBound(rank))
}
// rank is 0-indexed and 0 <= rank < count
val rank = math.floor(count * p).toLong.min(count - 1)

private def findRankLowerBound(rank: Long): java.lang.Double =
if (rank > _count)
null
else {
val childCounts = mapChildrenWithDefault(0L)(_.count)
val parentCount = _count - childCounts._1 - childCounts._2
val r2 = if (lowerChildNullable != null) lowerChildNullable.findRankLowerBound(rank - parentCount) else null

if (r2 == null) {
val newRank = rank - childCounts._1 - parentCount
if (newRank <= 0)
lowerBound
else if (upperChildNullable != null)
upperChildNullable.findRankLowerBound(newRank)
else
null
} else r2
}
findRankBounds(rank)
}

private def findRankUpperBound(rank: Long): java.lang.Double = {
if (rank > _count)
null
else {
val r = if (lowerChildNullable != null) {
lowerChildNullable.findRankUpperBound(rank)
} else null
if (r == null) {
val lowerCount = if (lowerChildNullable == null) 0L else lowerChildNullable.count

val r2: java.lang.Double = if (upperChildNullable != null) {
upperChildNullable.findRankUpperBound(rank - lowerCount)
} else null

if (r2 == null) {
upperBound
} else r2
} else r
/**
* Precondition: 0 <= rank < count
*/
private def findRankBounds(rank: Long): (Double, Double) = {
require(0 <= rank && rank < count)

val (leftCount, rightCount) = mapChildrenWithDefault(0L)(_.count)
val parentCount = count - leftCount - rightCount

if (rank < leftCount) {
// Note that 0 <= rank < leftCount because of the require above.
// So leftCount > 0, so lowerChild is not None.
lowerChild.get.findRankBounds(rank)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use lowerChildNullable and upperChildNullable in this method?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If its called from the 'hot path' i.e. in plus or sumOption then yes worth doing it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's only called in quantileBounds, so optimizing it is probably not necessary

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I wouldn't bother there, best to keep the less functional stuff out of the non-critical speed paths. -- That said, it could be worth adding a benchmark on how the results are usually extracted from QTree, all the benchmarks now are for combinations. We had some big perf woes in HLL around some of them. So might be worth the experiment

} else if (rank < leftCount + parentCount) {
// leftCount <= rank < leftCount + parentCount
(lowerBound, upperBound)
} else {
// Note that leftCount + parentCount <= rank < count.
// So rightCount > 0, so upperChild is not None.
upperChild.get.findRankBounds(rank - leftCount - parentCount)
}
}

Expand Down
15 changes: 15 additions & 0 deletions algebird-test/src/test/scala/com/twitter/algebird/QTreeTest.scala
Expand Up @@ -68,6 +68,21 @@ class QTreeTest extends WordSpec with Matchers {
def trueRangeSum(list: Seq[Double], from: Double, to: Double) =
list.filter{ _ >= from }.filter{ _ < to }.sum

for (k <- Seq(3, 11, 51, 101)) {
s"QTree with elements (1 to $k)" should {
val trueMedian = (1 + k) / 2
s"have median $trueMedian" in {
implicit val sg = new QTreeSemigroup[Unit](k)

val list = (1 to k).map(_.toDouble)
val qtree = sg.sumOption(list.map(QTree.value(_))).get

val (lower, upper) = qtree.quantileBounds(0.5)
assert(lower <= trueMedian && trueMedian <= upper)
}
}
}

for (k <- (1 to 6))
("QTree with sizeHint 2^" + k) should {
"always contain the true quantile within its bounds" in {
Expand Down