Skip to content

Commit

Permalink
Renamed the method.
Browse files Browse the repository at this point in the history
  • Loading branch information
rxin committed Jul 18, 2014
1 parent 6940010 commit badf20d
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ import scala.reflect.ClassTag
private[spark] object SamplingUtils {

/**
* Reservoir Sampling implementation.
* Reservoir sampling implementation that also returns the input size.
*
* @param input iterator over the input elements
* @param k reservoir size
* @return (samples, input size)
*/
def reservoirSample[T: ClassTag](input: Iterator[T], k: Int): (Array[T], Int) = {
def reservoirSampleAndCount[T: ClassTag](input: Iterator[T], k: Int): (Array[T], Int) = {
val reservoir = new Array[T](k)
// Put the first k elements in the reservoir.
var i = 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,21 @@ import org.scalatest.FunSuite

class SamplingUtilsSuite extends FunSuite {

test("reservoirSample") {
test("reservoirSampleAndCount") {
val input = Seq.fill(100)(Random.nextInt())

// input size < k
val (sample1, count1) = SamplingUtils.reservoirSample(input.iterator, 150)
val (sample1, count1) = SamplingUtils.reservoirSampleAndCount(input.iterator, 150)
assert(count1 === 100)
assert(input === sample1.toSeq)

// input size == k
val (sample2, count2) = SamplingUtils.reservoirSample(input.iterator, 100)
val (sample2, count2) = SamplingUtils.reservoirSampleAndCount(input.iterator, 100)
assert(count2 === 100)
assert(input === sample2.toSeq)

// input size > k
val (sample3, count3) = SamplingUtils.reservoirSample(input.iterator, 10)
val (sample3, count3) = SamplingUtils.reservoirSampleAndCount(input.iterator, 10)
assert(count3 === 100)
assert(sample3.length === 10)
}
Expand Down

0 comments on commit badf20d

Please sign in to comment.