Skip to content

Commit

Permalink
Added seed.
Browse files Browse the repository at this point in the history
  • Loading branch information
rxin committed Jul 18, 2014
1 parent badf20d commit 17bcbf3
Showing 1 changed file with 8 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.spark.util.random

import scala.reflect.ClassTag
import scala.util.Random

private[spark] object SamplingUtils {

Expand All @@ -26,9 +27,14 @@ private[spark] object SamplingUtils {
*
* @param input iterator over the input elements to sample from
* @param k reservoir size
* @param seed random seed
* @return (samples, input size)
*/
def reservoirSampleAndCount[T: ClassTag](input: Iterator[T], k: Int): (Array[T], Int) = {
def reservoirSampleAndCount[T: ClassTag](
input: Iterator[T],
k: Int,
seed: Long = Random.nextLong())
: (Array[T], Int) = {
val reservoir = new Array[T](k)
// Put the first k elements in the reservoir.
var i = 0
Expand All @@ -46,7 +52,7 @@ private[spark] object SamplingUtils {
(trimReservoir, i)
} else {
// If input size > k, continue the sampling process.
val rand = new XORShiftRandom
val rand = new XORShiftRandom(seed)
while (input.hasNext) {
val item = input.next()
val replacementIndex = rand.nextInt(i)
Expand Down

0 comments on commit 17bcbf3

Please sign in to comment.