-
Notifications
You must be signed in to change notification settings - Fork 346
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
92 changed files
with
4,425 additions
and
1,029 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,10 @@ | ||
language: scala | ||
scala: | ||
- 2.10.4 | ||
- 2.11.4 | ||
sudo: false | ||
matrix: | ||
include: | ||
- scala: 2.10.4 | ||
script: ./sbt ++$TRAVIS_SCALA_VERSION clean test | ||
|
||
- scala: 2.11.5 | ||
script: ./sbt ++$TRAVIS_SCALA_VERSION clean test | ||
after_success: "./sbt coveralls" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
100 changes: 100 additions & 0 deletions
100
algebird-caliper/src/test/scala/com/twitter/algebird/caliper/CMSHashingBenchmark.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
package com.twitter.algebird.caliper | ||
|
||
import com.google.caliper.{ Param, SimpleBenchmark } | ||
|
||
/** | ||
* Benchmarks the hashing algorithms used by Count-Min sketch for CMS[BigInt]. | ||
* | ||
* The input values are generated ahead of time to ensure that each trial uses the same input (and that the RNG is not | ||
* influencing the runtime of the trials). | ||
* | ||
* More details available at https://github.com/twitter/algebird/issues/392. | ||
*/ | ||
// Once we can convince cappi (https://github.com/softprops/cappi) -- the sbt plugin we use to run | ||
// caliper benchmarks -- to work with the latest caliper 1.0-beta-1, we would: | ||
// - Let `CMSHashingBenchmark` extend `Benchmark` (instead of `SimpleBenchmark`) | ||
// - Annotate the `time*` methods with `@MacroBenchmark`. | ||
class CMSHashingBenchmark extends SimpleBenchmark {

  /**
   * The `a` parameter for CMS' default ("legacy") hashing algorithm: `h_i(x) = a_i * x + b_i (mod p)`.
   *
   * The Murmur-based variant in this benchmark uses the same value as its hash seed.
   */
  @Param(Array("5123456"))
  val a: Int = 0

  /**
   * The `b` parameter for CMS' default ("legacy") hashing algorithm: `h_i(x) = a_i * x + b_i (mod p)`.
   *
   * Algebird's CMS implementation hard-codes `b` to `0`.
   */
  @Param(Array("0"))
  val b: Int = 0

  /**
   * Width of the counting table.
   */
  @Param(Array("11" /* eps = 0.271 */ , "544" /* eps = 0.005 */ , "2719" /* eps = 1E-3 */ , "271829" /* eps = 1E-5 */ ))
  val width: Int = 0

  /**
   * Number of operations per benchmark repetition, i.e. the number of pre-generated inputs
   * that are hashed in each repetition.
   */
  @Param(Array("100000"))
  val operations: Int = 0

  /**
   * Maximum number of bits for randomly generated BigInt instances.
   */
  @Param(Array("128", "1024", "2048"))
  val maxBits: Int = 0

  var random: scala.util.Random = _
  var inputs: Seq[BigInt] = _

  /**
   * Generates the benchmark inputs once, before any timed method runs.
   *
   * The inputs are materialized into a strict `Vector` on purpose: a lazy `view` would re-run the
   * random number generator on every traversal, so the timed loops would measure the RNG and every
   * repetition would hash different values.
   */
  override def setUp(): Unit = {
    random = new scala.util.Random
    // We draw numbers randomly from a 2^maxBits address space.
    inputs = Vector.fill(operations) { scala.math.BigInt(maxBits, random) }
  }

  /**
   * Hashes `x` with Scala's MurmurHash3 over the bytes of `x`, seeded with `a`, and maps the result
   * into `[0, width)`.
   *
   * `b` is not used by this variant; it is only accepted so that both hash functions share the
   * same signature.
   */
  private def murmurHashScala(a: Int, b: Int, width: Int)(x: BigInt): Int = {
    val hash: Int = scala.util.hashing.MurmurHash3.arrayHash(x.toByteArray, a)
    val h = {
      // We only want positive integers for the subsequent modulo. This method mimics Java's Hashtable
      // implementation. The Java code uses `0x7FFFFFFF` for the bit-wise AND, which is equal to Int.MaxValue.
      val positiveHash = hash & Int.MaxValue
      positiveHash % width
    }
    assert(h >= 0, "hash must not be negative")
    h
  }

  /** The value `2^31 - 1`, used as a bit mask by `brokenCurrentHash`. */
  private val PRIME_MODULUS = (1L << 31) - 1

  /**
   * Computes `a * x + b`, folds the value with its own 32-bit right shift, masks the sum with
   * `PRIME_MODULUS` and maps the result into `[0, width)`.
   *
   * The logic is intentionally left untouched: it is the implementation under measurement (the
   * method name marks it as the known-flawed variant), so it must not be "fixed" here.
   */
  private def brokenCurrentHash(a: Int, b: Int, width: Int)(x: BigInt): Int = {
    val unModded: BigInt = (x * a) + b
    val modded: BigInt = (unModded + (unModded >> 32)) & PRIME_MODULUS
    // `modded` is masked to at most 31 bits, so it always fits into a non-negative Int.
    val h = modded.toInt % width
    assert(h >= 0, "hash must not be negative")
    h
  }

  /**
   * Times `brokenCurrentHash` over all pre-generated inputs.
   *
   * @param operations number of repetitions to run. Note that this parameter shadows the
   *                   `operations` field; the number of inputs hashed per repetition is fixed by
   *                   `inputs`, which is built in `setUp`.
   * @return the number of repetitions that were executed
   */
  def timeBrokenCurrentHashWithRandomMaxBitsNumbers(operations: Int): Int = {
    var dummy = 0
    while (dummy < operations) {
      inputs.foreach { input => brokenCurrentHash(a, b, width)(input) }
      dummy += 1
    }
    dummy
  }

  /**
   * Times `murmurHashScala` over all pre-generated inputs.
   *
   * @param operations number of repetitions to run. Note that this parameter shadows the
   *                   `operations` field; the number of inputs hashed per repetition is fixed by
   *                   `inputs`, which is built in `setUp`.
   * @return the number of repetitions that were executed
   */
  def timeMurmurHashScalaWithRandomMaxBitsNumbers(operations: Int): Int = {
    var dummy = 0
    while (dummy < operations) {
      inputs.foreach { input => murmurHashScala(a, b, width)(input) }
      dummy += 1
    }
    dummy
  }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
algebird-caliper/src/test/scala/com/twitter/algebird/caliper/HLLPresentBenchmark.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package com.twitter.algebird.caliper | ||
|
||
import com.google.caliper.{ SimpleBenchmark, Param } | ||
import com.twitter.algebird.{ HyperLogLogMonoid, HLL } | ||
import com.twitter.bijection._ | ||
import java.nio.ByteBuffer | ||
|
||
/**
 * Benchmarks `approximateSize` on HyperLogLog instances that are built ahead of time.
 *
 * `setUp` creates `numHLL` HLL instances, each from up to `max` pseudo-random longs drawn from a
 * fixed-seed generator, so every trial sees the same data.
 */
class HLLPresentBenchmark extends SimpleBenchmark {

  /** Value passed to the `HyperLogLogMonoid` constructor. */
  @Param(Array("5", "10", "17", "20"))
  val bits: Int = 0

  /** Number of random longs drawn per HLL (duplicates collapse, as the values are put into a set). */
  @Param(Array("10", "100", "500", "1000", "10000"))
  val max: Int = 0

  /** Number of HLL instances to build. */
  @Param(Array("10", "20", "100"))
  val numHLL: Int = 0

  var data: IndexedSeq[HLL] = _

  // NOTE(review): deliberately left without an explicit type annotation. With one, this implicit
  // would presumably become eligible inside its own initializer and `implicitly` could resolve to
  // the (still uninitialized) val itself -- confirm before "fixing" the missing annotation.
  implicit val byteEncoder = implicitly[Injection[Long, Array[Byte]]]

  /** Builds the HLL instances that the timed method reads. */
  override def setUp(): Unit = {
    val monoid = new HyperLogLogMonoid(bits)
    // Fixed seed: the generated data is identical across runs.
    val rng = new scala.util.Random(12345L)
    data = IndexedSeq.fill(numHLL) {
      val distinctLongs = Iterator.fill(max)(rng.nextLong).toSet
      monoid.batchCreate(distinctLongs)(byteEncoder.toFunction)
    }
  }

  /**
   * Calls `approximateSize` on every pre-built HLL, `reps` times over.
   *
   * Despite its name, this method does not time `batchCreate`; the creation happens in `setUp`.
   *
   * @param reps number of repetitions to run
   * @return the number of repetitions that were executed
   */
  def timeBatchCreate(reps: Int): Int = {
    var completed = 0
    while (completed < reps) {
      for (hll <- data) {
        hll.approximateSize
      }
      completed += 1
    }
    completed
  }
}
Oops, something went wrong.