Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Merge pull request #21 from johnynek/feature/hll-ser

add serialization to hyperloglog
  • Loading branch information...
commit f2da9f273a7bc4a28682d36b62fd9febf537623e 2 parents efe579a + 9c2e6ee
@singhala singhala authored
View
42 src/main/scala/com/twitter/algebird/HyperLogLog.scala
@@ -75,6 +75,40 @@ object HyperLogLog {
(onBits.filter { _ < bits }.map { 1 << _ }.sum,
(onBits.filter { _ >= bits }.min - bits + 1).toByte)
}
+
+ def toBytes(h : HLL) : Array[Byte] = { // Serializes an HLL as a one-byte variant tag followed by that variant's payload
+ h match {
+ case HLLZero => Array[Byte](0) // Tag 0: HLLZero, no payload
+ case HLLItem(sz,idx,bv) => {
+ val buf = new Array[Byte](1 + 4 + 4 + 1) // 1 tag byte + two 4-byte Ints + 1 final Byte
+ java.nio.ByteBuffer
+ .wrap(buf) // The buffer is backed by buf, so the puts below fill buf directly
+ .put(1 : Byte) // Tag 1: indicator of HLLItem
+ .putInt(sz)
+ .putInt(idx)
+ .put(bv)
+ buf
+ }
+ case HLLInstance(v) => (Array[Byte](2) ++ v) // Tag 2: HLLInstance, followed by every byte of v
+ }
+ }
+
+ def fromBytes(bytes : Array[Byte]) : HLL = { // Decodes an HLL from a tag byte plus payload; throws Exception on an unknown tag
+ // Make sure to be reversible so fromBytes(toBytes(x)) == x
+ val bb = java.nio.ByteBuffer.wrap(bytes)
+ bb.get.toInt match { // The first byte selects the variant; NOTE(review): an empty array fails here in bb.get (buffer underflow) before any case is reached
+ case 0 => HLLZero
+ case 1 => {
+ HLLItem(bb.getInt, bb.getInt, bb.get) // Arguments are read left to right: Int, Int, then one Byte
+ }
+ case 2 => {
+ HLLInstance(bytes.toIndexedSeq.tail) // Everything after the tag byte becomes the instance's byte sequence
+ }
+ case _ => {
+ throw new Exception("Unrecognized HLL type: " + bytes(0))
+ }
+ }
+ }
}
sealed abstract class HLL extends java.io.Serializable {
@@ -97,10 +131,7 @@ case class HLLItem(size : Int, j : Int, rhow : Byte) extends HLL {
}
else {
//They are certainly different
- val vect = Vector.fill(size)(0 : Byte)
- .updated(oJ, oRhow)
- .updated(j,rhow)
- HLLInstance(vect)
+ HLLInstance(toHLLInstance.v.updated(oJ, oRhow))
}
}
case HLLInstance(ov) => {
@@ -115,6 +146,8 @@ case class HLLItem(size : Int, j : Int, rhow : Byte) extends HLL {
}
}
}
+
+ lazy val toHLLInstance = HLLInstance(Vector.fill(size)(0 : Byte).updated(j,rhow)) // This item as an HLLInstance: a vector of `size` zero bytes with index j set to rhow; built on first access
}
/**
@@ -136,7 +169,6 @@ case class HLLInstance(v : IndexedSeq[Byte]) extends HLL {
}
}
}
-
// Named from the parameter in the paper, probably never useful to anyone
// except HyperLogLogMonoid.
// Computed as 1.0 divided by the sum of HyperLogLog.twopow(-mj) over every entry mj of v.
lazy val z : Double = 1.0 / (v.map { mj => HyperLogLog.twopow(-mj) }.sum)
View
14 src/test/scala/com/twitter/algebird/HyperLogLogTest.scala
@@ -66,10 +66,6 @@ class HyperLogLogTest extends Specification {
}
"HyperLogLog" should {
- "count with 4-bits" in {
- test(4)
- testLong(4)
- }
"count with 5-bits" in {
test(5)
testLong(5)
@@ -82,6 +78,10 @@ class HyperLogLogTest extends Specification {
test(7)
testLong(7)
}
+ "count with 10-bits" in {
+ test(10)
+ testLong(10)
+ }
"count intersections of 2" in { testLongIntersection(10,2) }
"count intersections of 3" in { testLongIntersection(10,3) }
"count intersections of 4" in { testLongIntersection(10,4) }
@@ -93,5 +93,11 @@ class HyperLogLogTest extends Specification {
val smaller = smallMon(1) // uses implicit int2Bytes to make 4 byte array
(larger + smaller) must throwA[AssertionError]
}
+ "Correctly serialize" in { // Round-trip check: fromBytes(toBytes(x)) must equal x
+ val mon = new HyperLogLogMonoid(10)
+ fromBytes(toBytes(HLLZero)) must be_==(HLLZero) // The zero value
+ fromBytes(toBytes(mon(12))) must be_==(mon(12)) // A single value; presumably yields an HLLItem — confirm against HyperLogLogMonoid.apply
+ fromBytes(toBytes(mon(12) + mon(13))) must be_==(mon(12) + mon(13)) // The sum of two single values
+ }
}
}
Please sign in to comment.
Something went wrong with that request. Please try again.