-
Notifications
You must be signed in to change notification settings - Fork 346
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
681 additions
and
174 deletions.
There are no files selected for viewing
60 changes: 60 additions & 0 deletions
60
algebird-core/src/main/scala/com/twitter/algebird/Bytes.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
package com.twitter.algebird | ||
|
||
import java.nio.ByteBuffer | ||
|
||
/**
 * An immutable-by-contract wrapper around `Array[Byte]` that supplies content-based `hashCode`, `equals`, and
 * `toString` (a raw JVM array only offers identity-based versions of these).
 *
 * The wrapped array is '''not''' copied, for performance reasons. Callers must therefore never modify the array
 * after handing it to [[Bytes]]; doing so would silently change the hash code and equality of this instance.
 *
 * Note: [[Bytes]] cannot be a value class because a value class may not override the `hashCode` and `equals`
 * methods (cf. SIP-15, criterion 4).
 *
 * =Alternatives=
 *
 * Instead of wrapping an `Array[Byte]` with this class you can also convert it to a `Seq[Byte]` via Scala's
 * `toSeq` method:
 *
 * {{{
 * val arrayByte: Array[Byte] = Array(1.toByte)
 * val seqByte: Seq[Byte] = arrayByte.toSeq
 * }}}
 *
 * Like [[Bytes]], a `Seq[Byte]` has content-based `hashCode`, `equals`, and `toString` implementations.
 *
 * Performance-wise we found that a `Seq[Byte]` is comparable to [[Bytes]]. For example, a `CMS[Seq[Byte]]` was
 * measured to be only slightly slower than `CMS[Bytes]` (think: single-digit percentages).
 *
 * @param array the wrapped array of bytes; must not be `null` and must never be modified afterwards
 * @throws IllegalArgumentException if `array` is `null`
 *
 * @see [[MinHasher]]
 */
final case class Bytes(array: Array[Byte]) extends java.io.Serializable {

  require(array != null, "array must not be null")

  /**
   * Content-based hash of the wrapped bytes (MurmurHash3 over the array elements, seeded with a fixed seed), so
   * that two instances wrapping equal byte sequences hash identically.
   */
  override def hashCode: Int = scala.util.hashing.MurmurHash3.arrayHash(array, Bytes.HashSeed)

  /**
   * Two [[Bytes]] are equal iff their wrapped arrays have the same length and the same bytes in the same order.
   *
   * Implementation detail: equality is defined in terms of the wrapped array, which is a mutable object. In
   * general that is considered bad practice, but it is justified here because the contract of [[Bytes]] requires
   * that the wrapped array is never modified (and we intentionally do not create a defensive copy because of
   * performance considerations).
   */
  override def equals(that: Any): Boolean = that match {
    // Arrays.equals compares the primitive byte[] directly instead of iterating element by element.
    case Bytes(thatArray) => java.util.Arrays.equals(array, thatArray)
    case _ => false
  }

  /** Renders the bytes as signed decimal numbers, e.g. `Bytes(1,-2,3)`; an empty array renders as `Bytes()`. */
  override def toString: String = array.mkString("Bytes(", ",", ")")

}

object Bytes {

  // Fixed seed so that hash codes are stable across instances and JVM runs.
  private val HashSeed = 0

  /**
   * Lexicographic ordering over the wrapped bytes, delegating to `java.nio.ByteBuffer.compareTo`. Bytes are
   * compared as '''signed''' values (so `-1` sorts before `1`), and a strict prefix sorts before any longer
   * sequence that starts with it.
   */
  implicit val ordering: Ordering[Bytes] = new Ordering[Bytes] {
    def compare(a: Bytes, b: Bytes): Int =
      ByteBuffer.wrap(a.array).compareTo(ByteBuffer.wrap(b.array))
  }

}
Oops, something went wrong.