From 4743e7a61ef751dd69b41fef67b148b9fd6556c5 Mon Sep 17 00:00:00 2001
From: Isaac Whitfield
Date: Sat, 26 Sep 2020 10:57:54 -0700
Subject: [PATCH] Migrate to a Rust xxHash implementation

---
 Cargo.toml     |  1 +
 src/filters.rs | 27 +++++++++++++++++++--------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index e65a0e4..94b538a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,3 +19,4 @@ clap = "2.33"
 fnv = "1.0"
 scalable_bloom_filter = "0.1"
 xxhash2 = "0.1"
+twox-hash = "1.5.0"
diff --git a/src/filters.rs b/src/filters.rs
index 50fe29a..d832232 100644
--- a/src/filters.rs
+++ b/src/filters.rs
@@ -8,8 +8,10 @@
 use clap::*;
 use fnv::FnvHashSet;
 use scalable_bloom_filter::ScalableBloomFilter;
+use twox_hash::XxHash64;
+
 use std::collections::HashSet;
-use xxhash2;
+use std::hash::Hasher;
 
 // Enumerable filters for clap-rs.
 arg_enum! {
@@ -108,11 +110,8 @@ impl Filter for DigestFilter {
     /// Detects a unique value.
     #[inline]
     fn detect(&mut self, input: &[u8]) -> bool {
-        // hash to u64 always, for collisions
-        let digest = xxhash2::hash64(input, 0);
-
-        // insert the new digest
-        self.inner.insert(digest)
+        // insert as a hashed digest
+        self.inner.insert(hash(input))
     }
 }
 
@@ -180,8 +179,8 @@ impl Filter for BloomFilter {
     /// Detects a unique value.
     #[inline]
     fn detect(&mut self, input: &[u8]) -> bool {
-        // // create a digest from the input
-        let digest = xxhash2::hash64(input, 0);
+        // create a digest from the input
+        let digest = hash(input);
 
         // short circuit if duplicated
         if self.inner.contains(&digest) {
@@ -194,6 +193,18 @@ impl Filter for BloomFilter {
     }
 }
 
+/// Small hash binding around `Hasher`.
+fn hash(input: &[u8]) -> u64 {
+    // create a new default hasher
+    let mut hasher = XxHash64::default();
+
+    // write the bytes to the hasher
+    hasher.write(input);
+
+    // finish the hash
+    hasher.finish()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;