diff --git a/setup.cfg b/setup.cfg index c337034c..20655592 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,7 +34,7 @@ install_requires = kazoo kafka-python==1.3.5 kombu==4.6.3 - pymmh3 + mmh3 pyparsing ratelimitingfilter redis diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 3d14c140..32073658 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -169,42 +169,6 @@ def test_config_file_validator(self): with self.assertRaises(PanoptesConfigurationParsingError): parse_config_file(bad_file, spec_path) - def test_mmh3_hash_lib(self): - - self.assertEqual(unsigned_mmh3(u"loofzqgugp"), 279167916) - self.assertEqual(unsigned_mmh3(u"aliyzhliwj"), 3024468606) - self.assertEqual(unsigned_mmh3(u"ybbjayiitx"), 3256418505) - self.assertEqual(unsigned_mmh3(u"cvsshywjsn"), 1360741730) - self.assertEqual(unsigned_mmh3(u"drwfeqgxza"), 2547591695) - self.assertEqual(unsigned_mmh3(u"cwhkagexku"), 2250297940) - self.assertEqual(unsigned_mmh3(u"hbfnzemztl"), 3639147325) - self.assertEqual(unsigned_mmh3(u"zlafkcoynl"), 1055376886) - self.assertEqual(unsigned_mmh3(u"ipuxlfrbcr"), 4136803275) - self.assertEqual(unsigned_mmh3(u"cwkyakgnlr"), 433154673) - self.assertEqual(unsigned_mmh3(u"wsachwgrcd"), 3720140426) - self.assertEqual(unsigned_mmh3(u"dqhqvovhce"), 4247286627) - self.assertEqual(unsigned_mmh3(u"czctdflbcu"), 3605659707) - self.assertEqual(unsigned_mmh3(u"ysxsxhybju"), 1892411859) - self.assertEqual(unsigned_mmh3(u"mdrwpudirs"), 645279226) - self.assertEqual(unsigned_mmh3(u"hgkcripxnx"), 1616481172) - - self.assertEqual(unsigned_mmh3("loofzqgugp"), 279167916) - self.assertEqual(unsigned_mmh3("aliyzhliwj"), 3024468606) - self.assertEqual(unsigned_mmh3("ybbjayiitx"), 3256418505) - self.assertEqual(unsigned_mmh3("cvsshywjsn"), 1360741730) - self.assertEqual(unsigned_mmh3("drwfeqgxza"), 2547591695) - self.assertEqual(unsigned_mmh3("cwhkagexku"), 2250297940) - self.assertEqual(unsigned_mmh3("hbfnzemztl"), 3639147325) - self.assertEqual(unsigned_mmh3("zlafkcoynl"), 1055376886) - self.assertEqual(unsigned_mmh3("ipuxlfrbcr"), 4136803275) - self.assertEqual(unsigned_mmh3("cwkyakgnlr"), 433154673) - self.assertEqual(unsigned_mmh3("wsachwgrcd"), 3720140426) - self.assertEqual(unsigned_mmh3("dqhqvovhce"), 4247286627) - self.assertEqual(unsigned_mmh3("czctdflbcu"), 3605659707) - self.assertEqual(unsigned_mmh3("ysxsxhybju"), 1892411859) - self.assertEqual(unsigned_mmh3("mdrwpudirs"), 645279226) - self.assertEqual(unsigned_mmh3("hgkcripxnx"), 1616481172) - if __name__ == '__main__': unittest.main() diff --git a/yahoo_panoptes/framework/utilities/helpers.py b/yahoo_panoptes/framework/utilities/helpers.py index 89c2714f..a2e6e77b 100644 --- a/yahoo_panoptes/framework/utilities/helpers.py +++ b/yahoo_panoptes/framework/utilities/helpers.py @@ -424,90 +424,3 @@ def convert_netmask_to_cidr(netmask): cidr (int): IP cidr """ return sum([bin(int(x)).count(u"1") for x in netmask.split(u".")]) - - -def unsigned_mmh3(key): - """ - Converts the value passed to an unsigned mmh3 hash. - Args: - key (str): The key to hash on - Returns - hash (long): Result of the hash function - """ - - # cr https://github.com/wc-duck/pymmh3 - import sys as _sys - if _sys.version_info > (3, 0): - - def xencode(x): - if isinstance(x, bytes) or isinstance(x, bytearray): - return x - else: - return x.encode() - else: - def xencode(x): - if isinstance(key, unicode): - return x.encode('utf-8') - return x - - def hash(key, seed=0x0): - ''' Implements 32bit murmur3 hash. ''' - - key = bytearray(xencode(key)) - - def fmix(h): - h ^= h >> 16 - h = (h * 0x85ebca6b) & 0xFFFFFFFF - h ^= h >> 13 - h = (h * 0xc2b2ae35) & 0xFFFFFFFF - h ^= h >> 16 - return h - - length = len(key) - nblocks = int(length / 4) - - h1 = seed - - c1 = 0xcc9e2d51 - c2 = 0x1b873593 - - # body - for block_start in range(0, nblocks * 4, 4): - # ??? big endian? - k1 = key[block_start + 3] << 24 | \ - key[block_start + 2] << 16 | \ - key[block_start + 1] << 8 | \ - key[block_start + 0] - - k1 = (c1 * k1) & 0xFFFFFFFF - k1 = (k1 << 15 | k1 >> 17) & 0xFFFFFFFF # inlined ROTL32 - k1 = (c2 * k1) & 0xFFFFFFFF - - h1 ^= k1 - h1 = (h1 << 13 | h1 >> 19) & 0xFFFFFFFF # inlined ROTL32 - h1 = (h1 * 5 + 0xe6546b64) & 0xFFFFFFFF - - # tail - tail_index = nblocks * 4 - k1 = 0 - tail_size = length & 3 - - if tail_size >= 3: - k1 ^= key[tail_index + 2] << 16 - if tail_size >= 2: - k1 ^= key[tail_index + 1] << 8 - if tail_size >= 1: - k1 ^= key[tail_index + 0] - - if tail_size > 0: - k1 = (k1 * c1) & 0xFFFFFFFF - k1 = (k1 << 15 | k1 >> 17) & 0xFFFFFFFF # inlined ROTL32 - k1 = (k1 * c2) & 0xFFFFFFFF - h1 ^= k1 - - # finalization - unsigned_val = fmix(h1 ^ length) - - return -((unsigned_val ^ 0xFFFFFFFF) + 1) + 2**32 - - return hash(key) diff --git a/yahoo_panoptes/framework/utilities/key_value_store.py b/yahoo_panoptes/framework/utilities/key_value_store.py index a4e155ec..2557ca57 100644 --- a/yahoo_panoptes/framework/utilities/key_value_store.py +++ b/yahoo_panoptes/framework/utilities/key_value_store.py @@ -7,12 +7,12 @@ from builtins import range from builtins import object +import mmh3 from six import string_types import sys from yahoo_panoptes.framework import const from yahoo_panoptes.framework.validators import PanoptesValidators -from yahoo_panoptes.framework.utilities.helpers import unsigned_mmh3 class PanoptesKeyValueStoreException(BaseException): @@ -78,10 +78,7 @@ def _get_redis_shard(self, key): Returns: redis.StrictRedis: The Redis Connection """ - if sys.version_info[0] < 3: - key = key.encode('utf-8') - - shard_no = unsigned_mmh3(key) % self._no_of_shards + shard_no = mmh3.hash(key, signed=False) % self._no_of_shards return self._panoptes_context.get_redis_connection(group=self.redis_group, shard=shard_no) @property