Skip to content

Commit

Permalink
Move back to C library for mmh3
Browse files Browse the repository at this point in the history
  • Loading branch information
road-cycling committed Nov 7, 2019
1 parent 3a9e309 commit d6a7317
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 129 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Expand Up @@ -34,7 +34,7 @@ install_requires =
kazoo
kafka-python==1.3.5
kombu==4.6.3
pymmh3
mmh3
pyparsing
ratelimitingfilter
redis
Expand Down
36 changes: 0 additions & 36 deletions tests/test_helpers.py
Expand Up @@ -169,42 +169,6 @@ def test_config_file_validator(self):
with self.assertRaises(PanoptesConfigurationParsingError):
parse_config_file(bad_file, spec_path)

def test_mmh3_hash_lib(self):
    """
    Check unsigned_mmh3 against hard-coded expected hashes.

    Each key is hashed twice: once written as a unicode literal and once as a
    native string literal. Both spellings must produce the same expected value.
    """
    expected_hashes = (
        279167916,
        3024468606,
        3256418505,
        1360741730,
        2547591695,
        2250297940,
        3639147325,
        1055376886,
        4136803275,
        433154673,
        3720140426,
        4247286627,
        3605659707,
        1892411859,
        645279226,
        1616481172,
    )

    unicode_keys = (
        u"loofzqgugp",
        u"aliyzhliwj",
        u"ybbjayiitx",
        u"cvsshywjsn",
        u"drwfeqgxza",
        u"cwhkagexku",
        u"hbfnzemztl",
        u"zlafkcoynl",
        u"ipuxlfrbcr",
        u"cwkyakgnlr",
        u"wsachwgrcd",
        u"dqhqvovhce",
        u"czctdflbcu",
        u"ysxsxhybju",
        u"mdrwpudirs",
        u"hgkcripxnx",
    )

    native_keys = (
        "loofzqgugp",
        "aliyzhliwj",
        "ybbjayiitx",
        "cvsshywjsn",
        "drwfeqgxza",
        "cwhkagexku",
        "hbfnzemztl",
        "zlafkcoynl",
        "ipuxlfrbcr",
        "cwkyakgnlr",
        "wsachwgrcd",
        "dqhqvovhce",
        "czctdflbcu",
        "ysxsxhybju",
        "mdrwpudirs",
        "hgkcripxnx",
    )

    # Keep the original ordering: all unicode literals first, then all native literals
    for keys in (unicode_keys, native_keys):
        for key, expected in zip(keys, expected_hashes):
            self.assertEqual(unsigned_mmh3(key), expected)


if __name__ == '__main__':
unittest.main()
87 changes: 0 additions & 87 deletions yahoo_panoptes/framework/utilities/helpers.py
Expand Up @@ -424,90 +424,3 @@ def convert_netmask_to_cidr(netmask):
cidr (int): IP cidr
"""
return sum([bin(int(x)).count(u"1") for x in netmask.split(u".")])


def unsigned_mmh3(key):
    """
    Compute the unsigned 32-bit MurmurHash3 (x86 variant, seed 0) of a key.

    Pure-Python implementation adapted from https://github.com/wc-duck/pymmh3

    Args:
        key (str|bytes|bytearray): The key to hash on. Text keys are encoded as UTF-8 before hashing

    Returns:
        int: Result of the hash function, in the range [0, 2**32)
    """
    import sys as _sys

    mask = 0xFFFFFFFF
    c1 = 0xcc9e2d51
    c2 = 0x1b873593

    # Normalise the key to raw bytes; the encoding rules differ between Python 2 and 3
    if _sys.version_info[0] >= 3:
        if isinstance(key, (bytes, bytearray)):
            raw = key
        else:
            raw = key.encode('utf-8')
    else:
        if isinstance(key, unicode):  # noqa: F821 - only evaluated on Python 2
            raw = key.encode('utf-8')
        else:
            raw = key

    data = bytearray(raw)

    def rotl32(value, shift):
        # Rotate a 32-bit value left by `shift` bits
        return ((value << shift) | (value >> (32 - shift))) & mask

    length = len(data)
    # Integer arithmetic only: float division would lose precision for very large inputs
    tail_size = length % 4
    tail_index = length - tail_size

    h1 = 0  # seed

    # body: consume the input four bytes at a time, each block read as a little-endian word
    for block_start in range(0, tail_index, 4):
        k1 = (data[block_start]
              | (data[block_start + 1] << 8)
              | (data[block_start + 2] << 16)
              | (data[block_start + 3] << 24))

        k1 = (k1 * c1) & mask
        k1 = rotl32(k1, 15)
        k1 = (k1 * c2) & mask

        h1 ^= k1
        h1 = rotl32(h1, 13)
        h1 = (h1 * 5 + 0xe6546b64) & mask

    # tail: fold in the 1-3 bytes left over after the last full block
    if tail_size:
        k1 = 0
        for position in range(tail_size):
            k1 ^= data[tail_index + position] << (8 * position)

        k1 = (k1 * c1) & mask
        k1 = rotl32(k1, 15)
        k1 = (k1 * c2) & mask
        h1 ^= k1

    # finalization: mix in the length (truncated to 32 bits) and avalanche the bits
    h1 = (h1 ^ length) & mask
    h1 ^= h1 >> 16
    h1 = (h1 * 0x85ebca6b) & mask
    h1 ^= h1 >> 13
    h1 = (h1 * 0xc2b2ae35) & mask
    h1 ^= h1 >> 16

    # Already unsigned and within 32 bits, so no signed/unsigned conversion is needed
    return h1
7 changes: 2 additions & 5 deletions yahoo_panoptes/framework/utilities/key_value_store.py
Expand Up @@ -7,12 +7,12 @@
from builtins import range
from builtins import object

import mmh3
from six import string_types
import sys

from yahoo_panoptes.framework import const
from yahoo_panoptes.framework.validators import PanoptesValidators
from yahoo_panoptes.framework.utilities.helpers import unsigned_mmh3


class PanoptesKeyValueStoreException(BaseException):
Expand Down Expand Up @@ -78,10 +78,7 @@ def _get_redis_shard(self, key):
Returns:
redis.StrictRedis: The Redis Connection
"""
if sys.version_info[0] < 3:
key = key.encode('utf-8')

shard_no = unsigned_mmh3(key) % self._no_of_shards
shard_no = mmh3.hash(key, signed=False) % self._no_of_shards
return self._panoptes_context.get_redis_connection(group=self.redis_group, shard=shard_no)

@property
Expand Down

0 comments on commit d6a7317

Please sign in to comment.