In [1]:
{-# LANGUAGE ScopedTypeVariables #-}

import Data.Hashable
import qualified Data.HashMap.Strict as HM

import qualified Data.BitVector.LittleEndian as BV (rank, select)
import Data.BitVector.LittleEndian hiding (rank, select)
import Data.Bits

import qualified Data.Vector as V
import qualified Data.Vector.Mutable as MV

import Control.Monad.ST
import Data.Traversable
import Data.Foldable
import Data.STRef
import Data.Maybe (fromJust)

In [2]:
-- | Rank query on a bit vector, with the index shifted up by one before it
-- is handed to 'BV.rank'.
--
-- For the bits @[False, True, False, False, True, True]@ the query at index 4
-- yields 2, i.e. the set bits found in positions 0 through 4.
--
-- NOTE(review): the shift presumably turns an exclusive upper bound in
-- 'BV.rank' into an inclusive one -- confirm against the bv-little docs.
rank :: BitVector -> Word -> Word
rank bv = BV.rank bv . (+ 1)

-- | One-based select: @select bv n@ asks 'BV.select' for the position of the
-- @n@-th set bit, counting the first set bit as @n = 1@.
--
-- For the bits @[False, True, False, False, True, True]@, @select bv 2@ is
-- @Just 4@.
--
-- There is no "zeroth" set bit, so @n = 0@ is answered with 'Nothing'
-- directly instead of computing @0 - 1@ on a 'Word' (which wraps around to
-- 'maxBound') and relying on the library to reject that value.
select :: BitVector -> Word -> Maybe Word
select _ 0 = Nothing
select bv w = BV.select bv (w - 1)

In [3]:
-- Sample bit vector reused by the cells below; positions 1, 4 and 5
-- (counting from zero) are set.
foo = fromBits [False, True, False, False, True, True]
-- Show the vector itself.
foo

-- Number of bits held by the vector (6 for the six-element list above).
dimension foo

[6]50

6

In [4]:
-- Set bits of foo in positions 0 through 4 (the output below shows 2).
rank foo 4
-- Position of the second set bit of foo (the output below shows Just 4).
select foo 2

2

Just 4

In [5]:
-- Hash the literal "foo" using 2 as the salt.
hashWithSalt 2 "foo"

4018608059442784853

In [6]:
-- Both expressions print 1 through 5. enumFromTo takes a start and an end,
-- whereas V.enumFromN takes a start and an element count.
enumFromTo 1 5
V.enumFromN 1 5

[1,2,3,4,5]

[1,2,3,4,5]

In [7]:
-- | Zero-based positions of all set bits of a bit vector, in increasing
-- order. For the bits @[False, True, False, False, True, True]@ the result
-- is @[1,4,5]@; an empty or all-zero vector gives an empty result.
--
-- Every position below 'dimension' is tested directly, which keeps the
-- function total: there is no pattern match on a 'Maybe' that could fail.
getIndices :: BitVector -> V.Vector Word
getIndices bv = V.filter isSet positions
  where
    -- All valid positions: 0 up to (dimension bv - 1).
    positions = V.enumFromN 0 (fromIntegral (dimension bv))
    isSet = testBit bv . fromIntegral

-- | Build a new vector by looking up every given index in the source vector.
-- The result has one element per index, in the order the indices are listed.
-- Indexing uses 'V.!', so an index outside the source vector is a runtime
-- error once the corresponding element is demanded.
extract :: V.Vector a -> V.Vector Word -> V.Vector a
extract vector indices = V.map elementAt indices
  where
    elementAt i = vector V.! fromIntegral i

-- | Remove every key in the vector from the map and return what remains.
-- Keys that are not present in the map are ignored by 'HM.delete'.
--
-- A strict left fold is used so the deletions are applied as the vector is
-- walked, instead of first recursing to its end as a right fold would.
pluck :: (Hashable k, Eq k) => HM.HashMap k v -> V.Vector k -> HM.HashMap k v
pluck = foldl' (flip HM.delete)

In [8]:
-- Zero-based positions of the set bits of foo (the output below shows [1,4,5]).
getIndices foo

[1,4,5]

In [9]:
-- | Run one level of the hash construction over the keys of @hashmap@.
--
-- Each key is hashed with @level@ as the salt and reduced modulo the number
-- of slots, which is @floor (size * gamma)@. A slot hit by exactly one key
-- ends up set in the returned bit vector. A slot hit a second time is cleared
-- and flagged as collided, so it stays unset however many more keys land on
-- it.
--
-- Returns, in order:
--
-- * the bit vector marking the slots that hold exactly one key,
-- * the values of those keys, ordered as 'getIndices' reports the set slots,
-- * @hashmap@ with those keys removed, i.e. the keys that collided.
--
-- A non-empty map always gets at least one slot. Without that, a @gamma@
-- small enough to round the slot count down to zero would make the @mod@
-- below divide by zero.
step :: forall k v. (Hashable k, Eq k) => HM.HashMap k v -> Int -> Double -> (BitVector, V.Vector v, HM.HashMap k v)
step hashmap level gamma = runST $ do
    let keyCount = HM.size hashmap
        vectorSize :: Int
        vectorSize
            | keyCount == 0 = 0
            | otherwise = max 1 (floor (fromIntegral keyCount * gamma))
    hashVector <- MV.replicate vectorSize False
    collisionVector <- MV.replicate vectorSize False
    -- Placeholder contents; a slot is only read back after a key was written
    -- to it. The explicit forall on the signature is what lets this
    -- annotation refer to the key type of the map.
    keysVector <- MV.replicate vectorSize (undefined :: k)
    for_ (HM.keys hashmap) $ \key -> do
        let position = hashWithSalt level key `mod` vectorSize
        present <- MV.read hashVector position
        collision <- MV.read collisionVector position
        case (present, collision) of
            -- First key to reach this slot: claim it.
            (False, False) -> do
                MV.write hashVector position True
                MV.write keysVector position key
            -- Second key to reach this slot: release it and mark it as
            -- collided so nobody can claim it again.
            (True, False) -> do
                MV.write hashVector position False
                MV.write collisionVector position True
                -- MV.write keysVector position undefined
            -- Slot already known to be collided: nothing to do.
            (False, True) -> pure ()
            -- A slot is cleared in the same step that flags it as collided.
            (True, True) -> error "this should never happen"
    bitVector <- fromBits <$> V.freeze hashVector
    finalKeys <- V.freeze keysVector
    -- Only slots whose bit is set are read, so no placeholder is ever forced.
    let uniqueKeys = extract finalKeys (getIndices bitVector)
    let valuesVector = V.map (hashmap HM.!) uniqueKeys
    let leftover = pluck hashmap uniqueKeys
    pure (bitVector, valuesVector, leftover)

-- | Split a map into an index map and a vector of its values.
--
-- The entries are taken in the order 'HM.toList' yields them. The values go
-- into the vector in that order, and each key is mapped to its position in
-- that same order, counting from 1. The key mapped to @i@ therefore belongs
-- to the vector element at zero-based position @i - 1@.
finalise :: (Hashable k, Eq k) => HM.HashMap k v -> (HM.HashMap k Int, V.Vector v)
finalise hashmap = (indices, valuesVector)
  where
    (ks, vs) = unzip (HM.toList hashmap)
    valuesVector = V.fromList vs
    indices = HM.fromList (zip ks [1 ..])

In [10]:
-- Two-entry example: the output below numbers the keys from 1 and lists the
-- values in the same order as the numbered keys.
finalise $ HM.fromList [("f", "foo"), ("b", "bar")]

(fromList [("b",1),("f",2)],["bar","foo"])

In [11]:
-- | Result structure for the perfect-hash construction over keys of type @k@.
--
-- NOTE(review): nothing in view populates this type yet, so the field notes
-- below are assumptions to confirm.
data MinimalPerfectHash k = MinimalPerfectHash
    { mphBitVectors :: V.Vector BitVector
    -- ^ Presumably one bit vector per level, as returned by 'step'.
    , mphLeftovers :: Maybe (HM.HashMap k Int)
    -- ^ Presumably the index map from 'finalise' for keys that never got a
    -- slot of their own, and 'Nothing' when there are none.
    }
    deriving (Eq, Show)

-- | Judging by the signature, meant to build a 'MinimalPerfectHash' for the
-- keys of @hashmap@ together with a vector of its values.
--
-- NOTE(review): not implemented yet; the body is 'undefined', so evaluating
-- the result fails at runtime. Presumably it should apply 'step' for up to
-- @maxLevel@ levels with the given @gamma@ and hand the remaining keys to
-- 'finalise' -- confirm the intended design before filling this in.
generate :: (Hashable k, Eq k) => HM.HashMap k v -> Int -> Double -> (MinimalPerfectHash k, V.Vector v)
generate hashmap maxLevel gamma = undefined