In [1]:
{-# LANGUAGE ScopedTypeVariables #-}

import Data.Hashable
import qualified Data.HashMap.Strict as HM

import qualified Data.BitVector.LittleEndian as BV (rank, select)
import Data.BitVector.LittleEndian hiding (rank, select)
import Data.Bits

import qualified Data.Vector as V
import qualified Data.Vector.Mutable as MV

import Control.Monad.ST
import Data.Traversable
import Data.Foldable
import Data.STRef
import Data.Maybe (fromJust)

In [2]:
-- | Rank wrapper that shifts the queried position up by one before delegating
-- to 'BV.rank'. Presumably this turns an exclusive upper bound into an
-- inclusive one (the notebook's @rank foo 4@ yielding 2 is consistent with
-- that) -- confirm against the library documentation.
rank :: BitVector -> Word -> Word
rank vec = BV.rank vec . (+ 1)

-- | Select wrapper that decrements the requested ordinal by one before
-- delegating to 'BV.select', so callers count set bits starting from 1.
-- The subtraction is plain 'Word' arithmetic; an ordinal of 0 is passed on
-- as whatever @0 - 1@ evaluates to for 'Word'.
select :: BitVector -> Word -> Maybe Word
select vec = BV.select vec . subtract 1

In [3]:
-- Sample bit vector built from a list of Bools; the bare expression on the
-- second line evaluates it so the notebook displays the value.
foo = fromBits [False, True, False, False, True, True]
foo

[6]50

In [4]:
-- Exercise the rank/select wrappers on the sample vector; the recorded
-- outputs are 2 and Just 4 respectively.
rank foo 4
select foo 2

2

Just 4

In [5]:
-- Hash a sample string literal with an explicit salt of 2.
hashWithSalt 2 "foo"

4018608059442784853

In [6]:
-- | Positions of every set bit in the vector, in ascending order.
--
-- The bits are walked once via 'toBits' and paired with their position, so
-- the result is produced without a partial @Just@ pattern and without one
-- 'select' query per set bit.
getIndices :: BitVector -> [Word]
getIndices bv = [position | (position, True) <- zip [0 ..] (toBits bv)]

-- | Build a new vector holding, in list order, the elements of @vector@
-- found at each of the given @indices@. Elements are looked up with 'V.!'.
extract :: V.Vector a -> [Word] -> V.Vector a
extract vector indices = V.fromList [vector V.! fromIntegral i | i <- indices]

-- | Delete from the map every key contained in the given vector.
pluck :: (Hashable k, Eq k) => HM.HashMap k v -> V.Vector k -> HM.HashMap k v
pluck table doomed = foldr dropKey table doomed
    where
        dropKey key remaining = HM.delete key remaining

In [7]:
-- | Run one placement round over the keys of @hashmap@.
--
-- A table of @floor (size * gamma)@ slots is allocated (at least one slot
-- when the map is non-empty). Every key is hashed with @level@ as the salt
-- and reduced modulo the table size. A slot hit by exactly one key keeps
-- that key; a slot hit by two or more keys is marked as collided and ends
-- up unset.
--
-- Returns a triple of:
--
-- * the bit vector whose set bits are the slots claimed by exactly one key,
-- * the values of those keys, in the order 'getIndices' reports the slots,
-- * the input map with those keys removed.
step :: (Hashable k, Eq k) => HM.HashMap k v -> Int -> Double -> (BitVector, V.Vector v, HM.HashMap k v)
step hashmap level gamma = runST $ do
    let scaledSize = floor (fromIntegral (HM.size hashmap) * gamma) :: Int
        -- A non-empty map needs at least one slot; otherwise the `mod`
        -- below would divide by zero.
        vectorSize
            | HM.null hashmap = scaledSize
            | otherwise = max 1 scaledSize
    hashVector <- MV.replicate vectorSize False
    collisionVector <- MV.replicate vectorSize False
    -- Placeholder entries are only ever read at positions whose bit is set,
    -- and a bit is only set together with a real key being written.
    keysVector <- MV.replicate vectorSize (error "step: key slot read before being written")
    for_ (HM.keys hashmap) $ \key -> do
        let position = hashWithSalt level key `mod` vectorSize
        present <- MV.read hashVector position
        collision <- MV.read collisionVector position
        case (present, collision) of
            -- Slot already known to be contested: nothing more to record.
            (_, True) -> pure ()
            -- Second key for this slot: clear the bit and mark the collision.
            -- The stale key stays in keysVector but is never read again
            -- because its bit is now unset.
            (True, False) -> do
                MV.write hashVector position False
                MV.write collisionVector position True
            -- First key for this slot: claim it.
            (False, False) -> do
                MV.write hashVector position True
                MV.write keysVector position key
    bitVector <- fromBits <$> V.freeze hashVector
    finalKeys <- V.freeze keysVector
    let uniqueKeys = extract finalKeys (getIndices bitVector)
        valuesVector = V.map (hashmap HM.!) uniqueKeys
        hashmap' = pluck hashmap uniqueKeys
    pure (bitVector, valuesVector, hashmap')