/
Storage.hs
116 lines (100 loc) · 4.17 KB
/
Storage.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
module Storage (
withDB
,keys
,grams
,queueAction
,IndexAction (..)
,flush
,flushOnce
,saveGrams
,search
) where
import Parser
import Conversions
import Database.LevelDB
import qualified Data.Binary as Bin
import Data.Binary (Binary, Get, decode, encode)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as BL (ByteString)
import Data.Text.Lazy hiding (map, empty, null)
import Data.Text.Lazy.Encoding (decodeUtf8)
import Data.Time.Clock.POSIX
import Codec.Digest.SHA
import Data.HashMap.Lazy (toList)
import Control.Monad
import Control.Concurrent
import Data.List (nub)
import Data.Vector as V (empty, fromList)
import qualified Data.Aeson as A
data IndexAction = IndexCreate | IndexDelete
deriving (Eq, Ord, Show)
instance Binary IndexAction where
put IndexCreate = Bin.put (0 :: Int)
put _ = error "Unimplemented"
get = do i <- Bin.get :: Get Int
case i of
0 -> return IndexCreate
e -> error $ "Unknown Index Action " ++ (show e)
withDB :: FilePath -> (DB -> IO a) -> IO a
withDB filePath f = withLevelDB filePath [CreateIfMissing, CacheSize 1024] f
grams :: DB -> IO [Gram]
grams db = withIterator db [] $ \iter -> do
iterFirst iter
byteKeys <- keys db
return $ map (decode . byteStringToLazy) byteKeys
keys :: DB -> IO [ByteString]
keys db = withIterator db [] doGetKeys
where doGetKeys iter = iterFirst iter >> getKeys [] iter
getKeys :: [ByteString] -> Iterator -> IO [ByteString]
getKeys xs iter = do valid <- iterValid iter
case valid of
True -> do
key <- iterKey iter
iterNext iter
otherKeys <- getKeys xs iter
return (key:otherKeys)
False -> return xs
saveGrams :: DB -> [(Gram, [Index])] -> IO ()
saveGrams db pairs = mapM_ put' pairs
where put' (gram, indexes) = do
let key = encode' gram
maybeExisting <- get db [] key
let value = case maybeExisting of
Just binaryIndexes -> encode' . (indexes ++) $ (decode' binaryIndexes :: [Index])
Nothing -> encode' indexes
put db [] key value
queueAction :: DB -> IndexAction -> BL.ByteString -> IO ()
queueAction db action contents = do let value = encode' (action, contents)
uid <- genUID
put db [] uid value
genUID :: IO ByteString
genUID = do time <- getPOSIXTime
return . hash SHA256 . encode $ show time
flushOnce :: DB -> DB -> IO ()
flushOnce stageDB gramDB = withIterator stageDB [] flush'
where flush' = flushIterator stageDB gramDB
flush :: DB -> DB -> IO ()
flush stageDB gramDB = forever $ flushOnce stageDB gramDB
flushIterator :: DB -> DB -> Iterator -> IO ()
flushIterator stageDB gramDB iter = iterFirst iter >> iterValid iter >>= flush'
where save = saveGrams gramDB . toList . parseInvertedIndex
flush' valid
| valid = do key <- iterKey iter
value <- iterValue iter
case decode' value :: (IndexAction, BL.ByteString) of
(IndexCreate, rawJson) -> save $ decodeUtf8 rawJson
_ -> error "Unknown action"
delete stageDB [] key
iterNext iter
iterValid iter >>= flush'
| otherwise = yield
search :: DB -> Text -> [Text] -> IO A.Value
search db query fields = do
let gram = Gram (toStrict query)
maybeValue <- get db [] (encode' gram)
case maybeValue of
Just binaryIndexes -> do let indexes = decode' binaryIndexes :: [Index]
filtered = [x | x <- indexes, indexField x `elem` (map toStrict fields)]
xs = if null fields then indexes else filtered
return . A.Array . V.fromList . nub $ map indexId xs
Nothing -> return $ A.Array (empty)