Skip to content
Browse files

Moving from json2 to aeson. Still have a serialization kink to work out

  • Loading branch information...
1 parent a019b7e commit f502c9645a3370406350673a71f2a65b628bc749 @zmoazeni committed Apr 6, 2012
Showing with 78 additions and 83 deletions.
  1. +12 −9 src/CLI.hs
  2. +4 −1 src/Main.hs
  3. +47 −59 src/Parser.hs
  4. +9 −8 src/Storage.hs
  5. +6 −6 src/Web.hs
View
21 src/CLI.hs
@@ -10,32 +10,35 @@ import Storage
import Parser
import Conversions
import Database.LevelDB
-import Data.Map (toList)
+import Data.HashMap.Lazy (toList)
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
parseAndPrint :: FilePath -> IO ()
parseAndPrint filePath = do
rawJsons <- readFile filePath
- putStrLn . show $ parseInvertedIndex rawJsons
+ putStrLn . show . parseInvertedIndex $ TL.pack rawJsons
printGrams :: DB -> IO ()
printGrams db = withIterator db [] printKeys
where printKeys iter = do
iterFirst iter
keys' <- keys db
- case toStrings keys' of
- [] -> putStrLn "No grams stored"
- xs -> putStrLn $ "grams: " ++ xs
+ let textGrams = toText keys'
+ case T.null textGrams of
+ True -> putStrLn "No grams stored"
+ False -> putStrLn $ "grams: " ++ (T.unpack textGrams)
- toStrings = unwords . map (degrammify . decode')
- degrammify (Gram string) = string
+ toText = T.unwords . map (degrammify . decode')
+ degrammify (Gram text) = text
readGram :: DB -> Gram -> IO ()
readGram db gram = do
value <- get db [] (encode' gram)
case value of
Just x -> let indexes = decode' x :: [Index]
in print indexes
- Nothing -> let Gram rawGram = gram in putStrLn $ "gram: [" ++ rawGram ++ "] not found"
+ Nothing -> let Gram rawGram = gram in putStrLn $ "gram: [" ++ (T.unpack rawGram) ++ "] not found"
example :: DB -> IO ()
example db = do
@@ -48,4 +51,4 @@ loadIndex :: DB -> FilePath -> IO ()
loadIndex db filePath = do
rawJsons <- readFile filePath
saveGrams db (toGrams rawJsons)
- where toGrams = toList . parseInvertedIndex
+ where toGrams = toList . parseInvertedIndex . TL.pack
View
5 src/Main.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE OverloadedStrings #-}
+
import System.Environment (getArgs)
import Parser
import Storage
@@ -6,6 +8,7 @@ import CLI
import ConcurrencyTest
import Database.LevelDB
import Control.Concurrent
+import Data.Text
main :: IO ()
main = do
@@ -14,7 +17,7 @@ main = do
withDB dbPath $ \db ->
withDB stageDBPath $ \stageDB ->
case args of
- ("read":rawGram:_) -> readGram db (Gram rawGram)
+ ("read":rawGram:_) -> readGram db (Gram (pack rawGram))
("load":_) -> loadIndex db "input.json"
("grams":_) -> printGrams db
("rawGrams":_) -> grams db >>= print
View
106 src/Parser.hs
@@ -1,86 +1,74 @@
-{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE OverloadedStrings #-}
+
module Parser (
parseInvertedIndex
- , Gram (..)
- , Index (..)
+ ,Gram (..)
+ ,Index (..)
) where
-import Data.Char
-import Data.JSON2 (Json(..), ToJson, toJson)
-import Data.JSON2.Parser (parseJson)
-import Data.Map as M (Map, insertWith, empty, toList, unionWith, singleton)
-import qualified Data.Map as M (lookup)
import Data.List (nub)
import Data.Binary
-import Data.Typeable
import Control.Monad
-import Conversions
-
-type JsonMap = Map String Json
-type IndexId = Json
-type Field = String
-type FieldText = String
-type RawJson = String
+import Data.Aeson as A
+import Data.Attoparsec.Lazy
+import Data.HashMap.Lazy as HM (HashMap, insertWith, empty, toList, unionWith, singleton)
+import qualified Data.HashMap.Lazy as HM (lookup)
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Data.Text.Lazy.Encoding
+import qualified Data.Text.Encoding as TE
+import Data.Maybe
+import Data.Hashable
-data Gram = Gram String
- deriving (Eq, Ord, Show, Typeable)
+data Gram = Gram T.Text
+ deriving (Eq, Ord, Show)
-data Index = Index { indexId :: IndexId, indexField :: Field }
+data Index = Index { indexId :: Value, indexField :: T.Text }
deriving (Eq, Show)
instance Binary Gram where
- put (Gram gramValue) = put $ stringToByteString gramValue
- get = get >>= return . Gram . byteStringToString
+ put (Gram gramValue) = put $ TE.encodeUtf8 gramValue
+ get = get >>= return . Gram . TE.decodeUtf8
+
+instance ToJSON Gram where
+ toJSON (Gram gramValue) = toJSON gramValue
-instance ToJson Gram where
- toJson (Gram gramValue) = toJson gramValue
+instance Hashable Gram where
+ hash (Gram gramValue) = hash gramValue
+ hashWithSalt salt (Gram gramValue) = hashWithSalt salt gramValue
instance Binary Index where
- put index = do let field = indexField index
- id' = indexId index
- put $ indexType id'
- case indexId index of
- JNumber i -> put i
- JString s -> put $ stringToByteString s
- _ -> error "Unknown id type"
- put $ stringToByteString field
- where indexType :: Json -> Int
- indexType (JNumber _) = 0
- indexType (JString _) = 1
- indexType j = error $ "Unkown type for " ++ show j ++ "]"
+ put index = do put . A.encode $ indexId index
+ put . TE.encodeUtf8 $ indexField index
- get = do indexType <- get :: Get Int
- id' <- case indexType of
- 0 -> liftM JNumber get
- 1 -> liftM (JString . byteStringToString) get
- _ -> error "Unknown type"
- field <- liftM byteStringToString get
+ get = do id' <- liftM (fromJust . A.decode) get
+ field <- liftM TE.decodeUtf8 get
return (Index id' field)
-parseInvertedIndex :: String -> Map Gram [Index]
+parseInvertedIndex :: TL.Text -> HashMap Gram [Index]
parseInvertedIndex = splitGrams . textAndIndex . stringToJson
-splitGrams :: Map FieldText [Index] -> Map Gram [Index]
+splitGrams :: HashMap T.Text [Index] -> HashMap Gram [Index]
splitGrams = foldr gramsToIndicies empty . toList
- where gramsToIndicies (text, indicies) gramMap = foldr (insertGrams indicies) gramMap $ words text
+ where gramsToIndicies (text, indicies) gramMap = foldr (insertGrams indicies) gramMap $ T.words text
insertGrams indicies rawGram gramMap = insertWith (\x y -> nub $ x ++ y) (Gram rawGram) indicies gramMap
-textAndIndex :: [JsonMap] -> Map FieldText [Index]
-textAndIndex jsons = foldr combiner empty . concat $ map singletons jsons
- where combiner kvPair textMap = unionWith (++) textMap kvPair
- singletons json' = map (toSingleton (getId json')) $ [x | x <- toList json', let (key, _) = x, key /= "id"]
- toSingleton id' (key, (JString value)) = singleton (lowercase value) [(Index id' key)]
+textAndIndex :: [Object] -> HashMap T.Text [Index]
+textAndIndex = foldr combiner empty . concat . map singletons
+ where singletons object' = map (toSingleton (getId object')) $ [x | x <- toList object', let (key, _) = x, key /= "id"]
+ toSingleton id' (key, (String value)) = singleton (T.toLower value) [(Index id' key)]
toSingleton _ _ = empty
- lowercase = map toLower
+ combiner kvPair textMap = unionWith (++) textMap kvPair
- getId json' = case M.lookup "id" json' of
- Just x -> x
- Nothing -> error "We don't have an id"
+ getId object' = case HM.lookup "id" object' of
+ Just x -> x
+ Nothing -> error "We don't have an id"
-stringToJson :: RawJson -> [JsonMap]
-stringToJson = map processJson . lines
- where processJson rawJson = case parseJson rawJson of
- Right (JObject x) -> x
- Right _ -> error "JSON is incorrect format"
- Left _ -> empty
+stringToJson :: TL.Text -> [Object]
+stringToJson = map processJson . TL.lines
+ where processJson rawJsonText = let rawJson = encodeUtf8 rawJsonText
+ in case eitherResult $ parse json rawJson of
+ Right (Object x) -> x
+ Left msg -> error $ "error parsing: " ++ msg
+ _ -> error $ "unexpected json"
View
17 src/Storage.hs
@@ -11,20 +11,21 @@ module Storage (
) where
import Parser
+import Conversions
import Database.LevelDB
import qualified Data.Binary as Bin
import Data.Binary (Binary, Get, decode, encode)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as BL (ByteString)
-import Data.ByteString.Lazy.Char8 (unpack)
+import Data.Text.Lazy.Encoding (decodeUtf8)
import Data.Time.Clock.POSIX
import Codec.Digest.SHA
-import Data.Map (toList)
+import Data.HashMap.Lazy (toList)
import Control.Monad
import Control.Concurrent
-import Data.JSON2 (Json(..), toJson)
import Data.List (nub)
-import Conversions
+import Data.Vector as V (empty, fromList)
+import qualified Data.Aeson as A
data IndexAction = IndexCreate | IndexDelete
deriving (Eq, Ord, Show)
@@ -94,18 +95,18 @@ flushIterator stageDB gramDB iter = iterFirst iter >> iterValid iter >>= flush'
| valid = do key <- iterKey iter
value <- iterValue iter
case decode' value :: (IndexAction, BL.ByteString) of
- (IndexCreate, rawJson) -> save $ unpack rawJson
+ (IndexCreate, rawJson) -> save $ decodeUtf8 rawJson
_ -> error "Unknown action"
delete stageDB [] key
iterNext iter
iterValid iter >>= flush'
| otherwise = yield
-search :: DB -> Gram -> IO Json
+search :: DB -> Gram -> IO A.Value
search db gram = do
maybeValue <- get db [] (encode' gram)
case maybeValue of
Just binaryIndexes -> do let indexes = decode' binaryIndexes :: [Index]
- return . toJson . nub $ map indexId indexes
- Nothing -> return $ JArray []
+ return . A.Array . V.fromList . nub $ map indexId indexes
+ Nothing -> return $ A.Array (empty)
View
12 src/Web.hs
@@ -3,27 +3,27 @@
module Web where
import Web.Scotty
-import Data.JSON2
import Storage
import Parser
-import Data.Text.Lazy (pack)
+import Data.Text.Lazy
+import Data.Text.Lazy.Encoding
import Database.LevelDB (DB)
import Control.Monad.IO.Class
import Network.HTTP.Types
-import Data.ByteString.Char8 (unpack)
+import Data.Aeson
run :: String -> (DB, DB) -> IO ()
run port (gramDB, stageDB) = scotty (read port) $ do
get "/grams" $ do
grams' <- fetchGrams
- text . pack . toString . toJson $ grams'
+ text . decodeUtf8 . encode . toJSON $ grams'
header "Content-Type" "application/json"
get "/search" $ do
q <- param "q"
- let gram = Gram (unpack q)
+ let gram = Gram (toStrict q)
results <- liftIO $ search gramDB gram
- text . pack . toString $ results
+ text . decodeUtf8 . encode $ results
header "Content-Type" "application/json"
post "/" $ do

0 comments on commit f502c96

Please sign in to comment.
Something went wrong with that request. Please try again.