From f834d694fe1be66794530eec22894561c9b4554a Mon Sep 17 00:00:00 2001 From: yihuang Date: Thu, 4 Oct 2012 16:06:08 +0800 Subject: [PATCH] api and haddock overhaul. --- Text/XML/ToJSON.hs | 126 +++++++++++++++++++++++++------------ Text/XML/ToJSON/Builder.hs | 86 ++++++++++++++++++------- 2 files changed, 147 insertions(+), 65 deletions(-) diff --git a/Text/XML/ToJSON.hs b/Text/XML/ToJSON.hs index 4ac1b1e..f5256c2 100644 --- a/Text/XML/ToJSON.hs +++ b/Text/XML/ToJSON.hs @@ -1,22 +1,39 @@ -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings, DeriveDataTypeable #-} + module Text.XML.ToJSON - ( elementToJSON - , tokensToJSON + ( + +{-| This library provide a way to convert xml to json. + + Further more, by combining with aeson's parsing facility, it provide a way to parse xml to haskell data type. + -} + parseXML , xmlToJSON + , JSONParseError + -- * streamlined api + , bsSourceToJSON + , bsRSourceToJSON + -- * utils + , tokenToBuilder + , elementToJSON + , tokensToJSON ) where -import Control.Monad (when) +import Control.Monad (when, liftM) import Control.Arrow (second) -import Control.Applicative ( (<$>), (*>), (<|>) ) +import Control.Exception (Exception) +import Control.Applicative ( (*>), (<|>) ) +import Data.Typeable (Typeable) import Data.Maybe (fromMaybe) import qualified Data.CaseInsensitive as CI import qualified Data.Text as T +import qualified Data.ByteString.Lazy as L import Data.ByteString (ByteString) import qualified Blaze.ByteString.Builder as B import qualified Data.Attoparsec as A import Data.Attoparsec.ByteString.Char8 (char, skipSpace) -import Data.Conduit +import Data.Conduit (Source, yield, (=$), ($$++), ($$+-), MonadThrow(monadThrow)) import Data.Conduit.Internal (ResumableSource(ResumableSource)) import qualified Data.Conduit.List as C import qualified Data.Conduit.Attoparsec as C @@ -28,8 +45,9 @@ import Text.HTML.TagStream import qualified Text.HTML.TagStream.Text as T import qualified Text.HTML.TagStream.ByteString as S import Text.XML.ToJSON.Builder -import Data.Aeson (Value(..), Object) +import Data.Aeson (Value(..), Object, FromJSON, fromJSON, Result(Error, Success)) +-- | Convert tagstream-conduit `Token' to xml element `Builder' tokenToBuilder :: T.Token -> Builder tokenToBuilder (TagOpen s as selfClose) = do beginElement s @@ -39,13 +57,9 @@ tokenToBuilder (TagClose _) = endElement -- FIXME should match tag name? tokenToBuilder (Text s) = addValue s tokenToBuilder _ = return () -attrsToObject :: [(Str, Str)] -> Object -attrsToObject = HM.fromList . map (second String) - -mergeObject :: Value -> Value -> Value -mergeObject (Array arr) v = Array (V.cons v arr) -mergeObject v1 v2 = Array (V.fromList [v1, v2]) - +-- |Convert xml `Element' to aeson `Value' . +-- +-- xml attributes and text values are converted to special object attribute @__attributes@ and @__values@. elementToJSON :: Element -> Value elementToJSON (Element as vs cs) = if null as && null cs @@ -64,21 +78,29 @@ elementToJSON (Element as vs cs) = then [] else [("__values", Array (V.fromList (map String vs)))] + attrsToObject :: [(T.Text, T.Text)] -> Object + attrsToObject = HM.fromList . map (second String) + + mergeObject :: Value -> Value -> Value + mergeObject (Array arr) v = Array (V.cons v arr) + mergeObject v1 v2 = Array (V.fromList [v1, v2]) + +-- |Convert list of tagstream-conduit `Token` to aeson `Value' tokensToJSON :: [T.Token] -> Value tokensToJSON tokens = elementToJSON $ runBuilder (mapM_ tokenToBuilder tokens) -xmlToJSON :: (Functor m, Monad m, MonadThrow m) => Source m ByteString -> m Value -xmlToJSON src = xmlToJSONResumable (ResumableSource src (return ())) - -skipBOM :: A.Parser () -skipBOM = - ( A.string "\xff\xfe" - <|> A.string "\xef\xbb\xbf" - ) *> return () - -xmlToJSONResumable :: (Functor m, Monad m, MonadThrow m) => ResumableSource m ByteString -> m Value -xmlToJSONResumable src = do +-- | Consume a source and convert the content to aeson `Value', it try to inspect xml encoding from first tag. +-- +-- e.g. @bsSourceToJSON (C.sourceFile path_to_xml_file)@ +bsSourceToJSON :: MonadThrow m => Source m ByteString -> m Value +bsSourceToJSON src = bsRSourceToJSON (ResumableSource src (return ())) + +-- | Consume a source and convert the content to aeson `Value', it try to inspect xml encoding from first tag. +-- +-- e.g. @xmlStreamToJSONResumable (requestBody req)@ +bsRSourceToJSON :: MonadThrow m => ResumableSource m ByteString -> m Value +bsRSourceToJSON src = do -- try to peek the first tag to find the xml encoding. (src', token) <- src $$++ C.sinkParser (skipBOM *> skipSpace *> char '<' *> S.tag) @@ -95,19 +117,41 @@ xmlToJSONResumable src = do codec = fromMaybe C.utf8 (mencoding >>= getCodec . CI.mk) - tokensToJSON <$> (src'' $$+- (C.decode codec =$ T.tokenStream =$ C.consume)) - -prependRSrc :: Monad m - => Source m a - -> ResumableSource m a - -> ResumableSource m a -prependRSrc src (ResumableSource src' close) = ResumableSource (src >> src') close - -getCodec :: CI.CI ByteString -> Maybe C.Codec -getCodec c = - case c of - "utf-8" -> Just C.utf8 - "utf8" -> Just C.utf8 - "iso8859" -> Just C.iso8859_1 - _ -> Nothing - + liftM tokensToJSON (src'' $$+- (C.decode codec =$ T.tokenStream =$ C.consume)) + where + skipBOM :: A.Parser () + skipBOM = + ( A.string "\xff\xfe" + <|> A.string "\xef\xbb\xbf" + ) *> return () + + prependRSrc :: Monad m + => Source m a + -> ResumableSource m a + -> ResumableSource m a + prependRSrc src (ResumableSource src' close) = ResumableSource (src >> src') close + + getCodec :: CI.CI ByteString -> Maybe C.Codec + getCodec c = + case c of + "utf-8" -> Just C.utf8 + "utf8" -> Just C.utf8 + "iso8859" -> Just C.iso8859_1 + _ -> Nothing + +newtype JSONParseError = JSONParseError String + deriving (Typeable, Show) +instance Exception JSONParseError + +-- | parse xml to haskell data type by using aeson's `FromJSON'. +parseXML :: (MonadThrow m, FromJSON a) => L.ByteString -> m a +parseXML s = xmlToJSON s >>= convert + where + convert v = + case fromJSON v of + Error err -> monadThrow (JSONParseError err) + Success a -> return a + +-- | convert lazy xml `ByteString' to aeson `Value'. +xmlToJSON :: MonadThrow m => L.ByteString -> m Value +xmlToJSON s = bsSourceToJSON (C.sourceList (L.toChunks s)) diff --git a/Text/XML/ToJSON/Builder.hs b/Text/XML/ToJSON/Builder.hs index 1b38714..a2b40ae 100644 --- a/Text/XML/ToJSON/Builder.hs +++ b/Text/XML/ToJSON/Builder.hs @@ -1,55 +1,89 @@ {-# LANGUAGE OverloadedStrings #-} -module Text.XML.ToJSON.Builder where +module Text.XML.ToJSON.Builder + ( -- * Element type and operations + Element(..) + , emptyElement + , addChild' + , addValue' + , addAttr' + , addAttrs' + -- * Stack type and operations + , Stack + , popStack + , closeStack + -- * Builder type and operations + , Builder + , runBuilder + , beginElement + , endElement + , modifyTopElement + , addChild + , addValue + , addAttr + , addAttrs + ) where import Data.Text (Text) import Control.Monad.Trans.State -type Str = Text - +-- | represent a XML element. data Element = Element - { elAttrs :: [(Str, Str)] - , elValues :: [Str] - , elChildren :: [(Str, Element)] + { elAttrs :: [(Text, Text)] -- ^ tag attributes. + , elValues :: [Text] -- ^ text values. + , elChildren :: [(Text, Element)] -- ^ child elements. } deriving (Show) emptyElement :: Element emptyElement = Element [] [] [] -addChild' :: (Str, Element) -> Element -> Element +-- | add a child element to an element +addChild' :: (Text, Element) -> Element -> Element addChild' item o = o { elChildren = item : elChildren o } -addValue' :: Str -> Element -> Element +-- | add a text value to an element +addValue' :: Text -> Element -> Element addValue' v o = o { elValues = v : elValues o } -addAttr' :: (Str, Str) -> Element -> Element +-- | add an attribute to an element +addAttr' :: (Text, Text) -> Element -> Element addAttr' attr o = o { elAttrs = attr : elAttrs o } -addAttrs' :: [(Str, Str)] -> Element -> Element +-- | add multiple attributes to an element +addAttrs' :: [(Text, Text)] -> Element -> Element addAttrs' as o = o { elAttrs = as ++ elAttrs o } -type Stack = [(Str, Element)] -type Builder = State Stack () - -runBuilder :: Builder -> Element -runBuilder b = finishStack $ execState b [("", emptyElement)] +-- | xml element stack with recent opened element at the top. +type Stack = [(Text, Element)] +-- | close current tag. popStack :: Stack -> Stack popStack ((k,v) : (name,elm) : tl) = (name, addChild' (k,v) elm) : tl popStack _ = error "popStack: can't pop root elmect." -finishStack :: Stack -> Element -finishStack [] = error "finishStack: empty stack." -finishStack [(_, elm)] = elm -finishStack st = finishStack (popStack st) +-- | close all unclosed tags and return the root element. +closeStack :: Stack -> Element +closeStack [] = error "closeStack: empty stack." +closeStack [(_, elm)] = elm +closeStack st = closeStack (popStack st) + +-- | `Builder' is a `State' monad to transform a `Stack'. +type Builder = State Stack () + +-- | exec the state monad and close the result stack. +runBuilder :: Builder -> Element +runBuilder b = closeStack $ execState b [("", emptyElement)] -beginElement :: Str -> Builder +-- | open element +beginElement :: Text -> Builder beginElement name = modify ( (name, emptyElement) : ) +-- | close element endElement :: Builder endElement = modify popStack +-- | util to modify top element. modifyTopElement :: (Element -> Element) -> Builder modifyTopElement f = modify $ \st -> @@ -57,14 +91,18 @@ modifyTopElement f = ((k, v) : tl) -> (k, f v) : tl _ -> fail "modifyTopElement: impossible: empty stack." -addValue :: Str -> Builder +-- | add value to top element. +addValue :: Text -> Builder addValue = modifyTopElement . addValue' -addAttr :: (Str, Str) -> Builder +-- | add attribute to top element. +addAttr :: (Text, Text) -> Builder addAttr = modifyTopElement . addAttr' -addAttrs :: [(Str, Str)] -> Builder +-- | add multiple attributes to top element. +addAttrs :: [(Text, Text)] -> Builder addAttrs = modifyTopElement . addAttrs' -addChild :: (Str, Element) -> Builder +-- | add child element to top element. +addChild :: (Text, Element) -> Builder addChild = modifyTopElement . addChild'