Skip to content

Commit

Permalink
api and haddock overhaul.
Browse files Browse the repository at this point in the history
  • Loading branch information
yihuang committed Oct 4, 2012
1 parent d554361 commit f834d69
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 65 deletions.
126 changes: 85 additions & 41 deletions Text/XML/ToJSON.hs
@@ -1,22 +1,39 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings, DeriveDataTypeable #-}

module Text.XML.ToJSON
( elementToJSON
, tokensToJSON
(

{-| This library provide a way to convert xml to json.
Further more, by combining with aeson's parsing facility, it provide a way to parse xml to haskell data type.
-}
parseXML
, xmlToJSON
, JSONParseError
-- * streamlined api
, bsSourceToJSON
, bsRSourceToJSON
-- * utils
, tokenToBuilder
, elementToJSON
, tokensToJSON
) where

import Control.Monad (when)
import Control.Monad (when, liftM)
import Control.Arrow (second)
import Control.Applicative ( (<$>), (*>), (<|>) )
import Control.Exception (Exception)
import Control.Applicative ( (*>), (<|>) )

import Data.Typeable (Typeable)
import Data.Maybe (fromMaybe)
import qualified Data.CaseInsensitive as CI
import qualified Data.Text as T
import qualified Data.ByteString.Lazy as L
import Data.ByteString (ByteString)
import qualified Blaze.ByteString.Builder as B
import qualified Data.Attoparsec as A
import Data.Attoparsec.ByteString.Char8 (char, skipSpace)
import Data.Conduit
import Data.Conduit (Source, yield, (=$), ($$++), ($$+-), MonadThrow(monadThrow))
import Data.Conduit.Internal (ResumableSource(ResumableSource))
import qualified Data.Conduit.List as C
import qualified Data.Conduit.Attoparsec as C
Expand All @@ -28,8 +45,9 @@ import Text.HTML.TagStream
import qualified Text.HTML.TagStream.Text as T
import qualified Text.HTML.TagStream.ByteString as S
import Text.XML.ToJSON.Builder
import Data.Aeson (Value(..), Object)
import Data.Aeson (Value(..), Object, FromJSON, fromJSON, Result(Error, Success))

-- | Convert tagstream-conduit `Token' to xml element `Builder'
tokenToBuilder :: T.Token -> Builder
tokenToBuilder (TagOpen s as selfClose) = do
beginElement s
Expand All @@ -39,13 +57,9 @@ tokenToBuilder (TagClose _) = endElement -- FIXME should match tag name?
tokenToBuilder (Text s) = addValue s
tokenToBuilder _ = return ()

attrsToObject :: [(Str, Str)] -> Object
attrsToObject = HM.fromList . map (second String)

mergeObject :: Value -> Value -> Value
mergeObject (Array arr) v = Array (V.cons v arr)
mergeObject v1 v2 = Array (V.fromList [v1, v2])

-- |Convert xml `Element' to aeson `Value' .
--
-- xml attributes and text values are converted to special object attribute @__attributes@ and @__values@.
elementToJSON :: Element -> Value
elementToJSON (Element as vs cs) =
if null as && null cs
Expand All @@ -64,21 +78,29 @@ elementToJSON (Element as vs cs) =
then []
else [("__values", Array (V.fromList (map String vs)))]

attrsToObject :: [(T.Text, T.Text)] -> Object
attrsToObject = HM.fromList . map (second String)

mergeObject :: Value -> Value -> Value
mergeObject (Array arr) v = Array (V.cons v arr)
mergeObject v1 v2 = Array (V.fromList [v1, v2])

-- |Convert list of tagstream-conduit `Token` to aeson `Value'
tokensToJSON :: [T.Token] -> Value
tokensToJSON tokens =
elementToJSON $ runBuilder (mapM_ tokenToBuilder tokens)

xmlToJSON :: (Functor m, Monad m, MonadThrow m) => Source m ByteString -> m Value
xmlToJSON src = xmlToJSONResumable (ResumableSource src (return ()))

skipBOM :: A.Parser ()
skipBOM =
( A.string "\xff\xfe"
<|> A.string "\xef\xbb\xbf"
) *> return ()

xmlToJSONResumable :: (Functor m, Monad m, MonadThrow m) => ResumableSource m ByteString -> m Value
xmlToJSONResumable src = do
-- | Consume a source and convert the content to aeson `Value', it try to inspect xml encoding from first tag.
--
-- e.g. @bsSourceToJSON (C.sourceFile path_to_xml_file)@
bsSourceToJSON :: MonadThrow m => Source m ByteString -> m Value
bsSourceToJSON src = bsRSourceToJSON (ResumableSource src (return ()))

-- | Consume a source and convert the content to aeson `Value', it try to inspect xml encoding from first tag.
--
-- e.g. @xmlStreamToJSONResumable (requestBody req)@
bsRSourceToJSON :: MonadThrow m => ResumableSource m ByteString -> m Value
bsRSourceToJSON src = do
-- try to peek the first tag to find the xml encoding.
(src', token) <- src $$++ C.sinkParser (skipBOM *> skipSpace *> char '<' *> S.tag)

Expand All @@ -95,19 +117,41 @@ xmlToJSONResumable src = do

codec = fromMaybe C.utf8 (mencoding >>= getCodec . CI.mk)

tokensToJSON <$> (src'' $$+- (C.decode codec =$ T.tokenStream =$ C.consume))

prependRSrc :: Monad m
=> Source m a
-> ResumableSource m a
-> ResumableSource m a
prependRSrc src (ResumableSource src' close) = ResumableSource (src >> src') close

getCodec :: CI.CI ByteString -> Maybe C.Codec
getCodec c =
case c of
"utf-8" -> Just C.utf8
"utf8" -> Just C.utf8
"iso8859" -> Just C.iso8859_1
_ -> Nothing

liftM tokensToJSON (src'' $$+- (C.decode codec =$ T.tokenStream =$ C.consume))
where
skipBOM :: A.Parser ()
skipBOM =
( A.string "\xff\xfe"
<|> A.string "\xef\xbb\xbf"
) *> return ()

prependRSrc :: Monad m
=> Source m a
-> ResumableSource m a
-> ResumableSource m a
prependRSrc src (ResumableSource src' close) = ResumableSource (src >> src') close

getCodec :: CI.CI ByteString -> Maybe C.Codec
getCodec c =
case c of
"utf-8" -> Just C.utf8
"utf8" -> Just C.utf8
"iso8859" -> Just C.iso8859_1
_ -> Nothing

newtype JSONParseError = JSONParseError String
deriving (Typeable, Show)
instance Exception JSONParseError

-- | parse xml to haskell data type by using aeson's `FromJSON'.
parseXML :: (MonadThrow m, FromJSON a) => L.ByteString -> m a
parseXML s = xmlToJSON s >>= convert
where
convert v =
case fromJSON v of
Error err -> monadThrow (JSONParseError err)
Success a -> return a

-- | convert lazy xml `ByteString' to aeson `Value'.
xmlToJSON :: MonadThrow m => L.ByteString -> m Value
xmlToJSON s = bsSourceToJSON (C.sourceList (L.toChunks s))
86 changes: 62 additions & 24 deletions Text/XML/ToJSON/Builder.hs
@@ -1,70 +1,108 @@
{-# LANGUAGE OverloadedStrings #-}
module Text.XML.ToJSON.Builder where
module Text.XML.ToJSON.Builder
( -- * Element type and operations
Element(..)
, emptyElement
, addChild'
, addValue'
, addAttr'
, addAttrs'
-- * Stack type and operations
, Stack
, popStack
, closeStack
-- * Builder type and operations
, Builder
, runBuilder
, beginElement
, endElement
, modifyTopElement
, addChild
, addValue
, addAttr
, addAttrs
) where

import Data.Text (Text)
import Control.Monad.Trans.State

type Str = Text

-- | represent a XML element.
data Element = Element
{ elAttrs :: [(Str, Str)]
, elValues :: [Str]
, elChildren :: [(Str, Element)]
{ elAttrs :: [(Text, Text)] -- ^ tag attributes.
, elValues :: [Text] -- ^ text values.
, elChildren :: [(Text, Element)] -- ^ child elements.
} deriving (Show)

emptyElement :: Element
emptyElement = Element [] [] []

addChild' :: (Str, Element) -> Element -> Element
-- | add a child element to an element
addChild' :: (Text, Element) -> Element -> Element
addChild' item o = o { elChildren = item : elChildren o }

addValue' :: Str -> Element -> Element
-- | add a text value to an element
addValue' :: Text -> Element -> Element
addValue' v o = o { elValues = v : elValues o }

addAttr' :: (Str, Str) -> Element -> Element
-- | add an attribute to an element
addAttr' :: (Text, Text) -> Element -> Element
addAttr' attr o = o { elAttrs = attr : elAttrs o }

addAttrs' :: [(Str, Str)] -> Element -> Element
-- | add multiple attributes to an element
addAttrs' :: [(Text, Text)] -> Element -> Element
addAttrs' as o = o { elAttrs = as ++ elAttrs o }

type Stack = [(Str, Element)]
type Builder = State Stack ()

runBuilder :: Builder -> Element
runBuilder b = finishStack $ execState b [("", emptyElement)]
-- | xml element stack with recent opened element at the top.
type Stack = [(Text, Element)]

-- | close current tag.
popStack :: Stack -> Stack
popStack ((k,v) : (name,elm) : tl) = (name, addChild' (k,v) elm) : tl
popStack _ = error "popStack: can't pop root elmect."

finishStack :: Stack -> Element
finishStack [] = error "finishStack: empty stack."
finishStack [(_, elm)] = elm
finishStack st = finishStack (popStack st)
-- | close all unclosed tags and return the root element.
closeStack :: Stack -> Element
closeStack [] = error "closeStack: empty stack."
closeStack [(_, elm)] = elm
closeStack st = closeStack (popStack st)

-- | `Builder' is a `State' monad to transform a `Stack'.
type Builder = State Stack ()

-- | exec the state monad and close the result stack.
runBuilder :: Builder -> Element
runBuilder b = closeStack $ execState b [("", emptyElement)]

beginElement :: Str -> Builder
-- | open element
beginElement :: Text -> Builder
beginElement name =
modify ( (name, emptyElement) : )

-- | close element
endElement :: Builder
endElement =
modify popStack

-- | util to modify top element.
modifyTopElement :: (Element -> Element) -> Builder
modifyTopElement f =
modify $ \st ->
case st of
((k, v) : tl) -> (k, f v) : tl
_ -> fail "modifyTopElement: impossible: empty stack."

addValue :: Str -> Builder
-- | add value to top element.
addValue :: Text -> Builder
addValue = modifyTopElement . addValue'

addAttr :: (Str, Str) -> Builder
-- | add attribute to top element.
addAttr :: (Text, Text) -> Builder
addAttr = modifyTopElement . addAttr'

addAttrs :: [(Str, Str)] -> Builder
-- | add multiple attributes to top element.
addAttrs :: [(Text, Text)] -> Builder
addAttrs = modifyTopElement . addAttrs'

addChild :: (Str, Element) -> Builder
-- | add child element to top element.
addChild :: (Text, Element) -> Builder
addChild = modifyTopElement . addChild'

0 comments on commit f834d69

Please sign in to comment.