Skip to content
Browse files

support gunzip in enumerator parser

  • Loading branch information...
1 parent 0ddf2a5 commit e4e45dd30ba6c79ac1f93a2e32d1b9a19ed7f6ae @wavewave committed Feb 17, 2012
Showing with 134 additions and 27 deletions.
  1. +3 −1 CHANGES
  2. +81 −4 src/Text/Xournal/Parse.hs
  3. +18 −2 src/Text/Xournal/Parse/Enumerator.hs
  4. +29 −6 src/Text/Xournal/Parse/Zlib.hs
  5. +3 −14 xournal-parser.cabal
View
4 CHANGES
@@ -3,4 +3,6 @@
0.3: 18 Dec 2011
* Refactor out type definition to xournal-types package
0.4: 12 Feb 2012
- * faster parser using xml-enumerator. Parsing variable width strokes is possible
+ * faster parser using xml-enumerator. Parsing variable width strokes is possible
+0.4.1: 17 Feb 2012
+ * support for gunzip in enumerator parser
View
85 src/Text/Xournal/Parse.hs
@@ -29,31 +29,47 @@ import Data.Strict.Tuple
import Prelude hiding (takeWhile)
+-- | Consume a non-empty run of bytes accepted by 'isHorizontalSpace' and
+--   discard it. Fails when the next byte is not horizontal whitespace.
+
skipSpaces :: Parser ()
skipSpaces = do
  _ <- satisfy isHorizontalSpace
  skipWhile isHorizontalSpace
+-- | Skip any leading XML whitespace (space, tab, carriage return, line feed).
+--   Succeeds without consuming anything when there is no leading whitespace
+--   or when the input is already exhausted.
+--
+--   Tab and carriage return are accepted in addition to space and line feed
+--   so that tab-indented files and files with CRLF line endings parse too.
+
trim_starting_space :: Parser ()
trim_starting_space =
  try endOfInput <|> (takeWhile (inClass " \t\r\n") *> return ())
-
+
+-- | Match the single character @\<@ and return it.
+
langle :: Parser Char
langle = char '<'
+-- | Match the single character @\>@ and return it.
+
rangle :: Parser Char
rangle = char '>'
+-- | Parse an XML declaration of the form @\<? ... ?\>@ and return the bytes
+--   between the delimiters. The content is read up to the first @?@ or @\>@
+--   byte, which must then be the start of the closing @?\>@.
+
xmlheader :: Parser B.ByteString
xmlheader = do
  _ <- string "<?"
  content <- takeTill (inClass "?>")
  _ <- string "?>"
  return content
+
+-- | Accumulating worker for 'headercontent'. Reads a non-empty chunk of bytes
+--   that are neither @?@ nor @\>@ and appends it to the accumulator. If the
+--   literal @?\>@ follows, the accumulator plus that terminator is returned;
+--   otherwise the worker recurses with the extended accumulator.
headercontentWorker :: B.ByteString -> Parser B.ByteString
headercontentWorker bstr = do
  chunk <- takeWhile1 (notInClass "?>")
  let acc = bstr `B.append` chunk
  fmap (B.append acc) (string "?>") <|> headercontentWorker acc
+-- | Run 'headercontentWorker' starting from an empty accumulator.
+
headercontent :: Parser B.ByteString
headercontent = headercontentWorker B.empty
-
+
+-- |
+
stroketagopen :: Parser Stroke -- B.ByteString
stroketagopen = do
trim
@@ -76,9 +92,13 @@ stroketagopen = do
char '>'
return $ Stroke tool color width []
+-- | Match the literal closing tag @\<\/stroke\>@.
+
stroketagclose :: Parser B.ByteString
stroketagclose = string "</stroke>"
+-- |
+
onestroke :: Parser Stroke
onestroke = do trim
strokeinit <- stroketagopen
@@ -91,15 +111,20 @@ onestroke = do trim
stroketagclose
return $ strokeinit { stroke_data = coordlist }
+-- | Short alias for 'trim_starting_space'.
+
trim :: Parser ()
trim = trim_starting_space
+-- | Top-level document parser: skips leading whitespace, parses the XML
+--   declaration with 'xmlheader', skips whitespace again, and then parses the
+--   document body with 'xournal'. Both sub-parsers are labelled so that a
+--   failure names the part that did not parse.
+
parser_xournal :: Parser Xournal
parser_xournal =
     trim
  *> (xmlheader <?> "xmlheader")
  *> trim
  *> (xournal <?> "xournal")
+-- |
xournal :: Parser Xournal
xournal = do trim
@@ -113,6 +138,8 @@ xournal = do trim
trim
xournalclose
return $ Xournal t pgs
+
+-- |
page :: Parser Page
page = do trim
@@ -125,7 +152,8 @@ page = do trim
pageclose
return $ Page dim bkg layers
-
+-- |
+
layer :: Parser Layer
layer = do trim
layerheader
@@ -135,6 +163,7 @@ layer = do trim
layerclose
return $ Layer strokes
+-- |
title :: Parser B.ByteString
title = do trim
@@ -143,37 +172,57 @@ title = do trim
titleclose
return str
+-- | Match the literal opening tag @\<title\>@.
+
titleheader :: Parser B.ByteString
titleheader = string "<title>"
+-- | Match the literal closing tag @\<\/title\>@.
+
titleclose :: Parser B.ByteString
titleclose = string "</title>"
+-- | Parse a @\<preview\>@ element and throw its content away: optional
+--   whitespace, the opening tag, everything up to the next @\<@, the closing
+--   tag, and trailing whitespace.
+
preview :: Parser ()
preview =
     trim
  *> previewheader
  *> takeTill (inClass "<")
  *> previewclose
  *> trim
+-- | Match the literal opening tag @\<preview\>@.
+
previewheader :: Parser B.ByteString
previewheader = string "<preview>"
+-- | Match the literal closing tag @\<\/preview\>@.
+
previewclose :: Parser B.ByteString
previewclose = string "</preview>"
+-- | Parse the opening tag of the root element: the literal @\<xournal@,
+--   followed by everything up to the next @\>@, followed by that @\>@.
+--   Returns the bytes between @\<xournal@ and @\>@.
+
xournalheader :: Parser B.ByteString
xournalheader = do
  _ <- xournalheaderstart
  attrs <- takeTill (inClass ">")
  _ <- xournalheaderend
  return attrs
+-- | Match the literal @\<xournal@ that begins the root element's opening tag.
+
xournalheaderstart :: Parser B.ByteString
xournalheaderstart = string "<xournal"
+-- | Match the @\>@ that ends the root element's opening tag.
+
xournalheaderend :: Parser Char
xournalheaderend = char '>'
+-- | Match the literal closing tag @\<\/xournal\>@.
+
xournalclose :: Parser B.ByteString
xournalclose = string "</xournal>"
+-- |
+
pageheader :: Parser Dimension
pageheader = do pageheaderstart
trim
@@ -189,22 +238,34 @@ pageheader = do pageheaderstart
takeTill (inClass ">")
pageheaderend
return $ Dim w h
+
+-- | Match the literal @\<page@ that begins a page element's opening tag.
pageheaderstart :: Parser B.ByteString
pageheaderstart = string "<page"
+-- | Match the @\>@ that ends a page element's opening tag.
+
pageheaderend :: Parser Char
pageheaderend = char '>'
+-- | Match the literal closing tag @\<\/page\>@.
+
pageclose :: Parser B.ByteString
pageclose = string "</page>"
+-- | Match the literal opening tag @\<layer\>@.
+
layerheader :: Parser B.ByteString
layerheader = string "<layer>"
+-- | Match the literal closing tag @\<\/layer\>@.
+
layerclose :: Parser B.ByteString
layerclose = string "</layer>"
+-- |
+
background :: Parser Background
background = do
trim
@@ -257,44 +318,60 @@ background = do
backgroundclose
return $ BackgroundPdf typ mdomain mfilename pnum
-
+-- | Parse a non-empty run of ASCII letters (byte values 65-90 and 97-122).
alphabet :: Parser B.ByteString
alphabet = takeWhile1 asciiLetter
  where
    upperAZ w = w >= 65 && w <= 90
    lowerAZ w = w >= 97 && w <= 122
    asciiLetter w = upperAZ w || lowerAZ w
+-- | Parse a non-empty run of ASCII letters (65-90, 97-122), ASCII digits
+--   (48-57) and the @#@ byte (35).
+
alphanumsharp :: Parser B.ByteString
alphanumsharp = takeWhile1 accepted
  where
    within lo hi w = w >= lo && w <= hi
    accepted w =
         within 65 90 w
      || within 97 122 w
      || within 48 57 w
      || w == 35
-- | Parse a file name: every byte up to, but not including, the next double
--   quote. May return an empty string. Needs to be reimplemented.
+
parseFileName :: Parser B.ByteString
parseFileName = takeTill (inClass ['"'])
-- takeWhile1 (\w -> (w >= 65 && w <= 90)
-- || (w >= 97 && w <= 122)
-- || (w >= 48 && w <= 57)
-- || (w == 35)
+-- | Match the literal @\<background@ that begins a background element.
+
backgroundheader :: Parser B.ByteString
backgroundheader = string "<background"
+-- | Match the literal @\/\>@ that ends the self-closing background element.
+
backgroundclose :: Parser B.ByteString
backgroundclose = string "/>"
+-- | 'parser_xournal' converted into an iteratee over strict 'B.ByteString'
+--   chunks by 'AI.parserToIteratee'.
+
iter_xournal :: Iter.Iteratee B.ByteString IO Xournal
iter_xournal = AI.parserToIteratee parser_xournal
+-- | Read an uncompressed xournal file: the file at the given path is fed to
+--   'iter_xournal' by 'Iter.fileDriver'.
+
read_xournal :: String -> IO Xournal
read_xournal = Iter.fileDriver iter_xournal
+-- | Read a gzip-compressed xournal file: the file contents are inflated by
+--   'ungzipXoj' and the result is parsed by 'iter_xournal'.
+
read_xojgz :: String -> IO Xournal
read_xojgz = Iter.fileDriver gunzipped
  where
    gunzipped = Iter.joinIM (ungzipXoj iter_xournal)
+-- | Debugging helper: inflate a gzip-compressed file with 'ungzipXoj' and
+--   hand the decompressed stream to 'printLinesUnterminated'.
cat_xournalgz :: String -> IO ()
cat_xournalgz path = Iter.fileDriver printer path
  where
    printer = Iter.joinIM (ungzipXoj printLinesUnterminated)
+-- | Extract the result value from a @Done@ parse result, ignoring its first
+--   field. Partial: any other constructor causes a pattern-match failure.
+
onlyresult (Done _ r) = r
View
20 src/Text/Xournal/Parse/Enumerator.hs
@@ -18,7 +18,8 @@ module Text.Xournal.Parse.Enumerator where
import Debug.Trace
import qualified Data.ByteString as S
import Data.Enumerator as E hiding (foldl')
-import qualified Data.Enumerator.List as EL
+import qualified Data.Enumerator.List as EL
+import qualified Codec.Zlib.Enum as EZ
import Control.Applicative
import Control.Monad.Trans
import Control.Monad
@@ -34,10 +35,11 @@ import Data.Label
import Data.XML.Types
import Text.XML.Stream.Render
import Text.XML.Stream.Parse hiding (many)
+import Text.Xournal.Parse.Zlib
import System.IO
import Data.Xournal.Simple
-import Data.Enumerator.Binary (enumHandle)
+import Data.Enumerator.Binary (enumHandle, enumFile)
import Prelude hiding ((.),id)
@@ -374,6 +376,20 @@ parseXmlFile h iter = do
-- Opens the file read-only and runs 'parseXmlFile' with 'pXournal' on the
-- resulting handle; the handle is closed by 'withFile' afterwards.
parseXojFile :: FilePath -> IO (Either String Xournal)
parseXojFile fp = withFile fp ReadMode runParser
  where
    runParser handle = parseXmlFile handle pXournal
+-- | Parse a gzip-compressed xournal file. The file is enumerated with
+--   'enumFile', inflated with 'EZ.ungzip', turned into XML events by
+--   'parseBytes' (default settings) and consumed by 'pXournal'.
+
+parseXojGzFile :: FilePath -> IO (Either String Xournal)
+parseXojGzFile fp = run_ (enumFile fp $$ pipeline)
+  where
+    pipeline = EZ.ungzip =$ parseBytes def =$ pXournal
+
+-- | Parse a xournal file, choosing the reader from the file contents:
+--   'parseXojGzFile' when 'checkIfBinary' reports 'True', 'parseXojFile'
+--   otherwise.
+
+parseXournal :: FilePath -> IO (Either String Xournal)
+parseXournal fname = do
+  isBinary <- checkIfBinary fname
+  let reader
+        | isBinary  = parseXojGzFile
+        | otherwise = parseXojFile
+  reader fname
+
+
-- | printing for debug
iterPrint :: (Show s,MonadIO m) => E.Iteratee s m ()
View
35 src/Text/Xournal/Parse/Zlib.hs
@@ -1,4 +1,3 @@
-
-----------------------------------------------------------------------------
-- |
-- Module : Text.Xournal.Parse.Zlib
@@ -9,13 +8,37 @@
-- Stability : experimental
-- Portability : GHC
--
+-----------------------------------------------------------------------------
+
module Text.Xournal.Parse.Zlib where
import Control.Monad.IO.Class
-import Data.Iteratee.ZLib
-import Data.Iteratee
-import Data.ByteString
+import Data.Iteratee.ZLib as IZ
+import Data.Iteratee as I
+-- import Codec.Zlib.Enum as EZ
+-- import Data.Enumerator as E
+import Data.ByteString
+import qualified Data.ByteString.Lazy as LB
+
+import System.IO
+
+-- | Inflating enumerator for gzip input: 'IZ.enumInflate' applied to the
+--   'IZ.GZip' format with 'IZ.defaultDecompressParams'.
+
+ungzipXoj :: MonadIO m => I.Enumerator ByteString m a
+ungzipXoj = IZ.enumInflate IZ.GZip IZ.defaultDecompressParams
+
+-- |
+
+-- gunzipXojEnum :: MonadIO m => E.Enumeratee ByteString ByteString m a
+-- gunzipXojEnum = EZ.decompress EZ.defaultWindowBits
+
+
+
+-- | Heuristic check for a binary (in practice gzip-compressed) file: returns
+--   'True' when a zero byte occurs among the first 100 bytes of the file.
+--   An empty file yields 'False'.
+--
+--   The file is opened in binary mode and at most 100 bytes are read
+--   strictly, so no lazy I\/O outlives the handle.
-ungzipXoj :: MonadIO m => Enumerator ByteString m a
-ungzipXoj = enumInflate GZip defaultDecompressParams
+checkIfBinary :: FilePath -> IO Bool
+checkIfBinary fname =
+  withBinaryFile fname ReadMode $ \h -> do
+    -- LB.hGet reads the requested bytes eagerly (fewer only at end of file).
+    header <- LB.hGet h 100
+    return $! LB.any (== 0) header
View
17 xournal-parser.cabal
@@ -1,5 +1,5 @@
Name: xournal-parser
-Version: 0.4
+Version: 0.4.1
Synopsis: Xournal file parser
Description: Text parser for xournal xml file
Homepage: http://ianwookim.org/hxournal
@@ -14,18 +14,6 @@ Source-repository head
type: git
location: http://www.github.com/wavewave/xournal-parser
--- Executable xptest
--- Main-is: xptest.hs
--- hs-source-dirs: test
--- ghc-options: -Wall -threaded -funbox-strict-fields -fno-warn-unused-do-bind
--- ghc-prof-options: -caf-all -auto-all
--- Build-Depends:
--- base>4,
--- xml-types == 0.3.*,
--- enumerator == 0.4.*,
--- mtl,
--- xournal-parser
-
Library
hs-source-dirs: src
@@ -46,7 +34,8 @@ Library
xournal-types == 0.4.*,
xml-types == 0.3.*,
text == 0.11.*,
- fclabels == 1.0.*
+ fclabels == 1.0.*,
+ zlib-enum == 0.2.*
Exposed-Modules:
Text.Xournal.Parse

0 comments on commit e4e45dd

Please sign in to comment.
Something went wrong with that request. Please try again.