Permalink
Browse files

introduce a central parser module. wip.

  • Loading branch information...
1 parent b83219a commit d6bbd61947e971c56b0628c68a9e752f9e70fb76 @vincenthz committed Aug 14, 2015
View
@@ -13,16 +13,13 @@ module Data.Git.Delta
, deltaApply
) where
-import Data.Attoparsec
-import qualified Data.Attoparsec as A
-import qualified Data.Attoparsec.Lazy as AL
+import Control.Applicative (many)
import Data.ByteString (ByteString)
-import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import Data.Bits
import Data.Word
-import Control.Applicative ((<$>), many)
+import qualified Data.Git.Parser as P
-- | a delta is a source size, a destination size and a list of delta cmd
data Delta = Delta Word64 Word64 [DeltaCmd]
@@ -42,13 +39,10 @@ data DeltaCmd =
deltaParse = do
srcSize <- getDeltaHdrSize
resSize <- getDeltaHdrSize
- dcmds <- many (anyWord8 >>= parseWithCmd)
+ dcmds <- many (P.anyWord8 >>= parseWithCmd)
return $ Delta srcSize resSize dcmds
where
- getDeltaHdrSize = do
- z <- A.takeWhile (\w -> w `testBit` 7)
- l <- anyWord8
- return $ unbytes 0 $ (map (\w -> w `clearBit` 7) (B.unpack z) ++ [l])
+ getDeltaHdrSize = unbytes 0 <$> P.vlf
-- use a foldl ..
unbytes _ [] = 0
unbytes sh (x:xs) = (fromIntegral x) `shiftL` sh + unbytes (sh+7) xs
@@ -66,12 +60,12 @@ deltaParse = do
let offset = o1 .|. o2 .|. o3 .|. o4
let size = s1 .|. s2 .|. s3
return $ DeltaSrc offset (if size == 0 then 0x10000 else size)
- | otherwise = DeltaCopy <$> A.take (fromIntegral cmd)
+ | otherwise = DeltaCopy <$> P.takeBytes (fromIntegral cmd)
word8cond cond sh =
- if cond then (flip shiftL sh . fromIntegral) <$> anyWord8 else return 0
+ if cond then (flip shiftL sh . fromIntegral) <$> P.anyWord8 else return 0
-- | read one delta from a lazy bytestring.
-deltaRead = AL.maybeResult . AL.parse deltaParse
+deltaRead = P.maybeParseChunks deltaParse
-- | apply a delta on a lazy bytestring, returning a new bytestring.
deltaApply :: L.ByteString -> Delta -> L.ByteString
View
@@ -0,0 +1,50 @@
+module Data.Git.Parser
+ (
+ -- * Basic parser functions
+ Parser
+ , parseWith
+ , IResult(..)
+ , maybeParseChunks
+ , eitherParseChunks
+ -- * Specific functions
+ , word32
+ , ref
+ , referenceBin
+ , referenceHex
+ , vlf
+ -- * Simple re-export
+ , A.anyWord8
+ , takeBytes
+ , AC.string
+ , decimal
+ ) where
+
+import Data.Attoparsec.ByteString (parseWith, Parser, IResult(..))
+import qualified Data.Attoparsec.ByteString as A
+import qualified Data.Attoparsec.ByteString.Char8 as AC
+import qualified Data.Attoparsec.ByteString.Lazy as AL
+import Data.Bits
+
+import qualified Data.ByteString as B
+
+import Data.Git.Ref
+import Data.Git.Internal
+
+vlf = do
+ bs <- A.takeWhile (\w -> w `testBit` 7)
+ l <- A.anyWord8
+ return $ (map (\w -> w `clearBit` 7) $ B.unpack bs) ++ [l]
+
+word32 = be32 <$> A.take 4
+
+ref = referenceBin
+referenceBin = fromBinary <$> A.take 20
+referenceHex = fromHex <$> A.take 40
+
+decimal :: Parser Int
+decimal = AC.decimal
+
+maybeParseChunks f = AL.maybeResult . AL.parse f
+eitherParseChunks f = AL.eitherResult . AL.parse f
+
+takeBytes = A.take
@@ -26,9 +26,6 @@ module Data.Git.Storage.FileReader
import Control.Exception (bracket, throwIO)
-import Data.Attoparsec (parseWith, Parser, IResult(..))
-import qualified Data.Attoparsec as A
-import Data.Bits
import Data.ByteString (ByteString)
import Data.ByteString.Unsafe
import qualified Data.ByteString as B
@@ -37,6 +34,7 @@ import Data.ByteString.Lazy.Internal (defaultChunkSize)
import Data.IORef
import Data.Git.Imports
+import qualified Data.Git.Parser as P
import Data.Data
import Data.Word
@@ -129,21 +127,18 @@ fileReaderSeek (FileReader { fbHandle = handle, fbRemaining = ref, fbPos = pos }
writeIORef ref B.empty >> writeIORef pos absPos >> hSeek handle AbsoluteSeek (fromIntegral absPos)
-- | parse from a filebuffer
-fileReaderParse :: FileReader -> Parser a -> IO a
+fileReaderParse :: FileReader -> P.Parser a -> IO a
fileReaderParse fr@(FileReader { fbRemaining = ref }) parseF = do
initBS <- readIORef ref
- result <- parseWith (fileReaderGetNext fr) parseF initBS
+ result <- P.parseWith (fileReaderGetNext fr) parseF initBS
case result of
- Done remaining a -> writeIORef ref remaining >> return a
- Partial _ -> error "parsing failed: partial with a handle, reached EOF ?"
- Fail _ ctxs err -> error ("parsing failed: " ++ show ctxs ++ " : " ++ show err)
+ P.Done remaining a -> writeIORef ref remaining >> return a
+ P.Partial _ -> error "parsing failed: partial with a handle, reached EOF ?"
+ P.Fail _ ctxs err -> error ("parsing failed: " ++ show ctxs ++ " : " ++ show err)
-- | get a Variable Length Field. get byte as long as MSB is set, and one byte after
fileReaderGetVLF :: FileReader -> IO [Word8]
-fileReaderGetVLF fr = fileReaderParse fr $ do
- bs <- A.takeWhile (\w -> w `testBit` 7)
- l <- A.anyWord8
- return $ (map (\w -> w `clearBit` 7) $ B.unpack bs) ++ [l]
+fileReaderGetVLF fr = fileReaderParse fr P.vlf
fileReaderInflateToSize :: FileReader -> Word64 -> IO L.ByteString
fileReaderInflateToSize fb@(FileReader { fbRemaining = ref }) outputSize = do
View
@@ -36,6 +36,7 @@ import Data.Git.Internal
import Data.Git.Imports
import Data.Git.Storage.FileWriter
import Data.Git.Storage.Object
+import qualified Data.Git.Parser as P
import Filesystem
import Filesystem.Path
@@ -46,7 +47,6 @@ import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as B
import Data.Attoparsec.Lazy
-import qualified Data.Attoparsec.Char8 as PC
import Control.Exception (onException, SomeException)
import qualified Control.Exception as E
@@ -58,20 +58,17 @@ import Prelude hiding (FilePath)
isObjectPrefix [a,b] = isHexDigit a && isHexDigit b
isObjectPrefix _ = False
-decimal :: Parser Int
-decimal = PC.decimal
-
-- loose object parsing
parseHeader = do
h <- takeWhile1 ((/=) 0x20)
_ <- word8 0x20
- sz <- decimal
- return (objectTypeUnmarshall $ BC.unpack h, fromIntegral sz, Nothing)
+ sz <- P.decimal
+ return (objectTypeUnmarshall h, fromIntegral sz, Nothing)
-parseTreeHeader = string "tree " >> decimal >> word8 0
-parseTagHeader = string "tag " >> decimal >> word8 0
-parseCommitHeader = string "commit " >> decimal >> word8 0
-parseBlobHeader = string "blob " >> decimal >> word8 0
+parseTreeHeader = P.string "tree " >> P.decimal >> word8 0
+parseTagHeader = P.string "tag " >> P.decimal >> word8 0
+parseCommitHeader = P.string "commit " >> P.decimal >> word8 0
+parseBlobHeader = P.string "blob " >> P.decimal >> word8 0
parseTree = parseTreeHeader >> objectParseTree
parseTag = parseTagHeader >> objectParseTag
@@ -80,7 +77,7 @@ parseBlob = parseBlobHeader >> objectParseBlob
parseObject :: L.ByteString -> Object
parseObject = parseSuccess (parseTree <|> parseBlob <|> parseCommit <|> parseTag)
- where parseSuccess p = either error id . eitherResult . parse p
+ where parseSuccess p = either error id . P.eitherParseChunks p
-- | unmarshall an object (with header) from a bytestring.
looseUnmarshall :: L.ByteString -> Object
@@ -110,7 +107,7 @@ looseReadRaw repoPath ref = looseUnmarshallZippedRaw <$> readZippedFile (objectP
-- | read only the header of a loose object.
looseReadHeader repoPath ref = toHeader <$> readZippedFile (objectPathOfRef repoPath ref)
- where toHeader = either error id . eitherResult . parse parseHeader . dezip
+ where toHeader = either error id . P.eitherParseChunks parseHeader . dezip
-- | read a specific ref from a loose object and returns an object
looseRead repoPath ref = looseUnmarshallZipped <$> readZippedFile (objectPathOfRef repoPath ref)
View
@@ -43,6 +43,7 @@ module Data.Git.Storage.Object
import Data.Git.Ref
import Data.Git.Types
import Data.Git.Imports
+import qualified Data.Git.Parser as P
import Data.Byteable (toBytes)
import Data.ByteString (ByteString)
@@ -51,7 +52,6 @@ import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Lazy as L
import Data.Attoparsec.Lazy
-import qualified Data.Attoparsec.Lazy as P
import qualified Data.Attoparsec.Char8 as PC
import Data.List (intersperse)
@@ -126,7 +126,7 @@ objectTypeMarshall TypeCommit = "commit"
objectTypeMarshall TypeTag = "tag"
objectTypeMarshall _ = error "deltas cannot be marshalled"
-objectTypeUnmarshall :: String -> ObjectType
+objectTypeUnmarshall :: ByteString -> ObjectType
objectTypeUnmarshall "tree" = TypeTree
objectTypeUnmarshall "blob" = TypeBlob
objectTypeUnmarshall "commit" = TypeCommit
@@ -175,52 +175,49 @@ skipEOL = skipChar '\n'
skipChar :: Char -> Parser ()
skipChar c = PC.char c >> return ()
-referenceHex = fromHex <$> P.take 40
-referenceBin = fromBinary <$> P.take 20
-
-- | parse a tree content
treeParse = Tree <$> parseEnts
where parseEnts = atEnd >>= \end -> if end then return [] else liftM2 (:) parseEnt parseEnts
- parseEnt = liftM3 (,,) modeperm parseEntName (word8 0 >> referenceBin)
+ parseEnt = liftM3 (,,) modeperm parseEntName (word8 0 >> P.referenceBin)
parseEntName = entName <$> (PC.char ' ' >> takeTill ((==) 0))
-- | parse a blob content
blobParse = (Blob <$> takeLazyByteString)
-- | parse a commit content
commitParse = do
- tree <- string "tree " >> referenceHex
+ tree <- P.string "tree " >> P.referenceHex
skipChar '\n'
parents <- many parseParentRef
- author <- string "author " >> parsePerson
- committer <- string "committer " >> parsePerson
- encoding <- option Nothing $ Just <$> (string "encoding " >> tillEOL)
+ author <- P.string "author " >> parsePerson
+ committer <- P.string "committer " >> parsePerson
+ encoding <- option Nothing $ Just <$> (PC.string "encoding " >> tillEOL)
extras <- many parseExtra
skipChar '\n'
message <- takeByteString
return $ Commit tree parents author committer encoding extras message
where
parseParentRef = do
- tree <- string "parent " >> referenceHex
+ tree <- P.string "parent " >> P.referenceHex
skipChar '\n'
return tree
parseExtra = do
- f <- B.pack . (:[]) <$> notWord8 0xa
+ f <- B.singleton <$> notWord8 0xa
r <- tillEOL
skipEOL
- v <- concatLines <$> many (string " " *> tillEOL <* skipEOL)
+ v <- concatLines <$> many (P.string " " *> tillEOL <* skipEOL)
return $ CommitExtra (f `B.append` r) v
concatLines = B.concat . intersperse (B.pack [0xa])
-- | parse a tag content
tagParse = do
- object <- string "object " >> referenceHex
+ object <- P.string "object " >> P.referenceHex
skipChar '\n'
- type_ <- objectTypeUnmarshall . BC.unpack <$> (string "type " >> takeTill ((==) 0x0a))
+ type_ <- objectTypeUnmarshall <$> (P.string "type " >> takeTill ((==) 0x0a))
skipChar '\n'
- tag <- string "tag " >> takeTill ((==) 0x0a)
+ tag <- P.string "tag " >> takeTill ((==) 0x0a)
skipChar '\n'
- tagger <- string "tagger " >> parsePerson
+ tagger <- P.string "tagger " >> parsePerson
skipChar '\n'
signature <- takeByteString
return $ Tag object type_ tag tagger signature
@@ -229,9 +226,9 @@ parsePerson = do
name <- B.init <$> PC.takeWhile ((/=) '<')
skipChar '<'
email <- PC.takeWhile ((/=) '>')
- _ <- string "> "
+ _ <- P.string "> "
time <- PC.decimal :: Parser Integer
- _ <- string " "
+ _ <- P.string " "
timezoneFmt <- PC.signed PC.decimal
let timezoneSign = if timezoneFmt < 0 then negate else id
let (h,m) = abs timezoneFmt `divMod` 100
View
@@ -35,10 +35,7 @@ import Data.Bits
import Data.List
import qualified Data.ByteString.Lazy as L
-import Data.Attoparsec (anyWord8)
-import qualified Data.Attoparsec as A
-import qualified Data.Attoparsec.Lazy as AL
-
+import qualified Data.Git.Parser as P
import Data.Git.Internal
import Data.Git.Imports
import Data.Git.Path
@@ -82,11 +79,11 @@ packReadHeader repoPath packRef =
withFileReader (packPath repoPath packRef) $ \filereader ->
fileReaderParse filereader parseHeader
where parseHeader = do
- packMagic <- be32 <$> A.take 4
+ packMagic <- P.word32
when (packMagic /= 0x5041434b) $ error "not a git packfile"
- ver <- be32 <$> A.take 4
+ ver <- P.word32
when (ver /= 2) $ error ("pack file version not supported: " ++ show ver)
- be32 <$> A.take 4
+ P.word32
-- | read an object at a specific position using a map function on the objectData
packReadMapAtOffset fr offset mapData = fileReaderSeek fr offset >> getNextObject fr mapData
@@ -116,17 +113,17 @@ getNextObject fr mapData =
packedObjectToObject (PackedObjectInfo { poiType = ty, poiExtra = extra }, objData) =
packObjectFromRaw (ty, extra, objData)
-packObjectFromRaw (TypeCommit, Nothing, objData) = AL.maybeResult $ AL.parse objectParseCommit objData
-packObjectFromRaw (TypeTree, Nothing, objData) = AL.maybeResult $ AL.parse objectParseTree objData
-packObjectFromRaw (TypeBlob, Nothing, objData) = AL.maybeResult $ AL.parse objectParseBlob objData
-packObjectFromRaw (TypeTag, Nothing, objData) = AL.maybeResult $ AL.parse objectParseTag objData
+packObjectFromRaw (TypeCommit, Nothing, objData) = P.maybeParseChunks objectParseCommit objData
+packObjectFromRaw (TypeTree, Nothing, objData) = P.maybeParseChunks objectParseTree objData
+packObjectFromRaw (TypeBlob, Nothing, objData) = P.maybeParseChunks objectParseBlob objData
+packObjectFromRaw (TypeTag, Nothing, objData) = P.maybeParseChunks objectParseTag objData
packObjectFromRaw (TypeDeltaOff, Just (PtrOfs o), objData) = toObject . DeltaOfs o <$> deltaRead objData
packObjectFromRaw (TypeDeltaRef, Just (PtrRef r), objData) = toObject . DeltaRef r <$> deltaRead objData
packObjectFromRaw _ = error "can't happen unless someone change getNextObjectRaw"
getNextObjectRaw :: FileReader -> IO PackedObjectRaw
getNextObjectRaw fr = do
- sobj <- fileReaderGetPos fr
+ sobj <- fileReaderGetPos fr
(ty, size) <- fileReaderParse fr parseObjectHeader
extra <- case ty of
TypeDeltaRef -> Just . PtrRef . fromBinary <$> fileReaderGetBS 20 fr
@@ -138,12 +135,12 @@ getNextObjectRaw fr = do
return (PackedObjectInfo ty sobj (eobj - sobj) size extra, objData)
where
parseObjectHeader = do
- (m, ty, sz) <- splitFirst <$> anyWord8
+ (m, ty, sz) <- splitFirst <$> P.anyWord8
size <- if m then (sz +) <$> getNextSize 4 else return sz
return (ty, size)
where
getNextSize n = do
- (c, sz) <- splitOther n <$> anyWord8
+ (c, sz) <- splitOther n <$> P.anyWord8
if c then (sz +) <$> getNextSize (n+7) else return sz
splitFirst :: Word8 -> (Bool, ObjectType, Word64)
Oops, something went wrong.

0 comments on commit d6bbd61

Please sign in to comment.