-
Notifications
You must be signed in to change notification settings - Fork 4
/
Parse.hs
167 lines (150 loc) · 6.17 KB
/
Parse.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
{-# LANGUAGE CPP #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleContexts #-}
-- |
-- Module: Codec.RPM.Parse
-- Copyright: (c) 2016-2017 Red Hat, Inc.
-- License: LGPL
--
-- Maintainer: https://github.com/weldr
-- Stability: stable
-- Portability: portable
--
-- A module for creating 'RPM' records from various data sources.
module Codec.RPM.Parse(
#ifdef TEST
parseLead,
parseSectionHeader,
parseOneTag,
parseSection,
#endif
parseRPM,
parseRPMC)
where
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative((<$>))
#endif
import Control.Monad(void)
import Control.Monad.Except(MonadError, throwError)
import Conduit((.|), Conduit, awaitForever, yield)
import Data.Attoparsec.Binary
import Data.Attoparsec.ByteString(Parser, anyWord8, count, take, takeByteString, word8)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as C
import Data.Conduit.Attoparsec(ParseError(..), conduitParserEither)
import Data.Maybe(mapMaybe)
import Prelude hiding(take)
import Codec.RPM.Internal.Numbers(asWord32)
import Codec.RPM.Tags(Tag, mkTag)
import Codec.RPM.Types(Header(..), Lead(..), RPM(..), SectionHeader(..))
-- "a <$> b <$> c" looks better than "a . b <$> c"
{-# ANN parseLead "HLint: ignore Functor law" #-}
parseLead :: Parser Lead
parseLead = do
-- Verify this is an RPM by checking the first four bytes.
void $ word32be 0xedabeedb
rpmMajor <- anyWord8
rpmMinor <- anyWord8
rpmType <- anyWord16be
rpmArchNum <- anyWord16be
rpmName <- C.unpack <$> BS.takeWhile (/= 0) <$> take 66
rpmOSNum <- anyWord16be
rpmSigType <- anyWord16be
-- Skip 16 reserved bytes at the end of the lead.
void $ take 16
return Lead { rpmMajor,
rpmMinor,
rpmType,
rpmArchNum,
rpmName,
rpmOSNum,
rpmSigType }
parseSectionHeader :: Parser SectionHeader
parseSectionHeader = do
-- Verify this is a header section header by checking the first three bytes.
void $ word8 0x8e >> word8 0xad >> word8 0xe8
sectionVersion <- anyWord8
-- Skip four reserved bytes.
void $ take 4
sectionCount <- anyWord32be
sectionSize <- anyWord32be
return SectionHeader { sectionVersion,
sectionCount,
sectionSize }
parseOneTag :: C.ByteString -> C.ByteString -> Maybe Tag
parseOneTag store bs | BS.length bs < 16 = Nothing
| otherwise = let
tag = fromIntegral . asWord32 $ BS.take 4 bs
ty = fromIntegral . asWord32 $ BS.take 4 (BS.drop 4 bs)
off = fromIntegral . asWord32 $ BS.take 4 (BS.drop 8 bs)
cnt = fromIntegral . asWord32 $ BS.take 4 (BS.drop 12 bs)
in
mkTag store tag ty off cnt
parseSection :: Parser Header
parseSection = do
headerSectionHeader <- parseSectionHeader
-- Grab the tags as a list of bytestrings. We need the store before we can process the tags, as
-- that's where all the values for the tags are kept. However, grabbing each individual tag here
-- makes it a lot easier to process them later.
rawTags <- count (fromIntegral $ sectionCount headerSectionHeader) (take 16)
headerStore <- take (fromIntegral $ sectionSize headerSectionHeader)
-- Now that we've got the store, process each tag by looking up its values in the store.
-- NOTE: mapMaybe will reject tags which are Nothing
let headerTags = mapMaybe (parseOneTag headerStore) rawTags
return Header { headerSectionHeader,
headerTags,
headerStore }
-- | A parser (in the attoparsec sense of the term) that constructs 'RPM' records. The parser
-- can be run against a 'ByteString' of RPM data using any of the usual functions. 'parse' and
-- 'parseOnly' are especially useful:
--
-- > import Data.Attoparsec.ByteString(parse)
-- > import qualified Data.ByteString as BS
-- > s <- BS.readFile "some.rpm"
-- > result <- parse parseRPM s
--
-- The 'Result' can then be examined directly or converted using 'maybeResult' (for converting
-- it into a 'Maybe RPM') or 'eitherResult' (for converting it into an 'Either String RPM').
-- In the latter case, the String contains any parse error that occurred when reading the
-- RPM data.
parseRPM :: Parser RPM
parseRPM = do
-- First comes the (mostly useless) lead.
rpmLead <- parseLead
-- Then comes the signature, which is like a regular section except it's also padded.
sig <- parseSection
void $ take (signaturePadding sig)
-- And then comes the real header. There could be several, but for now there's only ever one.
hdr <- parseSection
rpmArchive <- takeByteString
return RPM { rpmLead,
rpmSignatures=[sig],
rpmHeaders=[hdr],
rpmArchive }
where
signaturePadding :: Header -> Int
signaturePadding hdr = let
remainder = (sectionSize . headerSectionHeader) hdr `mod` 8
in
if remainder > 0 then fromIntegral $ 8 - remainder else 0
-- | Like 'parseRPM', but puts the result into a 'Conduit' as an 'Either', containing either a
-- 'ParseError' or an 'RPM'. The result can be extracted with 'runExceptT', like so:
--
-- > import Conduit((.|), runConduitRes, sourceFile)
-- > import Control.Monad.Except(runExceptT)
-- > result <- runExceptT $ runConduitRes $ sourceFile "some.rpm" .| parseRPMC .| someConsumer
--
-- On success, the 'RPM' record will be passed down the conduit for futher processing or
-- consumption. Functions can be written to extract just one element out of the 'RPM' and
-- pass it along. For instance:
--
-- > payloadC :: MonadError e m => Conduit RPM m BS.ByteStrin
-- > payloadC = awaitForever (yield . rpmArchive)
--
-- On error, the rest of the conduit will be skipped and the 'ParseError' will be returned
-- as the result to be dealt with.
parseRPMC :: MonadError String m => Conduit C.ByteString m RPM
parseRPMC =
conduitParserEither parseRPM .| consumer
where
consumer = awaitForever $ either (throwError . errorMessage) (yield . snd)