never executed always true always false
1 {-# LANGUAGE CPP #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Distribution.Deprecated.ParseUtils
5 -- Copyright : (c) The University of Glasgow 2004
6 -- License : BSD3
7 --
8 -- Maintainer : cabal-devel@haskell.org
9 -- Portability : portable
10 --
11 -- Utilities for parsing 'PackageDescription' and 'InstalledPackageInfo'.
12 --
13 -- The @.cabal@ file format is not trivial, especially with the introduction
14 -- of configurations and the section syntax that goes with that. This module
15 -- has a bunch of parsing functions that is used by the @.cabal@ parser and a
16 -- couple others. It has the parsing framework code and also little parsers for
17 -- many of the formats we get in various @.cabal@ file fields, like module
18 -- names, comma separated lists etc.
19
20 -- This module is meant to be local-only to Distribution...
21
22 {-# OPTIONS_HADDOCK hide #-}
23 {-# LANGUAGE Rank2Types #-}
24 module Distribution.Deprecated.ParseUtils (
25 LineNo, PError(..), PWarning(..), locatedErrorMsg, syntaxError, warning,
26 runP, runE, ParseResult(..), parseFail, showPWarning,
27 Field(..), lineNo,
28 FieldDescr(..), readFields,
29 parseHaskellString, parseTokenQ,
30 parseOptCommaList,
31 showFilePath, showToken, showFreeText,
32 field, simpleField, listField, listFieldWithSep, spaceListField,
33 newLineListField,
34 liftField,
35 readPToMaybe,
36
37 fieldParsec, simpleFieldParsec,
38 listFieldParsec,
39 commaListFieldParsec,
40 commaNewLineListFieldParsec,
41
42 UnrecFieldParser,
43 ) where
44
45 import Distribution.Client.Compat.Prelude hiding (get)
46 import Prelude ()
47
48 import Distribution.Deprecated.ReadP as ReadP hiding (get)
49
50 import Distribution.Pretty
51 import Distribution.ReadE
52 import Distribution.Utils.Generic
53
54 import System.FilePath (normalise)
55 import Text.PrettyPrint (Doc, punctuate, comma, fsep, sep)
56 import qualified Text.Read as Read
57
58 import qualified Control.Monad.Fail as Fail
59 import Distribution.Parsec (ParsecParser, parsecLeadingCommaList, parsecLeadingOptCommaList)
60
61 import qualified Data.ByteString as BS
62 import qualified Distribution.Fields as Fields
63 import qualified Distribution.Fields.Field as Fields
64 import qualified Distribution.Parsec as Parsec
65 import qualified Distribution.Fields.LexerMonad as Fields
66 import qualified Text.Parsec.Error as PE
67 import qualified Text.Parsec.Pos as PP
68
69 -- -----------------------------------------------------------------------------
70
71 type LineNo = Int
72
73 data PError = AmbiguousParse String LineNo
74 | NoParse String LineNo
75 | TabsError LineNo
76 | FromString String (Maybe LineNo)
77 deriving (Eq, Show)
78
79 data PWarning = PWarning String
80 | UTFWarning LineNo String
81 deriving (Eq, Show)
82
83 showPWarning :: FilePath -> PWarning -> String
84 showPWarning fpath (PWarning msg) =
85 normalise fpath ++ ": " ++ msg
86 showPWarning fpath (UTFWarning line fname) =
87 normalise fpath ++ ":" ++ show line
88 ++ ": Invalid UTF-8 text in the '" ++ fname ++ "' field."
89
90 data ParseResult a = ParseFailed PError | ParseOk [PWarning] a
91 deriving Show
92
93 instance Functor ParseResult where
94 fmap _ (ParseFailed err) = ParseFailed err
95 fmap f (ParseOk ws x) = ParseOk ws $ f x
96
97 instance Applicative ParseResult where
98 pure = ParseOk []
99 (<*>) = ap
100
101
102 instance Monad ParseResult where
103 return = pure
104 ParseFailed err >>= _ = ParseFailed err
105 ParseOk ws x >>= f = case f x of
106 ParseFailed err -> ParseFailed err
107 ParseOk ws' x' -> ParseOk (ws'++ws) x'
108
109 #if !(MIN_VERSION_base(4,9,0))
110 fail = parseResultFail
111 #elif !(MIN_VERSION_base(4,13,0))
112 fail = Fail.fail
113 #endif
114
115 instance Fail.MonadFail ParseResult where
116 fail = parseResultFail
117
118 parseResultFail :: String -> ParseResult a
119 parseResultFail s = parseFail (FromString s Nothing)
120
121 parseFail :: PError -> ParseResult a
122 parseFail = ParseFailed
123
124 runP :: LineNo -> String -> ReadP a a -> String -> ParseResult a
125 runP line fieldname p s =
126 case [ x | (x,"") <- results ] of
127 [a] -> ParseOk (utf8Warnings line fieldname s) a
128 --TODO: what is this double parse thing all about?
129 -- Can't we just do the all isSpace test the first time?
130 [] -> case [ x | (x,ys) <- results, all isSpace ys ] of
131 [a] -> ParseOk (utf8Warnings line fieldname s) a
132 [] -> ParseFailed (NoParse fieldname line)
133 _ -> ParseFailed (AmbiguousParse fieldname line)
134 _ -> ParseFailed (AmbiguousParse fieldname line)
135 where results = readP_to_S p s
136
137 runE :: LineNo -> String -> ReadE a -> String -> ParseResult a
138 runE line fieldname p s =
139 case runReadE p s of
140 Right a -> ParseOk (utf8Warnings line fieldname s) a
141 Left e -> syntaxError line $
142 "Parse of field '" ++ fieldname ++ "' failed (" ++ e ++ "): " ++ s
143
144 utf8Warnings :: LineNo -> String -> String -> [PWarning]
145 utf8Warnings line fieldname s =
146 take 1 [ UTFWarning n fieldname
147 | (n,l) <- zip [line..] (lines s)
148 , '\xfffd' `elem` l ]
149
150 locatedErrorMsg :: PError -> (Maybe LineNo, String)
151 locatedErrorMsg (AmbiguousParse f n) = (Just n,
152 "Ambiguous parse in field '"++f++"'.")
153 locatedErrorMsg (NoParse f n) = (Just n,
154 "Parse of field '"++f++"' failed.")
155 locatedErrorMsg (TabsError n) = (Just n, "Tab used as indentation.")
156 locatedErrorMsg (FromString s n) = (n, s)
157
158 syntaxError :: LineNo -> String -> ParseResult a
159 syntaxError n s = ParseFailed $ FromString s (Just n)
160
161
162 warning :: String -> ParseResult ()
163 warning s = ParseOk [PWarning s] ()
164
165 -- | Field descriptor. The parameter @a@ parameterizes over where the field's
166 -- value is stored in.
167 data FieldDescr a
168 = FieldDescr
169 { fieldName :: String
170 , fieldGet :: a -> Doc
171 , fieldSet :: LineNo -> String -> a -> ParseResult a
172 -- ^ @fieldSet n str x@ Parses the field value from the given input
173 -- string @str@ and stores the result in @x@ if the parse was
174 -- successful. Otherwise, reports an error on line number @n@.
175 }
176
177 field :: String -> (a -> Doc) -> ReadP a a -> FieldDescr a
178 field name showF readF =
179 FieldDescr name showF (\line val _st -> runP line name readF val)
180
181 fieldParsec :: String -> (a -> Doc) -> ParsecParser a -> FieldDescr a
182 fieldParsec name showF readF =
183 FieldDescr name showF $ \line val _st -> case explicitEitherParsec readF val of
184 Left err -> ParseFailed (FromString err (Just line))
185 Right x -> ParseOk [] x
186
187 -- Lift a field descriptor storing into an 'a' to a field descriptor storing
188 -- into a 'b'.
189 liftField :: (b -> a) -> (a -> b -> b) -> FieldDescr a -> FieldDescr b
190 liftField get set (FieldDescr name showF parseF)
191 = FieldDescr name (showF . get)
192 (\line str b -> do
193 a <- parseF line str (get b)
194 return (set a b))
195
196 -- Parser combinator for simple fields. Takes a field name, a pretty printer,
197 -- a parser function, an accessor, and a setter, returns a FieldDescr over the
198 -- compoid structure.
199 simpleField :: String -> (a -> Doc) -> ReadP a a
200 -> (b -> a) -> (a -> b -> b) -> FieldDescr b
201 simpleField name showF readF get set
202 = liftField get set $ field name showF readF
203
204 simpleFieldParsec :: String -> (a -> Doc) -> ParsecParser a
205 -> (b -> a) -> (a -> b -> b) -> FieldDescr b
206 simpleFieldParsec name showF readF get set
207 = liftField get set $ fieldParsec name showF readF
208
209 commaListFieldWithSepParsec :: Separator -> String -> (a -> Doc) -> ParsecParser a
210 -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
211 commaListFieldWithSepParsec separator name showF readF get set =
212 liftField get set' $
213 fieldParsec name showF' (parsecLeadingCommaList readF)
214 where
215 set' xs b = set (get b ++ xs) b
216 showF' = separator . punctuate comma . map showF
217
218 commaListFieldParsec :: String -> (a -> Doc) -> ParsecParser a
219 -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
220 commaListFieldParsec = commaListFieldWithSepParsec fsep
221
222 commaNewLineListFieldParsec
223 :: String -> (a -> Doc) -> ParsecParser a
224 -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
225 commaNewLineListFieldParsec = commaListFieldWithSepParsec sep
226
227 spaceListField :: String -> (a -> Doc) -> ReadP [a] a
228 -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
229 spaceListField name showF readF get set =
230 liftField get set' $
231 field name showF' (parseSpaceList readF)
232 where
233 set' xs b = set (get b ++ xs) b
234 showF' = fsep . map showF
235
236 -- this is a different definition from listField, like
237 -- commaNewLineListField it pretty prints on multiple lines
238 newLineListField :: String -> (a -> Doc) -> ReadP [a] a
239 -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
240 newLineListField = listFieldWithSep sep
241
242 listFieldWithSep :: Separator -> String -> (a -> Doc) -> ReadP [a] a
243 -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
244 listFieldWithSep separator name showF readF get set =
245 liftField get set' $
246 field name showF' (parseOptCommaList readF)
247 where
248 set' xs b = set (get b ++ xs) b
249 showF' = separator . map showF
250
251 listFieldWithSepParsec :: Separator -> String -> (a -> Doc) -> ParsecParser a
252 -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
253 listFieldWithSepParsec separator name showF readF get set =
254 liftField get set' $
255 fieldParsec name showF' (parsecLeadingOptCommaList readF)
256 where
257 set' xs b = set (get b ++ xs) b
258 showF' = separator . map showF
259
260 listField :: String -> (a -> Doc) -> ReadP [a] a
261 -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
262 listField = listFieldWithSep fsep
263
264 listFieldParsec
265 :: String -> (a -> Doc) -> ParsecParser a
266 -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
267 listFieldParsec = listFieldWithSepParsec fsep
268
269 -- | The type of a function which, given a name-value pair of an
270 -- unrecognized field, and the current structure being built,
271 -- decides whether to incorporate the unrecognized field
272 -- (by returning Just x, where x is a possibly modified version
273 -- of the structure being built), or not (by returning Nothing).
274 type UnrecFieldParser a = (String,String) -> a -> Maybe a
275
276 ------------------------------------------------------------------------------
277
278 -- The data type for our three syntactic categories
279 data Field
280 = F LineNo String String
281 -- ^ A regular @<property>: <value>@ field
282 | Section LineNo String String [Field]
283 -- ^ A section with a name and possible parameter. The syntactic
284 -- structure is:
285 --
286 -- @
287 -- <sectionname> <arg> {
288 -- <field>*
289 -- }
290 -- @
291 deriving (Show
292 ,Eq) -- for testing
293
294 lineNo :: Field -> LineNo
295 lineNo (F n _ _) = n
296 lineNo (Section n _ _ _) = n
297
298 readFields :: BS.ByteString -> ParseResult [Field]
299 readFields input = case Fields.readFields' input of
300 Right (fs, ws) -> ParseOk
301 [ PWarning msg | Fields.PWarning _ _ msg <- Fields.toPWarnings ws ]
302 (legacyFields fs)
303 Left perr -> ParseFailed $ NoParse
304 (PE.showErrorMessages
305 "or" "unknown parse error" "expecting" "unexpected" "end of file"
306 (PE.errorMessages perr))
307 (PP.sourceLine pos)
308 where
309 pos = PE.errorPos perr
310
311 legacyFields :: [Fields.Field Parsec.Position] -> [Field]
312 legacyFields = map legacyField
313
314 legacyField :: Fields.Field Parsec.Position -> Field
315 legacyField (Fields.Field (Fields.Name pos name) fls) =
316 F (posToLineNo pos) (fromUTF8BS name) (Fields.fieldLinesToString fls)
317 legacyField (Fields.Section (Fields.Name pos name) args fs) =
318 Section (posToLineNo pos) (fromUTF8BS name) (Fields.sectionArgsToString args) (legacyFields fs)
319
320 posToLineNo :: Parsec.Position -> LineNo
321 posToLineNo (Parsec.Position row _) = row
322
323 ------------------------------------------------------------------------------
324
325 -- urgh, we can't define optQuotes :: ReadP r a -> ReadP r a
326 -- because the "compat" version of ReadP isn't quite powerful enough. In
327 -- particular, the type of <++ is ReadP r r -> ReadP r a -> ReadP r a
328 -- Hence the trick above to make 'lic' polymorphic.
329
330 -- Different than the naive version. it turns out Read instance for String accepts
331 -- the ['a', 'b'] syntax, which we do not want. In particular it messes
332 -- up any token starting with [].
333 parseHaskellString :: ReadP r String
334 parseHaskellString =
335 readS_to_P $
336 Read.readPrec_to_S (do Read.String s <- Read.lexP; return s) 0
337
338 parseTokenQ :: ReadP r String
339 parseTokenQ = parseHaskellString <++ munch1 (\x -> not (isSpace x) && x /= ',')
340
341 parseSpaceList :: ReadP r a -- ^The parser for the stuff between commas
342 -> ReadP r [a]
343 parseSpaceList p = sepBy p skipSpaces
344
345 -- This version avoid parse ambiguity for list element parsers
346 -- that have multiple valid parses of prefixes.
347 parseOptCommaList :: ReadP r a -> ReadP r [a]
348 parseOptCommaList p = sepBy p localSep
349 where
350 -- The separator must not be empty or it introduces ambiguity
351 localSep = (skipSpaces >> char ',' >> skipSpaces)
352 +++ (satisfy isSpace >> skipSpaces)
353
354 readPToMaybe :: ReadP a a -> String -> Maybe a
355 readPToMaybe p str = listToMaybe [ r | (r,s) <- readP_to_S p str
356 , all isSpace s ]