never executed always true always false
    1 {-# LANGUAGE CPP #-}
    2 -----------------------------------------------------------------------------
    3 -- |
    4 -- Module      :  Distribution.Deprecated.ParseUtils
    5 -- Copyright   :  (c) The University of Glasgow 2004
    6 -- License     :  BSD3
    7 --
    8 -- Maintainer  :  cabal-devel@haskell.org
    9 -- Portability :  portable
   10 --
   11 -- Utilities for parsing 'PackageDescription' and 'InstalledPackageInfo'.
   12 --
   13 -- The @.cabal@ file format is not trivial, especially with the introduction
   14 -- of configurations and the section syntax that goes with that. This module
   15 -- has a bunch of parsing functions that is used by the @.cabal@ parser and a
   16 -- couple others. It has the parsing framework code and also little parsers for
   17 -- many of the formats we get in various @.cabal@ file fields, like module
   18 -- names, comma separated lists etc.
   19 
   20 -- This module is meant to be local-only to Distribution...
   21 
   22 {-# OPTIONS_HADDOCK hide #-}
   23 {-# LANGUAGE Rank2Types #-}
   24 module Distribution.Deprecated.ParseUtils (
   25         LineNo, PError(..), PWarning(..), locatedErrorMsg, syntaxError, warning,
   26         runP, runE, ParseResult(..), parseFail, showPWarning,
   27         Field(..), lineNo,
   28         FieldDescr(..), readFields,
   29         parseHaskellString, parseTokenQ,
   30         parseOptCommaList,
   31         showFilePath, showToken, showFreeText,
   32         field, simpleField, listField, listFieldWithSep, spaceListField,
   33         newLineListField,
   34         liftField,
   35         readPToMaybe,
   36 
   37         fieldParsec, simpleFieldParsec,
   38         listFieldParsec,
   39         commaListFieldParsec,
   40         commaNewLineListFieldParsec,
   41 
   42         UnrecFieldParser,
   43   ) where
   44 
   45 import Distribution.Client.Compat.Prelude hiding (get)
   46 import Prelude ()
   47 
   48 import Distribution.Deprecated.ReadP as ReadP hiding (get)
   49 
   50 import Distribution.Pretty
   51 import Distribution.ReadE
   52 import Distribution.Utils.Generic
   53 
   54 import System.FilePath  (normalise)
   55 import Text.PrettyPrint (Doc, punctuate, comma, fsep, sep)
   56 import qualified Text.Read as Read
   57 
   58 import qualified Control.Monad.Fail as Fail
   59 import Distribution.Parsec (ParsecParser, parsecLeadingCommaList, parsecLeadingOptCommaList)
   60 
   61 import qualified Data.ByteString as BS
   62 import qualified Distribution.Fields as Fields
   63 import qualified Distribution.Fields.Field as Fields
   64 import qualified Distribution.Parsec as Parsec
   65 import qualified Distribution.Fields.LexerMonad as Fields
   66 import qualified Text.Parsec.Error as PE
   67 import qualified Text.Parsec.Pos as PP
   68 
   69 -- -----------------------------------------------------------------------------
   70 
   71 type LineNo    = Int
   72 
   73 data PError = AmbiguousParse String LineNo
   74             | NoParse String LineNo
   75             | TabsError LineNo
   76             | FromString String (Maybe LineNo)
   77         deriving (Eq, Show)
   78 
   79 data PWarning = PWarning String
   80               | UTFWarning LineNo String
   81         deriving (Eq, Show)
   82 
   83 showPWarning :: FilePath -> PWarning -> String
   84 showPWarning fpath (PWarning msg) =
   85   normalise fpath ++ ": " ++ msg
   86 showPWarning fpath (UTFWarning line fname) =
   87   normalise fpath ++ ":" ++ show line
   88         ++ ": Invalid UTF-8 text in the '" ++ fname ++ "' field."
   89 
   90 data ParseResult a = ParseFailed PError | ParseOk [PWarning] a
   91         deriving Show
   92 
   93 instance Functor ParseResult where
   94         fmap _ (ParseFailed err) = ParseFailed err
   95         fmap f (ParseOk ws x) = ParseOk ws $ f x
   96 
   97 instance Applicative ParseResult where
   98         pure = ParseOk []
   99         (<*>) = ap
  100 
  101 
  102 instance Monad ParseResult where
  103         return = pure
  104         ParseFailed err >>= _ = ParseFailed err
  105         ParseOk ws x >>= f = case f x of
  106                                ParseFailed err -> ParseFailed err
  107                                ParseOk ws' x' -> ParseOk (ws'++ws) x'
  108 
  109 #if !(MIN_VERSION_base(4,9,0))
  110         fail = parseResultFail
  111 #elif !(MIN_VERSION_base(4,13,0))
  112         fail = Fail.fail
  113 #endif
  114 
  115 instance Fail.MonadFail ParseResult where
  116         fail = parseResultFail
  117 
  118 parseResultFail :: String -> ParseResult a
  119 parseResultFail s = parseFail (FromString s Nothing)
  120 
  121 parseFail :: PError -> ParseResult a
  122 parseFail = ParseFailed
  123 
  124 runP :: LineNo -> String -> ReadP a a -> String -> ParseResult a
  125 runP line fieldname p s =
  126   case [ x | (x,"") <- results ] of
  127     [a] -> ParseOk (utf8Warnings line fieldname s) a
  128     --TODO: what is this double parse thing all about?
  129     --      Can't we just do the all isSpace test the first time?
  130     []  -> case [ x | (x,ys) <- results, all isSpace ys ] of
  131              [a] -> ParseOk (utf8Warnings line fieldname s) a
  132              []  -> ParseFailed (NoParse fieldname line)
  133              _   -> ParseFailed (AmbiguousParse fieldname line)
  134     _   -> ParseFailed (AmbiguousParse fieldname line)
  135   where results = readP_to_S p s
  136 
  137 runE :: LineNo -> String -> ReadE a -> String -> ParseResult a
  138 runE line fieldname p s =
  139     case runReadE p s of
  140       Right a -> ParseOk (utf8Warnings line fieldname s) a
  141       Left  e -> syntaxError line $
  142         "Parse of field '" ++ fieldname ++ "' failed (" ++ e ++ "): " ++ s
  143 
  144 utf8Warnings :: LineNo -> String -> String -> [PWarning]
  145 utf8Warnings line fieldname s =
  146   take 1 [ UTFWarning n fieldname
  147          | (n,l) <- zip [line..] (lines s)
  148          , '\xfffd' `elem` l ]
  149 
  150 locatedErrorMsg :: PError -> (Maybe LineNo, String)
  151 locatedErrorMsg (AmbiguousParse f n) = (Just n,
  152                                         "Ambiguous parse in field '"++f++"'.")
  153 locatedErrorMsg (NoParse f n)        = (Just n,
  154                                         "Parse of field '"++f++"' failed.")
  155 locatedErrorMsg (TabsError n)        = (Just n, "Tab used as indentation.")
  156 locatedErrorMsg (FromString s n)     = (n, s)
  157 
  158 syntaxError :: LineNo -> String -> ParseResult a
  159 syntaxError n s = ParseFailed $ FromString s (Just n)
  160 
  161 
  162 warning :: String -> ParseResult ()
  163 warning s = ParseOk [PWarning s] ()
  164 
  165 -- | Field descriptor.  The parameter @a@ parameterizes over where the field's
  166 --   value is stored in.
  167 data FieldDescr a
  168   = FieldDescr
  169       { fieldName     :: String
  170       , fieldGet      :: a -> Doc
  171       , fieldSet      :: LineNo -> String -> a -> ParseResult a
  172         -- ^ @fieldSet n str x@ Parses the field value from the given input
  173         -- string @str@ and stores the result in @x@ if the parse was
  174         -- successful.  Otherwise, reports an error on line number @n@.
  175       }
  176 
  177 field :: String -> (a -> Doc) -> ReadP a a -> FieldDescr a
  178 field name showF readF =
  179   FieldDescr name showF (\line val _st -> runP line name readF val)
  180 
  181 fieldParsec :: String -> (a -> Doc) -> ParsecParser a -> FieldDescr a
  182 fieldParsec name showF readF =
  183   FieldDescr name showF $ \line val _st -> case explicitEitherParsec readF val of
  184     Left err -> ParseFailed (FromString err (Just line))
  185     Right x  -> ParseOk [] x
  186 
  187 -- Lift a field descriptor storing into an 'a' to a field descriptor storing
  188 -- into a 'b'.
  189 liftField :: (b -> a) -> (a -> b -> b) -> FieldDescr a -> FieldDescr b
  190 liftField get set (FieldDescr name showF parseF)
  191  = FieldDescr name (showF . get)
  192         (\line str b -> do
  193             a <- parseF line str (get b)
  194             return (set a b))
  195 
  196 -- Parser combinator for simple fields.  Takes a field name, a pretty printer,
  197 -- a parser function, an accessor, and a setter, returns a FieldDescr over the
  198 -- compoid structure.
  199 simpleField :: String -> (a -> Doc) -> ReadP a a
  200             -> (b -> a) -> (a -> b -> b) -> FieldDescr b
  201 simpleField name showF readF get set
  202   = liftField get set $ field name showF readF
  203 
  204 simpleFieldParsec :: String -> (a -> Doc) -> ParsecParser a
  205             -> (b -> a) -> (a -> b -> b) -> FieldDescr b
  206 simpleFieldParsec name showF readF get set
  207   = liftField get set $ fieldParsec name showF readF
  208 
  209 commaListFieldWithSepParsec :: Separator -> String -> (a -> Doc) -> ParsecParser a
  210                       -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
  211 commaListFieldWithSepParsec separator name showF readF get set =
  212    liftField get set' $
  213      fieldParsec name showF' (parsecLeadingCommaList readF)
  214    where
  215      set' xs b = set (get b ++ xs) b
  216      showF'    = separator . punctuate comma . map showF
  217 
  218 commaListFieldParsec :: String -> (a -> Doc) -> ParsecParser a
  219                  -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
  220 commaListFieldParsec = commaListFieldWithSepParsec fsep
  221 
  222 commaNewLineListFieldParsec
  223     :: String -> (a -> Doc) ->  ParsecParser a
  224     -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
  225 commaNewLineListFieldParsec = commaListFieldWithSepParsec sep
  226 
  227 spaceListField :: String -> (a -> Doc) -> ReadP [a] a
  228                  -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
  229 spaceListField name showF readF get set =
  230   liftField get set' $
  231     field name showF' (parseSpaceList readF)
  232   where
  233     set' xs b = set (get b ++ xs) b
  234     showF'    = fsep . map showF
  235 
  236 -- this is a different definition from listField, like
  237 -- commaNewLineListField it pretty prints on multiple lines
  238 newLineListField :: String -> (a -> Doc) -> ReadP [a] a
  239                  -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
  240 newLineListField = listFieldWithSep sep
  241 
  242 listFieldWithSep :: Separator -> String -> (a -> Doc) -> ReadP [a] a
  243                  -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
  244 listFieldWithSep separator name showF readF get set =
  245   liftField get set' $
  246     field name showF' (parseOptCommaList readF)
  247   where
  248     set' xs b = set (get b ++ xs) b
  249     showF'    = separator . map showF
  250 
  251 listFieldWithSepParsec :: Separator -> String -> (a -> Doc) -> ParsecParser a
  252                  -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
  253 listFieldWithSepParsec separator name showF readF get set =
  254   liftField get set' $
  255     fieldParsec name showF' (parsecLeadingOptCommaList readF)
  256   where
  257     set' xs b = set (get b ++ xs) b
  258     showF'    = separator . map showF
  259 
  260 listField :: String -> (a -> Doc) -> ReadP [a] a
  261           -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
  262 listField = listFieldWithSep fsep
  263 
  264 listFieldParsec
  265     :: String -> (a -> Doc) -> ParsecParser a
  266     -> (b -> [a]) -> ([a] -> b -> b) -> FieldDescr b
  267 listFieldParsec = listFieldWithSepParsec fsep
  268 
  269 -- | The type of a function which, given a name-value pair of an
  270 --   unrecognized field, and the current structure being built,
  271 --   decides whether to incorporate the unrecognized field
  272 --   (by returning  Just x, where x is a possibly modified version
  273 --   of the structure being built), or not (by returning Nothing).
  274 type UnrecFieldParser a = (String,String) -> a -> Maybe a
  275 
  276 ------------------------------------------------------------------------------
  277 
  278 -- The data type for our three syntactic categories
  279 data Field
  280     = F LineNo String String
  281       -- ^ A regular @<property>: <value>@ field
  282     | Section LineNo String String [Field]
  283       -- ^ A section with a name and possible parameter.  The syntactic
  284       -- structure is:
  285       --
  286       -- @
  287       --   <sectionname> <arg> {
  288       --     <field>*
  289       --   }
  290       -- @
  291       deriving (Show
  292                ,Eq)   -- for testing
  293 
  294 lineNo :: Field -> LineNo
  295 lineNo (F n _ _) = n
  296 lineNo (Section n _ _ _) = n
  297 
  298 readFields :: BS.ByteString -> ParseResult [Field]
  299 readFields input = case Fields.readFields' input of
  300     Right (fs, ws) -> ParseOk
  301         [ PWarning msg | Fields.PWarning _ _ msg <- Fields.toPWarnings ws ]
  302         (legacyFields fs)
  303     Left perr      -> ParseFailed $ NoParse
  304         (PE.showErrorMessages
  305             "or" "unknown parse error" "expecting" "unexpected" "end of file"
  306             (PE.errorMessages perr))
  307         (PP.sourceLine pos)
  308       where
  309         pos = PE.errorPos perr
  310 
  311 legacyFields :: [Fields.Field Parsec.Position] -> [Field]
  312 legacyFields = map legacyField
  313 
  314 legacyField :: Fields.Field Parsec.Position -> Field
  315 legacyField (Fields.Field (Fields.Name pos name) fls) =
  316     F (posToLineNo pos) (fromUTF8BS name) (Fields.fieldLinesToString fls)
  317 legacyField (Fields.Section (Fields.Name pos name) args fs) =
  318     Section (posToLineNo pos) (fromUTF8BS name) (Fields.sectionArgsToString args) (legacyFields fs)
  319 
  320 posToLineNo :: Parsec.Position -> LineNo
  321 posToLineNo (Parsec.Position row _) = row
  322 
  323 ------------------------------------------------------------------------------
  324 
  325 -- urgh, we can't define optQuotes :: ReadP r a -> ReadP r a
  326 -- because the "compat" version of ReadP isn't quite powerful enough.  In
  327 -- particular, the type of <++ is ReadP r r -> ReadP r a -> ReadP r a
  328 -- Hence the trick above to make 'lic' polymorphic.
  329 
  330 -- Different than the naive version. it turns out Read instance for String accepts
  331 -- the ['a', 'b'] syntax, which we do not want. In particular it messes
  332 -- up any token starting with [].
  333 parseHaskellString :: ReadP r String
  334 parseHaskellString =
  335   readS_to_P $
  336     Read.readPrec_to_S (do Read.String s <- Read.lexP; return s) 0
  337 
  338 parseTokenQ :: ReadP r String
  339 parseTokenQ = parseHaskellString <++ munch1 (\x -> not (isSpace x) && x /= ',')
  340 
  341 parseSpaceList :: ReadP r a -- ^The parser for the stuff between commas
  342                -> ReadP r [a]
  343 parseSpaceList p = sepBy p skipSpaces
  344 
  345 -- This version avoid parse ambiguity for list element parsers
  346 -- that have multiple valid parses of prefixes.
  347 parseOptCommaList :: ReadP r a -> ReadP r [a]
  348 parseOptCommaList p = sepBy p localSep
  349   where
  350     -- The separator must not be empty or it introduces ambiguity
  351     localSep = (skipSpaces >> char ',' >> skipSpaces)
  352       +++ (satisfy isSpace >> skipSpaces)
  353 
  354 readPToMaybe :: ReadP a a -> String -> Maybe a
  355 readPToMaybe p str = listToMaybe [ r | (r,s) <- readP_to_S p str
  356                                      , all isSpace s ]