haskell · Ericson2314 · Dec 25, 2020 · Dec 25, 2020 · Dec 25, 2020 · Dec 25, 2020
diff --git a/.travis.yml b/.travis.yml
@@ -17,8 +17,8 @@ env:
 before_install:
  - sudo add-apt-repository -y ppa:hvr/ghc
  - sudo apt-get update
- - sudo apt-get install alex-3.1.7 happy-1.19.5 cabal-install-3.4 ghc-$GHCVER
- - export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/alex/3.1.7/bin:/opt/happy/1.19.5/bin:$PATH
+ - sudo apt-get install happy-1.19.5 cabal-install-3.4 ghc-$GHCVER
+ - export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/happy/1.19.5/bin:$PATH
 
 install:
  - cabal update

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,9 @@
 ## Unreleased
 
+ * No longer include pre-generated Parser.hs in the Hackage upload, as
+   Happy's new ability to bootstrap from parser combinators is a better
+   way to make building easy and less trustful.
+
  * Allow arbitary repetitions in regexps.
    Previously, the `r{n,m}` and related forms were restricted to single
    digit numbers `n` and `m`.

diff --git a/Makefile b/Makefile
@@ -18,10 +18,6 @@ sdist ::
 		echo "Error: Tree is not clean"; \
 		exit 1; \
 	fi
-	$(HAPPY) $(HAPPY_OPTS) src/Parser.y -o src/Parser.hs
-	$(ALEX) $(ALEX_OPTS) src/Scan.x -o src/Scan.hs
-	mv src/Parser.y src/Parser.y.boot
-	mv src/Scan.x src/Scan.x.boot
 	$(CABAL) v2-run gen-alex-sdist
 	$(CABAL) v2-sdist
 	@if [ ! -f "${SDIST_DIR}/alex-$(ALEX_VER).tar.gz" ]; then \
@@ -42,7 +38,14 @@ sdist-test-only ::
 	rm -rf "${SDIST_DIR}/alex-$(ALEX_VER)/"
 	tar -xf "${SDIST_DIR}/alex-$(ALEX_VER).tar.gz" -C ${SDIST_DIR}/
 	echo "packages: ." > "${SDIST_DIR}/alex-$(ALEX_VER)/cabal.project"
-	cd "${SDIST_DIR}/alex-$(ALEX_VER)/" && cabal v2-test --enable-tests all
+	echo "tests: True" >> "${SDIST_DIR}/alex-$(ALEX_VER)/cabal.project"
+	cd "${SDIST_DIR}/alex-$(ALEX_VER)/" \
+		&& cabal v2-build all --flag -bootstrap \
+		&& cabal v2-install --flag -bootstrap --installdir="./bootstrap-root" \
+		&& cabal v2-test all -j --flag -bootstrap \
+		&& export PATH=./bootstrap-root:$$PATH \
+		&& cabal v2-build all --flag +bootstrap \
+		&& cabal v2-test all -j --flag +bootstrap
 	@echo ""
 	@echo "Success! ${SDIST_DIR}/alex-$(ALEX_VER).tar.gz is ready for distribution!"
 	@echo ""
diff --git a/alex.cabal b/alex.cabal
@@ -91,8 +91,6 @@ extra-source-files:
         examples/words.x
         examples/words_monad.x
         examples/words_posn.x
-        src/Parser.y.boot
-        src/Scan.x.boot
         src/ghc_hooks.c
         templates/GenericTemplate.hs
         templates/wrappers.hs
@@ -125,6 +123,11 @@ extra-source-files:
         tests/issue_119.x
         tests/issue_141.x
 
+flag bootstrap
+  description: Optimize the implementation of happy using a pre-built alex
+  manual: True
+  default: False
+
 source-repository head
     type:     git
     location: https://github.com/simonmar/alex.git
@@ -137,6 +140,11 @@ executable alex
                , array
                , containers
                , directory
+               -- min bounds for Except and ExceptT
+               , transformers >=0.4 && <0.6
+               , mtl >= 2.2.1
+
+  build-tools: happy
 
   default-language: Haskell98
   default-extensions: CPP
@@ -157,16 +165,32 @@ executable alex
         Paths_alex
         Parser
         ParseMonad
+        ParseMonad.Class
         Scan
         Set
         Sort
+        Token
         Util
         UTF8
         Data.Ranged
         Data.Ranged.Boundaries
         Data.Ranged.RangedSet
         Data.Ranged.Ranges
 
+  if flag(bootstrap)
+    -- TODO put this back when Cabal can use it's qualified goals to better
+    -- understand bootstrapping, see
+    -- https://github.com/haskell/cabal/issues/7189
+    --build-tools: alex
+    cpp-options: -DALEX_BOOTSTRAP
+    other-modules:
+      ParseMonad.Bootstrapped
+      Scan.Bootstrapped
+  else
+    other-modules:
+      ParseMonad.Oracle
+      Scan.Oracle
+
 test-suite tests
   type: exitcode-stdio-1.0
   main-is: test.hs

diff --git a/src/Main.hs b/src/Main.hs
@@ -18,9 +18,8 @@ import Info
 import Map ( Map )
 import qualified Map hiding ( Map )
 import Output
-import ParseMonad ( runP, Warning(..) )
+import ParseMonad ( runP, AlexPosn(..), Warning(..) )
 import Parser
-import Scan
 import Util ( hline )
 import Paths_alex ( version, getDataDir )
 

diff --git a/src/ParseMonad.hs b/src/ParseMonad.hs
@@ -1,3 +1,6 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE GeneralisedNewtypeDeriving #-}
 -- -----------------------------------------------------------------------------
 --
 -- ParseMonad.hs, part of Alex
@@ -7,182 +10,115 @@
 -- ----------------------------------------------------------------------------}
 
 module ParseMonad (
-        AlexInput, alexInputPrevChar, alexGetChar, alexGetByte,
-        AlexPosn(..), alexStartPos,
+        module ParseMonad.Class,
         Warning(..), warnIfNullable,
-        P, runP, StartCode, failP, lookupSMac, lookupRMac, newSMac, newRMac,
-        setStartCode, getStartCode, getInput, setInput,
+        P, P', PBase, runP, raiseP, failP,
+        lookupSMac, lookupRMac, newSMac, newRMac,
  ) where
 
 import AbsSyn hiding ( StartCode )
 import CharSet ( CharSet )
 import Map ( Map )
 import qualified Map hiding ( Map )
-import UTF8
+import ParseMonad.Class
 
-#if __GLASGOW_HASKELL__ < 710
-import Control.Applicative ( Applicative(..) )
-#endif
-import Control.Monad ( liftM, ap, when )
-import Data.Word (Word8)
--- -----------------------------------------------------------------------------
--- The input type
---import Codec.Binary.UTF8.Light as UTF8
-
-type Byte = Word8
-
-type AlexInput = (AlexPosn,     -- current position,
-                  Char,         -- previous char
-                  [Byte],
-                  String)       -- current input string
-
-alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (_,c,_,_) = c
-
-
-alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar (_,_,[],[]) = Nothing
-alexGetChar (p,_,[],(c:s))  = let p' = alexMove p c in p' `seq`
-                                Just (c, (p', c, [], s))
-alexGetChar (_, _ ,_ : _, _) = undefined -- hide compiler warning
-
-alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
-alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
-alexGetByte (_,_,[],[]) = Nothing
-alexGetByte (p,_,[],(c:s))  = let p' = alexMove p c
-                                  (b:bs) = UTF8.encode c
-                              in p' `seq`  Just (b, (p', c, bs, s))
-
--- -----------------------------------------------------------------------------
--- Token positions
-
--- `Posn' records the location of a token in the input text.  It has three
--- fields: the address (number of charaters preceding the token), line number
--- and column of a token within the file. `start_pos' gives the position of the
--- start of the file and `eof_pos' a standard encoding for the end of file.
--- `move_pos' calculates the new position after traversing a given character,
--- assuming the usual eight character tab stops.
-
-data AlexPosn = AlexPn !Int !Int !Int
-        deriving (Eq,Show)
+import Control.Applicative
+import Control.Monad.State ( StateT(..), get, modify )
+import Control.Monad.Trans ( MonadTrans, lift )
+import Control.Monad ( MonadPlus, when )
 
-alexStartPos :: AlexPosn
-alexStartPos = AlexPn 0 1 1
-
-alexMove :: AlexPosn -> Char -> AlexPosn
-alexMove (AlexPn a l c) '\t' = AlexPn (a+1)  l     (((c+7) `div` 8)*8+1)
-alexMove (AlexPn a l _) '\n' = AlexPn (a+1) (l+1)   1
-alexMove (AlexPn a l c) _    = AlexPn (a+1)  l     (c+1)
+#if ALEX_BOOTSTRAP
+import ParseMonad.Bootstrapped (PBase)
+#else
+import ParseMonad.Oracle (PBase)
+#endif
 
 -- -----------------------------------------------------------------------------
--- Alex lexing/parsing monad
+-- Alex parsing monad transformerx
 
 data Warning
   = WarnNullableRExp
     { _warnPos  :: AlexPosn  -- ^ The position of the code following the regex.
     , _warnText :: String    -- ^ Warning text.
     }
 
-type ParseError = (Maybe AlexPosn, String)
-type StartCode = Int
-
 data PState = PState
   { warnings  :: [Warning]           -- ^ Stack of warnings, top = last warning.
   , smac_env  :: Map String CharSet
   , rmac_env  :: Map String RExp
-  , startcode :: Int
-  , input     :: AlexInput
   }
 
-newtype P a = P { unP :: PState -> Either ParseError (PState,a) }
+newtype P' m a = P { unP :: StateT PState m a }
+  deriving (Functor, Applicative, Alternative, Monad, MonadTrans, MonadPlus)
 
-instance Functor P where
-  fmap = liftM
-
-instance Applicative P where
-  pure a = P $ \env -> Right (env,a)
-  (<*>) = ap
-
-instance Monad P where
- (P m) >>= k = P $ \env -> case m env of
-                        Left err -> Left err
-                        Right (env',ok) -> unP (k ok) env'
- return = pure
+type P = P' PBase
 
 -- | Run the parser on given input.
-runP :: String
+runP :: MonadBasicParse m
+     => String
           -- ^ Input string.
      -> (Map String CharSet, Map String RExp)
           -- ^ Character set and regex definitions.
-     -> P a
+     -> P' m a
           -- ^ Parsing computation.
      -> Either ParseError ([Warning], a)
           -- ^ List of warnings in first-to-last order, result.
-runP str (senv,renv) (P p)
-  = case p initial_state of
-        Left err -> Left err
-        Right (s, a) -> Right (reverse (warnings s), a)
+runP str (senv,renv) p = runPBase str $ do
+  (a, s) <- runStateT (unP p) initial_state
+  return (reverse (warnings s), a)
   where
   initial_state = PState
     { warnings  = []
     , smac_env  = senv
     , rmac_env  = renv
-    , startcode = 0
-    , input     = (alexStartPos, '\n', [], str)
     }
 
-failP :: String -> P a
-failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str)
+raiseP :: MonadBasicParse m => ParseError -> P' m a
+raiseP = P . lift . raisePBase
+
+failP :: MonadBasicParse m => String -> P' m a
+failP = P . lift . failPBase
 
 -- Macros are expanded during parsing, to simplify the abstract
 -- syntax.  The parsing monad passes around two environments mapping
 -- macro names to sets and regexps respectively.
 
-lookupSMac :: (AlexPosn,String) -> P CharSet
-lookupSMac (posn,smac)
- = P $ \s@PState{ smac_env = senv } ->
-       case Map.lookup smac senv of
-        Just ok -> Right (s,ok)
-        Nothing -> Left (Just posn, "unknown set macro: $" ++ smac)
-
-lookupRMac :: String -> P RExp
-lookupRMac rmac
- = P $ \s@PState{ rmac_env = renv } ->
-       case Map.lookup rmac renv of
-        Just ok -> Right (s,ok)
-        Nothing -> Left (Nothing, "unknown regex macro: %" ++ rmac)
-
-newSMac :: String -> CharSet -> P ()
+lookupSMac :: MonadBasicParse m => (AlexPosn,String) -> P' m CharSet
+lookupSMac (posn, smac) = do
+  PState{ smac_env = senv } <- P get
+  case Map.lookup smac senv of
+    Just ok -> return ok
+    Nothing -> raiseP (Just posn, "unknown set macro: $" ++ smac)
+
+lookupRMac :: MonadBasicParse m => String -> P' m RExp
+lookupRMac rmac = do
+  PState{ rmac_env = renv } <- P get
+  case Map.lookup rmac renv of
+    Just ok -> return ok
+    Nothing -> raiseP (Nothing, "unknown regex macro: %" ++ rmac)
+
+newSMac :: Monad m => String -> CharSet -> P' m ()
 newSMac smac set
-  = P $ \s -> Right (s{smac_env = Map.insert smac set (smac_env s)}, ())
+  = P $ modify $ \s -> s { smac_env = Map.insert smac set (smac_env s) }
 
-newRMac :: String -> RExp -> P ()
+newRMac :: Monad m => String -> RExp -> P' m ()
 newRMac rmac rexp
-  = P $ \s -> Right (s{rmac_env = Map.insert rmac rexp (rmac_env s)}, ())
-
-setStartCode :: StartCode -> P ()
-setStartCode sc = P $ \s -> Right (s{ startcode = sc }, ())
-
-getStartCode :: P StartCode
-getStartCode = P $ \s -> Right (s, startcode s)
-
-getInput :: P AlexInput
-getInput = P $ \s -> Right (s, input s)
-
-setInput :: AlexInput -> P ()
-setInput inp = P $ \s -> Right (s{ input = inp }, ())
+  = P $ modify $ \s -> s { rmac_env = Map.insert rmac rexp (rmac_env s) }
 
 -- | Add a warning if given regular expression is nullable
 --   unless the user wrote the regex 'Eps'.
 warnIfNullable
-  :: RExp       -- ^ Regular expression.
+  :: Monad m
+  => RExp       -- ^ Regular expression.
   -> AlexPosn   -- ^ Position associated to regular expression.
-  -> P ()
+  -> P' m ()
 -- If the user wrote @()@, they wanted to match the empty sequence!
 -- Thus, skip the warning then.
 warnIfNullable Eps _ = return ()
-warnIfNullable r pos = when (nullable r) $ P $ \ s ->
-  Right (s{ warnings = WarnNullableRExp pos w : warnings s}, ())
+warnIfNullable r pos = P $
+  when (nullable r) $ modify $ \ s -> s {
+      warnings = WarnNullableRExp pos w : warnings s
+    }
   where
   w = unwords
       [ "Regular expression"