Skip to content

WIP: Optional bootstrap #174

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -17,8 +17,8 @@ env:
before_install:
- sudo add-apt-repository -y ppa:hvr/ghc
- sudo apt-get update
- sudo apt-get install alex-3.1.7 happy-1.19.5 cabal-install-3.4 ghc-$GHCVER
- export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/alex/3.1.7/bin:/opt/happy/1.19.5/bin:$PATH
- sudo apt-get install happy-1.19.5 cabal-install-3.4 ghc-$GHCVER
- export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/happy/1.19.5/bin:$PATH

install:
- cabal update
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
## Unreleased

* No longer include pre-generated Parser.hs in the Hackage upload, as
Happy's new ability to bootstrap from parser combinators is a better
way to make building easy and less trustful.

* Allow arbitary repetitions in regexps.
Previously, the `r{n,m}` and related forms were restricted to single
digit numbers `n` and `m`.
13 changes: 8 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -18,10 +18,6 @@ sdist ::
echo "Error: Tree is not clean"; \
exit 1; \
fi
$(HAPPY) $(HAPPY_OPTS) src/Parser.y -o src/Parser.hs
$(ALEX) $(ALEX_OPTS) src/Scan.x -o src/Scan.hs
mv src/Parser.y src/Parser.y.boot
mv src/Scan.x src/Scan.x.boot
$(CABAL) v2-run gen-alex-sdist
$(CABAL) v2-sdist
@if [ ! -f "${SDIST_DIR}/alex-$(ALEX_VER).tar.gz" ]; then \
@@ -42,7 +38,14 @@ sdist-test-only ::
rm -rf "${SDIST_DIR}/alex-$(ALEX_VER)/"
tar -xf "${SDIST_DIR}/alex-$(ALEX_VER).tar.gz" -C ${SDIST_DIR}/
echo "packages: ." > "${SDIST_DIR}/alex-$(ALEX_VER)/cabal.project"
cd "${SDIST_DIR}/alex-$(ALEX_VER)/" && cabal v2-test --enable-tests all
echo "tests: True" >> "${SDIST_DIR}/alex-$(ALEX_VER)/cabal.project"
cd "${SDIST_DIR}/alex-$(ALEX_VER)/" \
&& cabal v2-build all --flag -bootstrap \
&& cabal v2-install --flag -bootstrap --installdir="./bootstrap-root" \
&& cabal v2-test all -j --flag -bootstrap \
&& export PATH=./bootstrap-root:$$PATH \
&& cabal v2-build all --flag +bootstrap \
&& cabal v2-test all -j --flag +bootstrap
@echo ""
@echo "Success! ${SDIST_DIR}/alex-$(ALEX_VER).tar.gz is ready for distribution!"
@echo ""
28 changes: 26 additions & 2 deletions alex.cabal
Original file line number Diff line number Diff line change
@@ -91,8 +91,6 @@ extra-source-files:
examples/words.x
examples/words_monad.x
examples/words_posn.x
src/Parser.y.boot
src/Scan.x.boot
src/ghc_hooks.c
templates/GenericTemplate.hs
templates/wrappers.hs
@@ -125,6 +123,11 @@ extra-source-files:
tests/issue_119.x
tests/issue_141.x

flag bootstrap
description: Optimize the implementation of happy using a pre-built alex
manual: True
default: False

source-repository head
type: git
location: https://github.com/simonmar/alex.git
@@ -137,6 +140,11 @@ executable alex
, array
, containers
, directory
-- min bounds for Except and ExceptT
, transformers >=0.4 && <0.6
, mtl >= 2.2.1

build-tools: happy

default-language: Haskell98
default-extensions: CPP
@@ -157,16 +165,32 @@ executable alex
Paths_alex
Parser
ParseMonad
ParseMonad.Class
Scan
Set
Sort
Token
Util
UTF8
Data.Ranged
Data.Ranged.Boundaries
Data.Ranged.RangedSet
Data.Ranged.Ranges

if flag(bootstrap)
-- TODO put this back when Cabal can use it's qualified goals to better
-- understand bootstrapping, see
-- https://github.com/haskell/cabal/issues/7189
--build-tools: alex
cpp-options: -DALEX_BOOTSTRAP
other-modules:
ParseMonad.Bootstrapped
Scan.Bootstrapped
else
other-modules:
ParseMonad.Oracle
Scan.Oracle

test-suite tests
type: exitcode-stdio-1.0
main-is: test.hs
3 changes: 1 addition & 2 deletions src/Main.hs
Original file line number Diff line number Diff line change
@@ -18,9 +18,8 @@ import Info
import Map ( Map )
import qualified Map hiding ( Map )
import Output
import ParseMonad ( runP, Warning(..) )
import ParseMonad ( runP, AlexPosn(..), Warning(..) )
import Parser
import Scan
import Util ( hline )
import Paths_alex ( version, getDataDir )

176 changes: 56 additions & 120 deletions src/ParseMonad.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
-- -----------------------------------------------------------------------------
--
-- ParseMonad.hs, part of Alex
@@ -7,182 +10,115 @@
-- ----------------------------------------------------------------------------}

module ParseMonad (
AlexInput, alexInputPrevChar, alexGetChar, alexGetByte,
AlexPosn(..), alexStartPos,
module ParseMonad.Class,
Warning(..), warnIfNullable,
P, runP, StartCode, failP, lookupSMac, lookupRMac, newSMac, newRMac,
setStartCode, getStartCode, getInput, setInput,
P, P', PBase, runP, raiseP, failP,
lookupSMac, lookupRMac, newSMac, newRMac,
) where

import AbsSyn hiding ( StartCode )
import CharSet ( CharSet )
import Map ( Map )
import qualified Map hiding ( Map )
import UTF8
import ParseMonad.Class

#if __GLASGOW_HASKELL__ < 710
import Control.Applicative ( Applicative(..) )
#endif
import Control.Monad ( liftM, ap, when )
import Data.Word (Word8)
-- -----------------------------------------------------------------------------
-- The input type
--import Codec.Binary.UTF8.Light as UTF8

type Byte = Word8

type AlexInput = (AlexPosn, -- current position,
Char, -- previous char
[Byte],
String) -- current input string

alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (_,c,_,_) = c


alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
alexGetChar (_,_,[],[]) = Nothing
alexGetChar (p,_,[],(c:s)) = let p' = alexMove p c in p' `seq`
Just (c, (p', c, [], s))
alexGetChar (_, _ ,_ : _, _) = undefined -- hide compiler warning

alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
alexGetByte (_,_,[],[]) = Nothing
alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c
(b:bs) = UTF8.encode c
in p' `seq` Just (b, (p', c, bs, s))

-- -----------------------------------------------------------------------------
-- Token positions

-- `Posn' records the location of a token in the input text. It has three
-- fields: the address (number of charaters preceding the token), line number
-- and column of a token within the file. `start_pos' gives the position of the
-- start of the file and `eof_pos' a standard encoding for the end of file.
-- `move_pos' calculates the new position after traversing a given character,
-- assuming the usual eight character tab stops.

data AlexPosn = AlexPn !Int !Int !Int
deriving (Eq,Show)
import Control.Applicative
import Control.Monad.State ( StateT(..), get, modify )
import Control.Monad.Trans ( MonadTrans, lift )
import Control.Monad ( MonadPlus, when )

alexStartPos :: AlexPosn
alexStartPos = AlexPn 0 1 1

alexMove :: AlexPosn -> Char -> AlexPosn
alexMove (AlexPn a l c) '\t' = AlexPn (a+1) l (((c+7) `div` 8)*8+1)
alexMove (AlexPn a l _) '\n' = AlexPn (a+1) (l+1) 1
alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1)
#if ALEX_BOOTSTRAP
import ParseMonad.Bootstrapped (PBase)
#else
import ParseMonad.Oracle (PBase)
#endif

-- -----------------------------------------------------------------------------
-- Alex lexing/parsing monad
-- Alex parsing monad transformerx

data Warning
= WarnNullableRExp
{ _warnPos :: AlexPosn -- ^ The position of the code following the regex.
, _warnText :: String -- ^ Warning text.
}

type ParseError = (Maybe AlexPosn, String)
type StartCode = Int

data PState = PState
{ warnings :: [Warning] -- ^ Stack of warnings, top = last warning.
, smac_env :: Map String CharSet
, rmac_env :: Map String RExp
, startcode :: Int
, input :: AlexInput
}

newtype P a = P { unP :: PState -> Either ParseError (PState,a) }
newtype P' m a = P { unP :: StateT PState m a }
deriving (Functor, Applicative, Alternative, Monad, MonadTrans, MonadPlus)

instance Functor P where
fmap = liftM

instance Applicative P where
pure a = P $ \env -> Right (env,a)
(<*>) = ap

instance Monad P where
(P m) >>= k = P $ \env -> case m env of
Left err -> Left err
Right (env',ok) -> unP (k ok) env'
return = pure
type P = P' PBase

-- | Run the parser on given input.
runP :: String
runP :: MonadBasicParse m
=> String
-- ^ Input string.
-> (Map String CharSet, Map String RExp)
-- ^ Character set and regex definitions.
-> P a
-> P' m a
-- ^ Parsing computation.
-> Either ParseError ([Warning], a)
-- ^ List of warnings in first-to-last order, result.
runP str (senv,renv) (P p)
= case p initial_state of
Left err -> Left err
Right (s, a) -> Right (reverse (warnings s), a)
runP str (senv,renv) p = runPBase str $ do
(a, s) <- runStateT (unP p) initial_state
return (reverse (warnings s), a)
where
initial_state = PState
{ warnings = []
, smac_env = senv
, rmac_env = renv
, startcode = 0
, input = (alexStartPos, '\n', [], str)
}

failP :: String -> P a
failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str)
raiseP :: MonadBasicParse m => ParseError -> P' m a
raiseP = P . lift . raisePBase

failP :: MonadBasicParse m => String -> P' m a
failP = P . lift . failPBase

-- Macros are expanded during parsing, to simplify the abstract
-- syntax. The parsing monad passes around two environments mapping
-- macro names to sets and regexps respectively.

lookupSMac :: (AlexPosn,String) -> P CharSet
lookupSMac (posn,smac)
= P $ \s@PState{ smac_env = senv } ->
case Map.lookup smac senv of
Just ok -> Right (s,ok)
Nothing -> Left (Just posn, "unknown set macro: $" ++ smac)

lookupRMac :: String -> P RExp
lookupRMac rmac
= P $ \s@PState{ rmac_env = renv } ->
case Map.lookup rmac renv of
Just ok -> Right (s,ok)
Nothing -> Left (Nothing, "unknown regex macro: %" ++ rmac)

newSMac :: String -> CharSet -> P ()
lookupSMac :: MonadBasicParse m => (AlexPosn,String) -> P' m CharSet
lookupSMac (posn, smac) = do
PState{ smac_env = senv } <- P get
case Map.lookup smac senv of
Just ok -> return ok
Nothing -> raiseP (Just posn, "unknown set macro: $" ++ smac)

lookupRMac :: MonadBasicParse m => String -> P' m RExp
lookupRMac rmac = do
PState{ rmac_env = renv } <- P get
case Map.lookup rmac renv of
Just ok -> return ok
Nothing -> raiseP (Nothing, "unknown regex macro: %" ++ rmac)

newSMac :: Monad m => String -> CharSet -> P' m ()
newSMac smac set
= P $ \s -> Right (s{smac_env = Map.insert smac set (smac_env s)}, ())
= P $ modify $ \s -> s { smac_env = Map.insert smac set (smac_env s) }

newRMac :: String -> RExp -> P ()
newRMac :: Monad m => String -> RExp -> P' m ()
newRMac rmac rexp
= P $ \s -> Right (s{rmac_env = Map.insert rmac rexp (rmac_env s)}, ())

setStartCode :: StartCode -> P ()
setStartCode sc = P $ \s -> Right (s{ startcode = sc }, ())

getStartCode :: P StartCode
getStartCode = P $ \s -> Right (s, startcode s)

getInput :: P AlexInput
getInput = P $ \s -> Right (s, input s)

setInput :: AlexInput -> P ()
setInput inp = P $ \s -> Right (s{ input = inp }, ())
= P $ modify $ \s -> s { rmac_env = Map.insert rmac rexp (rmac_env s) }

-- | Add a warning if given regular expression is nullable
-- unless the user wrote the regex 'Eps'.
warnIfNullable
:: RExp -- ^ Regular expression.
:: Monad m
=> RExp -- ^ Regular expression.
-> AlexPosn -- ^ Position associated to regular expression.
-> P ()
-> P' m ()
-- If the user wrote @()@, they wanted to match the empty sequence!
-- Thus, skip the warning then.
warnIfNullable Eps _ = return ()
warnIfNullable r pos = when (nullable r) $ P $ \ s ->
Right (s{ warnings = WarnNullableRExp pos w : warnings s}, ())
warnIfNullable r pos = P $
when (nullable r) $ modify $ \ s -> s {
warnings = WarnNullableRExp pos w : warnings s
}
where
w = unwords
[ "Regular expression"
Loading