From 232ee4f37f0e50a807568f4e921dea48825aeb53 Mon Sep 17 00:00:00 2001 From: Vladislav Zavialov Date: Sat, 15 Aug 2020 12:25:21 +0300 Subject: [PATCH 01/12] No bootstrapping Before this patch, building 'happy' required a pre-built binary of 'happy'. This was elegant in the same way a self-hosting compiler is elegant. But it also made building purely from source more complicated than needed. This patch introduces a small, bespoke parsing library, and applies it for parsing .y and .ly files. Now 'happy' doesn't depend on itself, and can be built using just GHC. --- Makefile | 15 +-- src/AttrGrammar.lhs | 38 ++++--- src/AttrGrammarParser.hs | 76 ++++++++++++++ src/AttrGrammarParser.ly | 68 ------------- src/Grammar.lhs | 2 +- src/Lexer.lhs | 98 +++++++++--------- src/Main.lhs | 2 +- src/ParseMonad.hs | 107 ++++++++++++++++++-- src/Parser.hs | 208 +++++++++++++++++++++++++++++++++++++++ src/Parser.ly | 150 ---------------------------- 10 files changed, 451 insertions(+), 313 deletions(-) create mode 100644 src/AttrGrammarParser.hs delete mode 100644 src/AttrGrammarParser.ly create mode 100644 src/Parser.hs delete mode 100644 src/Parser.ly diff --git a/Makefile b/Makefile index e8028bed..9f45db2d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,5 @@ CABAL = cabal -HAPPY = happy -HAPPY_OPTS = -agc HAPPY_VER = `awk '/^version:/ { print $$2 }' happy.cabal` ALEX = alex @@ -9,13 +7,6 @@ ALEX_OPTS = -g SDIST_DIR=dist-newstyle/sdist -GEN = src/gen/Parser.hs src/gen/AttrGrammarParser.hs - -all : $(GEN) - -src/gen/%.hs : src/boot/%.ly - $(HAPPY) $(HAPPYFLAGS) $< -o $@ - sdist :: @case "`$(CABAL) --numeric-version`" in \ 2.[2-9].* | [3-9].* ) ;; \ @@ -25,11 +16,7 @@ sdist :: echo Tree is not clean; \ exit 1; \ fi - $(HAPPY) $(HAPPY_OPTS) src/Parser.ly -o src/Parser.hs - $(HAPPY) $(HAPPY_OPTS) src/AttrGrammarParser.ly -o src/AttrGrammarParser.hs - mv src/Parser.ly src/Parser.ly.boot - mv src/AttrGrammarParser.ly src/AttrGrammarParser.ly.boot - $(CABAL) v2-run gen-happy-sdist + $(CABAL) v2-run gen-happy-sdist cabal v2-sdist @if [ ! -f "${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz" ]; then \ echo "Error: source tarball not found: dist/happy-$(HAPPY_VER).tar.gz"; \ diff --git a/src/AttrGrammar.lhs b/src/AttrGrammar.lhs index 466ce17c..378638c0 100644 --- a/src/AttrGrammar.lhs +++ b/src/AttrGrammar.lhs @@ -1,8 +1,8 @@ > module AttrGrammar > ( AgToken (..) > , AgRule (..) +> , HasLexer (..) > , agLexAll -> , agLexer > , subRefVal > , selfRefVal > , rightRefVal @@ -62,32 +62,30 @@ -- will wreck column alignment so attribute grammar specifications must -- not rely on layout. -> type Pfunc a = String -> Int -> ParseResult a - -> agLexAll :: P [AgToken] -> agLexAll = mkP $ aux [] +> agLexAll :: String -> Int -> ParseResult [AgToken] +> agLexAll = aux [] > where aux toks [] _ = Right (reverse toks) -> aux toks s l = agLexer' (\t -> aux (t:toks)) s l +> aux toks s l = agLexer (\t -> aux (t:toks)) s l -> agLexer :: (AgToken -> P a) -> P a -> agLexer m = mkP $ agLexer' (\x -> runP (m x)) +> instance HasLexer AgToken where +> lexToken = agLexer -> agLexer' :: (AgToken -> Pfunc a) -> Pfunc a -> agLexer' cont [] = cont AgTok_EOF [] -> agLexer' cont ('{':rest) = cont AgTok_LBrace rest -> agLexer' cont ('}':rest) = cont AgTok_RBrace rest -> agLexer' cont (';':rest) = cont AgTok_Semicolon rest -> agLexer' cont ('=':rest) = cont AgTok_Eq rest -> agLexer' cont ('w':'h':'e':'r':'e':rest) = cont AgTok_Where rest -> agLexer' cont ('$':'$':rest) = agLexAttribute cont (\a -> AgTok_SelfRef a) rest -> agLexer' cont ('$':'>':rest) = agLexAttribute cont (\a -> AgTok_RightmostRef a) rest -> agLexer' cont s@('$':rest) = +> agLexer :: (AgToken -> Pfunc a) -> Pfunc a +> agLexer cont [] = cont AgTok_EOF [] +> agLexer cont ('{':rest) = cont AgTok_LBrace rest +> agLexer cont ('}':rest) = cont AgTok_RBrace rest +> agLexer cont (';':rest) = cont AgTok_Semicolon rest +> agLexer cont ('=':rest) = cont AgTok_Eq rest +> agLexer cont ('w':'h':'e':'r':'e':rest) = cont AgTok_Where rest +> agLexer cont ('$':'$':rest) = agLexAttribute cont (\a -> AgTok_SelfRef a) rest +> agLexer cont ('$':'>':rest) = agLexAttribute cont (\a -> AgTok_RightmostRef a) rest +> agLexer cont s@('$':rest) = > let (n,rest') = span isDigit rest > in if null n > then agLexUnknown cont s > else agLexAttribute cont (\a -> AgTok_SubRef (read n,a)) rest' -> agLexer' cont s@(c:rest) -> | isSpace c = agLexer' cont (dropWhile isSpace rest) +> agLexer cont s@(c:rest) +> | isSpace c = agLexer cont (dropWhile isSpace rest) > | otherwise = agLexUnknown cont s > agLexUnknown :: (AgToken -> Pfunc a) -> Pfunc a diff --git a/src/AttrGrammarParser.hs b/src/AttrGrammarParser.hs new file mode 100644 index 00000000..89128ef6 --- /dev/null +++ b/src/AttrGrammarParser.hs @@ -0,0 +1,76 @@ +-- This parser parses the contents of the attribute grammar +-- into a list of rules. A rule can either be an assignment +-- to an attribute of the LHS (synthesized attribute), and +-- assignment to an attribute of the RHS (an inherited attribute), +-- or a conditional statement. + +module AttrGrammarParser (agParser) where + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif + +import ParseMonad +import AttrGrammar + +type Parser = P AgToken + +agParser :: Parser [AgRule] +agParser = manySepByP isSemi optRuleP + +optRuleP :: Parser (Maybe AgRule) +optRuleP = withToken match where + match (AgTok_SelfRef v) = + Consume `andThenJust` + pure (SelfAssign v) <* eqP <*> codeP + match (AgTok_SubRef v) = + Consume `andThenJust` + pure (SubAssign v) <* eqP <*> codeP + match (AgTok_RightmostRef v) = + Consume `andThenJust` + pure (RightmostAssign v) <* eqP <*> codeP + match AgTok_Where = + Consume `andThenJust` + fmap Conditional codeP + match tok = PutBack tok `andReturn` Nothing + +eqP :: Parser AgToken +eqP = withToken match where + match tok@AgTok_Eq = Consume `andReturn` tok + match tok = PutBack tok `andThen` parseError "Expected '='" + +rBraceP :: Parser AgToken +rBraceP = withToken match where + match tok@AgTok_RBrace = Consume `andReturn` tok + match tok = PutBack tok `andThen` parseError "Expected '}'" + +codeP :: Parser [AgToken] +codeP = codeP' False + +codeP' :: Bool -> Parser [AgToken] +codeP' consume_semi = withToken match where + match tok@AgTok_LBrace = + Consume `andThen` do + c1 <- codeP' True + tok' <- rBraceP + c2 <- codeP' consume_semi + return $ [tok] ++ c1 ++ [tok'] ++ c2 + match tok = + let consume = Consume `andThen` do + c <- codeP' consume_semi + return (tok : c) + in case tok of + AgTok_Semicolon | consume_semi -> consume + AgTok_Eq -> consume + AgTok_SelfRef _ -> consume + AgTok_SubRef _ -> consume + AgTok_RightmostRef _ -> consume + AgTok_Unknown _ -> consume + _ -> PutBack tok `andReturn` [] + +isSemi :: AgToken -> Bool +isSemi AgTok_Semicolon = True +isSemi _ = False + +parseError :: String -> Parser a +parseError s = failP $ \l -> show l ++ ": " ++ s ++ "\n" diff --git a/src/AttrGrammarParser.ly b/src/AttrGrammarParser.ly deleted file mode 100644 index fc73b65b..00000000 --- a/src/AttrGrammarParser.ly +++ /dev/null @@ -1,68 +0,0 @@ -This parser parses the contents of the attribute grammar -into a list of rules. A rule can either be an assignment -to an attribute of the LHS (synthesized attribute), and -assignment to an attribute of the RHS (an inherited attribute), -or a conditional statement. - -> { -> {-# OPTIONS_GHC -w #-} -> module AttrGrammarParser (agParser) where -> import ParseMonad -> import AttrGrammar -> } - -> %name agParser -> %tokentype { AgToken } -> %token -> "{" { AgTok_LBrace } -> "}" { AgTok_RBrace } -> ";" { AgTok_Semicolon } -> "=" { AgTok_Eq } -> where { AgTok_Where } -> selfRef { AgTok_SelfRef _ } -> subRef { AgTok_SubRef _ } -> rightRef { AgTok_RightmostRef _ } -> unknown { AgTok_Unknown _ } -> -> %monad { P } -> %lexer { agLexer } { AgTok_EOF } - -> %% - -> agParser :: { [AgRule] } -> : rules { $1 } - -> rules :: { [AgRule] } -> : rule ";" rules { $1 : $3 } -> | rule { $1 : [] } -> | { [] } - -> rule :: { AgRule } -> : selfRef "=" code { SelfAssign (selfRefVal $1) $3 } -> | subRef "=" code { SubAssign (subRefVal $1) $3 } -> | rightRef "=" code { RightmostAssign (rightRefVal $1) $3 } -> | where code { Conditional $2 } - -> code :: { [AgToken] } -> : "{" code0 "}" code { [$1] ++ $2 ++ [$3] ++ $4 } -> | "=" code { $1 : $2 } -> | selfRef code { $1 : $2 } -> | subRef code { $1 : $2 } -> | rightRef code { $1 : $2 } -> | unknown code { $1 : $2 } -> | { [] } - -> code0 :: { [AgToken] } -> : "{" code0 "}" code0 { [$1] ++ $2 ++ [$3] ++ $4 } -> | "=" code0 { $1 : $2 } -> | ";" code0 { $1 : $2 } -> | selfRef code0 { $1 : $2 } -> | subRef code0 { $1 : $2 } -> | rightRef code { $1 : $2 } -> | unknown code0 { $1 : $2 } -> | { [] } - -> { -> happyError :: P a -> happyError = failP ("Parse error\n") -> } diff --git a/src/Grammar.lhs b/src/Grammar.lhs index d3ce625e..45cfa978 100644 --- a/src/Grammar.lhs +++ b/src/Grammar.lhs @@ -424,7 +424,7 @@ So is this. first we need to parse the body of the code block -> case runP agParser code 0 of +> case runP agParser (PS code 0 Nothing) of > Left msg -> do addErr ("error in attribute grammar rules: "++msg) > return ("",[]) > Right rules -> diff --git a/src/Lexer.lhs b/src/Lexer.lhs index 602dae31..162ac948 100644 --- a/src/Lexer.lhs +++ b/src/Lexer.lhs @@ -7,7 +7,7 @@ The lexer. > module Lexer ( > Token(..), > TokenId(..), -> lexer ) where +> HasLexer(..) ) where > import ParseMonad @@ -72,34 +72,34 @@ The lexer. ToDo: proper text instance here, for use in parser error messages. -> lexer :: (Token -> P a) -> P a -> lexer cont = mkP lexer' -> where lexer' "" = returnToken cont TokenEOF "" +> instance HasLexer Token where +> lexToken = lexer + +> lexer :: (Token -> Pfunc a) -> Pfunc a +> lexer cont = lexer' +> where lexer' "" = cont TokenEOF "" > lexer' ('-':'-':r) = lexer' (dropWhile (/= '\n') r) > lexer' ('{':'-':r) = \line -> lexNestedComment line lexer' r line > lexer' (c:rest) = nextLex cont c rest -> returnToken :: (t -> P a) -> t -> String -> Int -> ParseResult a -> returnToken cont tok = runP (cont tok) - -> nextLex :: (Token -> P a) -> Char -> String -> Int -> ParseResult a +> nextLex :: (Token -> Pfunc a) -> Char -> String -> Int -> ParseResult a > nextLex cont c = case c of -> '\n' -> \rest line -> returnToken lexer cont rest (line+1) +> '\n' -> \rest line -> lexer cont rest (line+1) > '%' -> lexPercent cont > ':' -> lexColon cont -> ';' -> returnToken cont (TokenKW TokSemiColon) +> ';' -> cont (TokenKW TokSemiColon) -> '|' -> returnToken cont (TokenKW TokBar) +> '|' -> cont (TokenKW TokBar) > '\'' -> lexChar cont > '"'{-"-}-> lexString cont > '{' -> lexCode cont -> '(' -> returnToken cont (TokenKW TokParenL) -> ')' -> returnToken cont (TokenKW TokParenR) -> ',' -> returnToken cont (TokenKW TokComma) +> '(' -> cont (TokenKW TokParenL) +> ')' -> cont (TokenKW TokParenR) +> ',' -> cont (TokenKW TokComma) > _ -> | isSpace c -> runP (lexer cont) +> | isSpace c -> lexer cont > | c >= 'a' && c <= 'z' > || c >= 'A' && c <= 'Z' -> lexId cont c > | isDigit c -> lexNum cont c @@ -108,69 +108,69 @@ ToDo: proper text instance here, for use in parser error messages. Percents come in two forms, in pairs, or followed by a special identifier. -> lexPercent :: (Token -> P a) -> [Char] -> Int -> ParseResult a +> lexPercent :: (Token -> Pfunc a) -> [Char] -> Int -> ParseResult a > lexPercent cont s = case s of -> '%':rest -> returnToken cont (TokenKW TokDoublePercent) rest +> '%':rest -> cont (TokenKW TokDoublePercent) rest > 't':'o':'k':'e':'n':'t':'y':'p':'e':rest -> -> returnToken cont (TokenKW TokSpecId_TokenType) rest +> cont (TokenKW TokSpecId_TokenType) rest > 't':'o':'k':'e':'n':rest -> -> returnToken cont (TokenKW TokSpecId_Token) rest +> cont (TokenKW TokSpecId_Token) rest > 'n':'a':'m':'e':rest -> -> returnToken cont (TokenKW TokSpecId_Name) rest +> cont (TokenKW TokSpecId_Name) rest > 'p':'a':'r':'t':'i':'a':'l':rest -> -> returnToken cont (TokenKW TokSpecId_Partial) rest +> cont (TokenKW TokSpecId_Partial) rest > 'i':'m':'p':'o':'r':'t':'e':'d':'i':'d':'e':'n':'t':'i':'t':'y':rest -> -> returnToken cont (TokenKW TokSpecId_ImportedIdentity) rest +> cont (TokenKW TokSpecId_ImportedIdentity) rest > 'm':'o':'n':'a':'d':rest -> -> returnToken cont (TokenKW TokSpecId_Monad) rest +> cont (TokenKW TokSpecId_Monad) rest > 'l':'e':'x':'e':'r':rest -> -> returnToken cont (TokenKW TokSpecId_Lexer) rest +> cont (TokenKW TokSpecId_Lexer) rest > 'n':'o':'n':'a':'s':'s':'o':'c':rest -> -> returnToken cont (TokenKW TokSpecId_Nonassoc) rest +> cont (TokenKW TokSpecId_Nonassoc) rest > 'l':'e':'f':'t':rest -> -> returnToken cont (TokenKW TokSpecId_Left) rest +> cont (TokenKW TokSpecId_Left) rest > 'r':'i':'g':'h':'t':rest -> -> returnToken cont (TokenKW TokSpecId_Right) rest +> cont (TokenKW TokSpecId_Right) rest > 'p':'r':'e':'c':rest -> -> returnToken cont (TokenKW TokSpecId_Prec) rest +> cont (TokenKW TokSpecId_Prec) rest > 's':'h':'i':'f':'t':rest -> -> returnToken cont (TokenKW TokSpecId_Shift) rest +> cont (TokenKW TokSpecId_Shift) rest > 'e':'x':'p':'e':'c':'t':rest -> -> returnToken cont (TokenKW TokSpecId_Expect) rest +> cont (TokenKW TokSpecId_Expect) rest > 'e':'r':'r':'o':'r':'h':'a':'n':'d':'l':'e':'r':'t':'y':'p':'e':rest -> -> returnToken cont (TokenKW TokSpecId_ErrorHandlerType) rest +> cont (TokenKW TokSpecId_ErrorHandlerType) rest > 'e':'r':'r':'o':'r':rest -> -> returnToken cont (TokenKW TokSpecId_Error) rest +> cont (TokenKW TokSpecId_Error) rest > 'a':'t':'t':'r':'i':'b':'u':'t':'e':'t':'y':'p':'e':rest -> -> returnToken cont (TokenKW TokSpecId_Attributetype) rest +> cont (TokenKW TokSpecId_Attributetype) rest > 'a':'t':'t':'r':'i':'b':'u':'t':'e':rest -> -> returnToken cont (TokenKW TokSpecId_Attribute) rest +> cont (TokenKW TokSpecId_Attribute) rest > _ -> lexError ("unrecognised directive: %" ++ > takeWhile (not.isSpace) s) s -> lexColon :: (Token -> P a) -> [Char] -> Int -> ParseResult a -> lexColon cont (':':rest) = returnToken cont (TokenKW TokDoubleColon) rest -> lexColon cont rest = returnToken cont (TokenKW TokColon) rest +> lexColon :: (Token -> Pfunc a) -> [Char] -> Int -> ParseResult a +> lexColon cont (':':rest) = cont (TokenKW TokDoubleColon) rest +> lexColon cont rest = cont (TokenKW TokColon) rest -> lexId :: (Token -> P a) -> Char -> String -> Int -> ParseResult a +> lexId :: (Token -> Pfunc a) -> Char -> String -> Int -> ParseResult a > lexId cont c rest = -> readId rest (\ ident rest' -> returnToken cont (TokenInfo (c:ident) TokId) rest') +> readId rest (\ ident rest' -> cont (TokenInfo (c:ident) TokId) rest') -> lexChar :: (Token -> P a) -> String -> Int -> ParseResult a +> lexChar :: (Token -> Pfunc a) -> String -> Int -> ParseResult a > lexChar cont rest = lexReadChar rest -> (\ ident -> returnToken cont (TokenInfo ("'" ++ ident ++ "'") TokId)) +> (\ ident -> cont (TokenInfo ("'" ++ ident ++ "'") TokId)) -> lexString :: (Token -> P a) -> String -> Int -> ParseResult a +> lexString :: (Token -> Pfunc a) -> String -> Int -> ParseResult a > lexString cont rest = lexReadString rest -> (\ ident -> returnToken cont (TokenInfo ("\"" ++ ident ++ "\"") TokId)) +> (\ ident -> cont (TokenInfo ("\"" ++ ident ++ "\"") TokId)) -> lexCode :: (Token -> P a) -> String -> Int -> ParseResult a +> lexCode :: (Token -> Pfunc a) -> String -> Int -> ParseResult a > lexCode cont rest = lexReadCode rest (0 :: Integer) "" cont -> lexNum :: (Token -> P a) -> Char -> String -> Int -> ParseResult a +> lexNum :: (Token -> Pfunc a) -> Char -> String -> Int -> ParseResult a > lexNum cont c rest = > readNum rest (\ num rest' -> -> returnToken cont (TokenNum (stringToInt (c:num)) TokNum) rest') +> cont (TokenNum (stringToInt (c:num)) TokNum) rest') > where stringToInt = foldl (\n c' -> digitToInt c' + 10*n) 0 > cleanupCode :: String -> String @@ -181,7 +181,7 @@ This has to match for @}@ that are {\em not} in strings. The code here is a bit tricky, but should work in most cases. > lexReadCode :: (Eq a, Num a) -> => String -> a -> String -> (Token -> P b) -> Int +> => String -> a -> String -> (Token -> Pfunc b) -> Int > -> ParseResult b > lexReadCode s n c = case s of > '\n':r -> \cont l -> lexReadCode r n ('\n':c) cont (l+1) @@ -189,7 +189,7 @@ here is a bit tricky, but should work in most cases. > '{' :r -> lexReadCode r (n+1) ('{':c) > > '}' :r -> | n == 0 -> \cont -> returnToken cont (TokenInfo ( +> | n == 0 -> \cont -> cont (TokenInfo ( > cleanupCode (reverse c)) TokCodeQuote) r > | otherwise -> lexReadCode r (n-1) ('}':c) > @@ -243,7 +243,7 @@ Utilities that read the rest of a token. > lexReadString [] fn = fn "" [] > lexError :: String -> String -> Int -> ParseResult a -> lexError err = runP (lineP >>= \l -> failP (show l ++ ": " ++ err ++ "\n")) +> lexError err = \_ l -> Left (show l ++ ": " ++ err ++ "\n") > lexNestedComment :: Int -> ([Char] -> Int -> ParseResult a) -> [Char] -> Int > -> ParseResult a diff --git a/src/Main.lhs b/src/Main.lhs index 20c5eb62..e596c7cb 100644 --- a/src/Main.lhs +++ b/src/Main.lhs @@ -76,7 +76,7 @@ Open the file. Parse, using bootstrapping parser. -> case runP ourParser file 1 of { +> case runP ourParser (PS file 1 Nothing) of { > Left err -> die (fl_name ++ ':' : err); > Right abssyn@(AbsSyn hd _ _ tl) -> diff --git a/src/ParseMonad.hs b/src/ParseMonad.hs index b9e7bf0f..cdab357d 100644 --- a/src/ParseMonad.hs +++ b/src/ParseMonad.hs @@ -1,18 +1,105 @@ +{-# LANGUAGE RankNTypes #-} + module ParseMonad where -import Control.Monad.Reader +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif +import Control.Monad + +type Pfunc a = String -> Int -> ParseResult a type ParseResult = Either String -type P a = ReaderT (String, Int) ParseResult a -failP :: String -> P a -failP str = ReaderT (\_ -> Left str) +data PState token = + PS !String + !Int + !(Maybe token) + +class HasLexer token where + lexToken :: (token -> Pfunc r) -> Pfunc r + +data Decision token = Consume | PutBack token + +type P' token a = + forall r. (a -> PState token -> ParseResult r) -> Pfunc r + +andThen :: Decision token -> P token a -> P' token a +d `andThen` m = \cont s l -> unP m cont (PS s l mTok) + where mTok = case d of Consume -> Nothing + PutBack tok -> Just tok + +andReturn :: Decision token -> a -> P' token a +d `andReturn` a = d `andThen` return a + +andThenJust :: Decision token -> P token a -> P' token (Maybe a) +d `andThenJust` p = d `andThen` fmap Just p + +infix 0 `andThen` +infix 0 `andReturn` +infix 0 `andThenJust` + +withToken :: HasLexer token => (token -> P' token a) -> P token a +withToken f = + MkP $ \cont (PS s l mTok) -> + case mTok of + Nothing -> lexToken (\tok -> f tok cont) s l + Just tok -> f tok cont s l + +newtype P token a = + MkP { unP :: forall r. (a -> PState token -> ParseResult r) -> + PState token -> ParseResult r } + +instance Functor (P token) where + fmap = liftM + +instance Applicative (P token) where + pure a = MkP ($ a) + MkP f <*> MkP v = MkP $ \cont -> f (\g -> v (cont . g)) + +instance Monad (P token) where +#if !MIN_VERSION_base(4,8,0) + return = pure +#endif + MkP m >>= k = MkP $ \cont -> m (\x -> unP (k x) cont) + +failP :: (Int -> String) -> P token a +failP mkErr = MkP $ \_ (PS _ l _) -> Left (mkErr l) + +lineP :: P token Int +lineP = MkP $ \cont pstate@(PS _ l _) -> cont l pstate + +runP :: P token a -> PState token -> ParseResult a +runP (MkP p) = p (\a _ -> Right a) -mkP :: (String -> Int -> ParseResult a) -> P a -mkP = ReaderT . uncurry +manyP :: P token (Maybe a) -> P token [a] +manyP p = go [] + where + go acc = do + mX <- p + case mX of + Nothing -> return (reverse acc) + Just x -> go (x : acc) -runP :: P a -> String -> Int -> ParseResult a -runP f s l = runReaderT f (s, l) +manySepByP :: HasLexer token => (token -> Bool) -> P token (Maybe a) -> P token [a] +manySepByP isSep p = go [] where + go acc = do + mX <- p + case mX of + Nothing -> return (reverse acc) + Just x -> do + let acc' = x : acc + withToken $ \tok -> + if isSep tok + then Consume `andThen` go acc' + else PutBack tok `andReturn` reverse acc' -lineP :: P Int -lineP = asks snd +someSepByP :: HasLexer token => (token -> Bool) -> P token a -> P token [a] +someSepByP isSep p = go [] where + go acc = do + x <- p + let acc' = x : acc + withToken $ \tok -> + if isSep tok + then Consume `andThen` go acc' + else PutBack tok `andReturn` reverse acc' diff --git a/src/Parser.hs b/src/Parser.hs new file mode 100644 index 00000000..56717924 --- /dev/null +++ b/src/Parser.hs @@ -0,0 +1,208 @@ +{-# LANGUAGE RankNTypes #-} + +module Parser (ourParser, AbsSyn) where + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif + +import Control.Monad (when) +import Data.Maybe (isJust) +import ParseMonad +import AbsSyn +import Lexer + +type Parser = P Token + +ourParser :: Parser AbsSyn +ourParser = do + headerCode <- optCodeP + tokInfos <- manyP optTokInfoP + expectKW "Expected %%" TokDoublePercent + rules <- rulesP + footerCode <- optCodeP + eofP + return (AbsSyn headerCode tokInfos rules footerCode) + +optCodeP :: Parser (Maybe String) +optCodeP = withToken match where + match (TokenInfo str TokCodeQuote) = Consume `andReturn` Just str + match tok = PutBack tok `andReturn` Nothing + +codeP :: Parser String +codeP = do + mCode <- optCodeP + case mCode of + Nothing -> parseError "Expected a code block" + Just code -> return code + +optTokInfoP :: Parser (Maybe (Directive String)) +optTokInfoP = withToken match where + match (TokenKW TokSpecId_TokenType) = + Consume `andThenJust` + pure TokenType <*> codeP + match (TokenKW TokSpecId_Token) = + Consume `andThenJust` + pure TokenSpec <*> manyP optTokenSpecP + match (TokenKW TokSpecId_Name) = + Consume `andThenJust` + pure TokenName <*> idtP <*> optIdtP <*> pure False + match (TokenKW TokSpecId_Partial) = + Consume `andThenJust` + pure TokenName <*> idtP <*> optIdtP <*> pure True + match (TokenKW TokSpecId_ImportedIdentity) = + Consume `andThenJust` + pure TokenImportedIdentity + match (TokenKW TokSpecId_Lexer) = + Consume `andThenJust` + pure TokenLexer <*> codeP <*> codeP + match (TokenKW TokSpecId_Monad) = + Consume `andThenJust` do + codes <- manyP optCodeP + case codes of + [c1] -> return $ TokenMonad "()" c1 "Prelude.>>=" "Prelude.return" + [c1, c2] -> return $ TokenMonad c1 c2 "Prelude.>>=" "Prelude.return" + [c1, c2, c3] -> return $ TokenMonad "()" c1 c2 c3 + [c1, c2, c3, c4] -> return $ TokenMonad c1 c2 c3 c4 + [] -> parseError "Expected a code block" + _ -> parseError "Too many code blocks" + match (TokenKW TokSpecId_Nonassoc) = + Consume `andThenJust` + pure TokenNonassoc <*> manyP optIdtP + match (TokenKW TokSpecId_Right) = + Consume `andThenJust` + pure TokenRight <*> manyP optIdtP + match (TokenKW TokSpecId_Left) = + Consume `andThenJust` + pure TokenLeft <*> manyP optIdtP + match (TokenKW TokSpecId_Expect) = + Consume `andThenJust` + pure TokenExpect <*> numP + match (TokenKW TokSpecId_Error) = + Consume `andThenJust` + pure TokenError <*> codeP + match (TokenKW TokSpecId_ErrorHandlerType) = + Consume `andThenJust` + pure TokenErrorHandlerType <*> idtP + match (TokenKW TokSpecId_Attributetype) = + Consume `andThenJust` + pure TokenAttributetype <*> codeP + match (TokenKW TokSpecId_Attribute) = + Consume `andThenJust` + pure TokenAttribute <*> idtP <*> codeP + match tok = PutBack tok `andReturn` Nothing + +optIdtP :: Parser (Maybe String) +optIdtP = withToken match where + match (TokenInfo idt TokId) = Consume `andReturn` Just idt + match tok = PutBack tok `andReturn` Nothing + +idtP :: Parser String +idtP = do + mIdt <- optIdtP + case mIdt of + Nothing -> parseError "Expected an identifier" + Just idt -> return idt + +numP :: Parser Int +numP = withToken match where + match (TokenNum n TokNum) = Consume `andReturn` n + match tok = PutBack tok `andThen` parseError "Expected a number" + +optTokenSpecP :: Parser (Maybe (String, String)) +optTokenSpecP = withToken match where + match (TokenInfo idt TokId) = + Consume `andThenJust` do + code <- codeP + return (idt, code) + match tok = PutBack tok `andReturn` Nothing + +rulesP :: Parser [Rule] +rulesP = do + rules <- manyP optRuleP + when (null rules) (parseError "At least one rule required") + return rules + +optRuleP :: Parser (Maybe Rule) +optRuleP = do + mIdt <- optIdtP + case mIdt of + Nothing -> return Nothing + Just idt -> do + params <- paramsP idtP + mSig <- optSigP + mIdt' <- if isJust mSig then optIdtP else return Nothing + case mIdt' of + Just idt' | idt' /= idt -> + parseError "Name mismatch in signature and definition" + _ -> return () + expectKW "Expected ':'" TokColon + prods <- someSepByP (isKW TokBar) prodP + let rule = Rule idt params prods mSig + return (Just rule) + +optSigP :: Parser (Maybe String) +optSigP = withToken match where + match (TokenKW TokDoubleColon) = Consume `andThenJust` codeP + match tok = PutBack tok `andReturn` Nothing + +paramsP :: Parser a -> Parser [a] +paramsP p = withToken match where + match (TokenKW TokParenL) = + Consume `andThen` do + params <- someSepByP (isKW TokComma) p + expectKW "Expected ')'" TokParenR + return params + match tok = PutBack tok `andReturn` [] + +optSemiP :: Parser () +optSemiP = withToken match where + match (TokenKW TokSemiColon) = Consume `andReturn` () + match tok = PutBack tok `andReturn` () + +prodP :: Parser Prod +prodP = do + terms <- manyP optTermP + prec <- precP + code <- codeP + optSemiP + l <- lineP + return (Prod terms code l prec) + +termP :: Parser Term +termP = do + mTerm <- optTermP + case mTerm of + Nothing -> parseError "Expected a term" + Just term -> return term + +optTermP :: Parser (Maybe Term) +optTermP = withToken match where + match (TokenInfo idt TokId) = + Consume `andThenJust` do + termParams <- paramsP termP + return (App idt termParams) + match tok = PutBack tok `andReturn` Nothing + +precP :: Parser Prec +precP = withToken match where + match (TokenKW TokSpecId_Shift) = Consume `andReturn` PrecShift + match (TokenKW TokSpecId_Prec) = Consume `andThen` fmap PrecId idtP + match tok = PutBack tok `andReturn` PrecNone + +eofP :: Parser () +eofP = withToken match where + match (TokenEOF) = Consume `andReturn` () + match tok = PutBack tok `andThen` parseError "Parse error" + +parseError :: String -> Parser a +parseError s = failP $ \l -> show l ++ ": " ++ s ++ "\n" + +isKW :: TokenId -> Token -> Bool +isKW tokId (TokenKW tokId') = tokId == tokId' +isKW _ _ = False + +expectKW :: String -> TokenId -> Parser () +expectKW err_msg kw = withToken match where + match (TokenKW tokId) | tokId == kw = Consume `andReturn` () + match tok = PutBack tok `andThen` parseError err_msg diff --git a/src/Parser.ly b/src/Parser.ly deleted file mode 100644 index 6b6ae7a1..00000000 --- a/src/Parser.ly +++ /dev/null @@ -1,150 +0,0 @@ ------------------------------------------------------------------------------ -$Id: Parser.ly,v 1.15 2005/01/26 01:10:42 ross Exp $ - -The parser. - -(c) 1993-2000 Andy Gill, Simon Marlow ------------------------------------------------------------------------------ - -> { -> {-# OPTIONS_GHC -w #-} -> module Parser (ourParser,AbsSyn) where -> import ParseMonad -> import AbsSyn -> import Lexer -> } - -> %name ourParser -> %tokentype { Token } -> %token -> id { TokenInfo $$ TokId } -> spec_tokentype { TokenKW TokSpecId_TokenType } -> spec_token { TokenKW TokSpecId_Token } -> spec_name { TokenKW TokSpecId_Name } -> spec_partial { TokenKW TokSpecId_Partial } -> spec_lexer { TokenKW TokSpecId_Lexer } -> spec_imported_identity { TokenKW TokSpecId_ImportedIdentity } -> spec_monad { TokenKW TokSpecId_Monad } -> spec_nonassoc { TokenKW TokSpecId_Nonassoc } -> spec_left { TokenKW TokSpecId_Left } -> spec_right { TokenKW TokSpecId_Right } -> spec_prec { TokenKW TokSpecId_Prec } -> spec_shift { TokenKW TokSpecId_Shift } -> spec_expect { TokenKW TokSpecId_Expect } -> spec_error { TokenKW TokSpecId_Error } -> spec_errorhandlertype { TokenKW TokSpecId_ErrorHandlerType } -> spec_attribute { TokenKW TokSpecId_Attribute } -> spec_attributetype { TokenKW TokSpecId_Attributetype } -> code { TokenInfo $$ TokCodeQuote } -> int { TokenNum $$ TokNum } -> ":" { TokenKW TokColon } -> ";" { TokenKW TokSemiColon } -> "::" { TokenKW TokDoubleColon } -> "%%" { TokenKW TokDoublePercent } -> "|" { TokenKW TokBar } -> "(" { TokenKW TokParenL } -> ")" { TokenKW TokParenR } -> "," { TokenKW TokComma } - -> %monad { P } -> %lexer { lexer } { TokenEOF } - -> %% - -> parser :: { AbsSyn } -> : optCode tokInfos "%%" rules optCode -> { AbsSyn $1 (reverse $2) (reverse $4) $5 } - -> rules :: { [Rule] } -> : rules rule { $2 : $1 } -> | rule { [$1] } - -> rule :: { Rule } -> : id params "::" code ":" prods { Rule $1 $2 $6 (Just $4) } -> | id params "::" code id ":" prods { Rule $1 $2 $7 (Just $4) } -> | id params ":" prods { Rule $1 $2 $4 Nothing } - -> params :: { [String] } -> : "(" comma_ids ")" { reverse $2 } -> | {- empty -} { [] } - -> comma_ids :: { [String] } -> : id { [$1] } -> | comma_ids "," id { $3 : $1 } - -> prods :: { [Prod] } -> : prod "|" prods { $1 : $3 } -> | prod { [$1] } - -> prod :: { Prod } -> : terms prec code ";" {% lineP >>= \l -> return (Prod $1 $3 l $2) } -> | terms prec code {% lineP >>= \l -> return (Prod $1 $3 l $2) } - -> term :: { Term } -> : id { App $1 [] } -> | id "(" comma_terms ")" { App $1 (reverse $3) } - -> terms :: { [Term] } -> : terms_rev { reverse $1 } -> | { [] } - -> terms_rev :: { [Term] } -> : term { [$1] } -> | terms_rev term { $2 : $1 } - -> comma_terms :: { [Term] } -> : term { [$1] } -> | comma_terms "," term { $3 : $1 } - -> prec :: { Prec } -> : spec_prec id { PrecId $2 } -> | spec_shift { PrecShift } -> | { PrecNone } - -> tokInfos :: { [Directive String] } -> : tokInfos tokInfo { $2 : $1 } -> | tokInfo { [$1] } - -> tokInfo :: { Directive String } -> : spec_tokentype code { TokenType $2 } -> | spec_token tokenSpecs { TokenSpec $2 } -> | spec_name id optStart { TokenName $2 $3 False } -> | spec_partial id optStart { TokenName $2 $3 True } -> | spec_imported_identity { TokenImportedIdentity } -> | spec_lexer code code { TokenLexer $2 $3 } -> | spec_monad code { TokenMonad "()" $2 "Prelude.>>=" "Prelude.return" } -> | spec_monad code code { TokenMonad $2 $3 "Prelude.>>=" "Prelude.return" } -> | spec_monad code code code { TokenMonad "()" $2 $3 $4 } -> | spec_monad code code code code { TokenMonad $2 $3 $4 $5 } -> | spec_nonassoc ids { TokenNonassoc $2 } -> | spec_right ids { TokenRight $2 } -> | spec_left ids { TokenLeft $2 } -> | spec_expect int { TokenExpect $2 } -> | spec_error code { TokenError $2 } -> | spec_errorhandlertype id { TokenErrorHandlerType $2 } -> | spec_attributetype code { TokenAttributetype $2 } -> | spec_attribute id code { TokenAttribute $2 $3 } - -> optStart :: { Maybe String } -> : id { Just $1 } -> | {- nothing -} { Nothing } - -> tokenSpecs :: { [(String,String)] } -> : tokenSpec tokenSpecs { $1:$2 } -> | tokenSpec { [$1] } - -> tokenSpec :: { (String,String) } -> : id code { ($1,$2) } - -> ids :: { [String] } -> : id ids { $1 : $2 } -> | {- nothing -} { [] } - -> optCode :: { Maybe String } -> : code { Just $1 } -> | {- nothing -} { Nothing } - -> { -> happyError :: P a -> happyError = lineP >>= \l -> failP (show l ++ ": Parse error\n") -> } From d8f23e0ec546b605f2d7148df388e4e8837c5315 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 5 Dec 2020 14:36:14 -0500 Subject: [PATCH 02/12] WIP --- happy.cabal | 6 +- src/AttrGrammarParser/Bootstrapped.ly | 68 ++++++++ .../Oracle.hs} | 2 +- src/Grammar.lhs | 2 +- src/Parser/Bootstrapped.ly | 150 ++++++++++++++++++ src/{Parser.hs => Parser/Oracle.hs} | 2 +- 6 files changed, 225 insertions(+), 5 deletions(-) create mode 100644 src/AttrGrammarParser/Bootstrapped.ly rename src/{AttrGrammarParser.hs => AttrGrammarParser/Oracle.hs} (97%) create mode 100644 src/Parser/Bootstrapped.ly rename src/{Parser.hs => Parser/Oracle.hs} (99%) diff --git a/happy.cabal b/happy.cabal index d1e42fb4..815aee83 100644 --- a/happy.cabal +++ b/happy.cabal @@ -177,13 +177,15 @@ executable happy LALR Lexer ParseMonad - Parser + Parser.Oracle + Parser.Bootstrapped ProduceCode ProduceGLRCode NameSet Target AttrGrammar - AttrGrammarParser + AttrGrammarParser.Oracle + AttrGrammarParser.Bootstrapped ParamRules PrettyGrammar diff --git a/src/AttrGrammarParser/Bootstrapped.ly b/src/AttrGrammarParser/Bootstrapped.ly new file mode 100644 index 00000000..fda8575e --- /dev/null +++ b/src/AttrGrammarParser/Bootstrapped.ly @@ -0,0 +1,68 @@ +This parser parses the contents of the attribute grammar +into a list of rules. A rule can either be an assignment +to an attribute of the LHS (synthesized attribute), and +assignment to an attribute of the RHS (an inherited attribute), +or a conditional statement. + +> { +> {-# OPTIONS_GHC -w #-} +> module AttrGrammarParser.Bootstrapped (agParser) where +> import ParseMonad +> import AttrGrammar +> } + +> %name agParser +> %tokentype { AgToken } +> %token +> "{" { AgTok_LBrace } +> "}" { AgTok_RBrace } +> ";" { AgTok_Semicolon } +> "=" { AgTok_Eq } +> where { AgTok_Where } +> selfRef { AgTok_SelfRef _ } +> subRef { AgTok_SubRef _ } +> rightRef { AgTok_RightmostRef _ } +> unknown { AgTok_Unknown _ } +> +> %monad { P AgToken } +> %lexer { (fmap (maybe AgTok_EOF) . lexToken') } { AgTok_EOF } + +> %% + +> agParser :: { [AgRule] } +> : rules { $1 } + +> rules :: { [AgRule] } +> : rule ";" rules { $1 : $3 } +> | rule { $1 : [] } +> | { [] } + +> rule :: { AgRule } +> : selfRef "=" code { SelfAssign (selfRefVal $1) $3 } +> | subRef "=" code { SubAssign (subRefVal $1) $3 } +> | rightRef "=" code { RightmostAssign (rightRefVal $1) $3 } +> | where code { Conditional $2 } + +> code :: { [AgToken] } +> : "{" code0 "}" code { [$1] ++ $2 ++ [$3] ++ $4 } +> | "=" code { $1 : $2 } +> | selfRef code { $1 : $2 } +> | subRef code { $1 : $2 } +> | rightRef code { $1 : $2 } +> | unknown code { $1 : $2 } +> | { [] } + +> code0 :: { [AgToken] } +> : "{" code0 "}" code0 { [$1] ++ $2 ++ [$3] ++ $4 } +> | "=" code0 { $1 : $2 } +> | ";" code0 { $1 : $2 } +> | selfRef code0 { $1 : $2 } +> | subRef code0 { $1 : $2 } +> | rightRef code { $1 : $2 } +> | unknown code0 { $1 : $2 } +> | { [] } + +> { +> happyError :: P AgToken a +> happyError = failP (\l -> show l ++ ": Parse error\n") +> } diff --git a/src/AttrGrammarParser.hs b/src/AttrGrammarParser/Oracle.hs similarity index 97% rename from src/AttrGrammarParser.hs rename to src/AttrGrammarParser/Oracle.hs index 89128ef6..a97e9db9 100644 --- a/src/AttrGrammarParser.hs +++ b/src/AttrGrammarParser/Oracle.hs @@ -4,7 +4,7 @@ -- assignment to an attribute of the RHS (an inherited attribute), -- or a conditional statement. -module AttrGrammarParser (agParser) where +module AttrGrammarParser.Oracle (agParser) where #if !MIN_VERSION_base(4,8,0) import Control.Applicative diff --git a/src/Grammar.lhs b/src/Grammar.lhs index 45cfa978..a6412059 100644 --- a/src/Grammar.lhs +++ b/src/Grammar.lhs @@ -22,7 +22,7 @@ Here is our mid-section datatype > import AbsSyn > import ParseMonad > import AttrGrammar -> import AttrGrammarParser +> import AttrGrammarParser.Oracle > import ParamRules > import Data.Array diff --git a/src/Parser/Bootstrapped.ly b/src/Parser/Bootstrapped.ly new file mode 100644 index 00000000..f1fabc45 --- /dev/null +++ b/src/Parser/Bootstrapped.ly @@ -0,0 +1,150 @@ +----------------------------------------------------------------------------- +$Id: Parser.ly,v 1.15 2005/01/26 01:10:42 ross Exp $ + +The parser. + +(c) 1993-2000 Andy Gill, Simon Marlow +----------------------------------------------------------------------------- + +> { +> {-# OPTIONS_GHC -w #-} +> module Parser.Bootstrapped (ourParser,AbsSyn) where +> import ParseMonad +> import AbsSyn +> import Lexer +> } + +> %name ourParser +> %tokentype { Token } +> %token +> id { TokenInfo $$ TokId } +> spec_tokentype { TokenKW TokSpecId_TokenType } +> spec_token { TokenKW TokSpecId_Token } +> spec_name { TokenKW TokSpecId_Name } +> spec_partial { TokenKW TokSpecId_Partial } +> spec_lexer { TokenKW TokSpecId_Lexer } +> spec_imported_identity { TokenKW TokSpecId_ImportedIdentity } +> spec_monad { TokenKW TokSpecId_Monad } +> spec_nonassoc { TokenKW TokSpecId_Nonassoc } +> spec_left { TokenKW TokSpecId_Left } +> spec_right { TokenKW TokSpecId_Right } +> spec_prec { TokenKW TokSpecId_Prec } +> spec_shift { TokenKW TokSpecId_Shift } +> spec_expect { TokenKW TokSpecId_Expect } +> spec_error { TokenKW TokSpecId_Error } +> spec_errorhandlertype { TokenKW TokSpecId_ErrorHandlerType } +> spec_attribute { TokenKW TokSpecId_Attribute } +> spec_attributetype { TokenKW TokSpecId_Attributetype } +> code { TokenInfo $$ TokCodeQuote } +> int { TokenNum $$ TokNum } +> ":" { TokenKW TokColon } +> ";" { TokenKW TokSemiColon } +> "::" { TokenKW TokDoubleColon } +> "%%" { TokenKW TokDoublePercent } +> "|" { TokenKW TokBar } +> "(" { TokenKW TokParenL } +> ")" { TokenKW TokParenR } +> "," { TokenKW TokComma } + +> %monad { P AgToken } +> %lexer { lexer } { TokenEOF } + +> %% + +> parser :: { AbsSyn } +> : optCode tokInfos "%%" rules optCode +> { AbsSyn $1 (reverse $2) (reverse $4) $5 } + +> rules :: { [Rule] } +> : rules rule { $2 : $1 } +> | rule { [$1] } + +> rule :: { Rule } +> : id params "::" code ":" prods { Rule $1 $2 $6 (Just $4) } +> | id params "::" code id ":" prods { Rule $1 $2 $7 (Just $4) } +> | id params ":" prods { Rule $1 $2 $4 Nothing } + +> params :: { [String] } +> : "(" comma_ids ")" { reverse $2 } +> | {- empty -} { [] } + +> comma_ids :: { [String] } +> : id { [$1] } +> | comma_ids "," id { $3 : $1 } + +> prods :: { [Prod] } +> : prod "|" prods { $1 : $3 } +> | prod { [$1] } + +> prod :: { Prod } +> : terms prec code ";" {% lineP >>= \l -> return (Prod $1 $3 l $2) } +> | terms prec code {% lineP >>= \l -> return (Prod $1 $3 l $2) } + +> term :: { Term } +> : id { App $1 [] } +> | id "(" comma_terms ")" { App $1 (reverse $3) } + +> terms :: { [Term] } +> : terms_rev { reverse $1 } +> | { [] } + +> terms_rev :: { [Term] } +> : term { [$1] } +> | terms_rev term { $2 : $1 } + +> comma_terms :: { [Term] } +> : term { [$1] } +> | comma_terms "," term { $3 : $1 } + +> prec :: { Prec } +> : spec_prec id { PrecId $2 } +> | spec_shift { PrecShift } +> | { PrecNone } + +> tokInfos :: { [Directive String] } +> : tokInfos tokInfo { $2 : $1 } +> | tokInfo { [$1] } + +> tokInfo :: { Directive String } +> : spec_tokentype code { TokenType $2 } +> | spec_token tokenSpecs { TokenSpec $2 } +> | spec_name id optStart { TokenName $2 $3 False } +> | spec_partial id optStart { TokenName $2 $3 True } +> | spec_imported_identity { TokenImportedIdentity } +> | spec_lexer code code { TokenLexer $2 $3 } +> | spec_monad code { TokenMonad "()" $2 "Prelude.>>=" "Prelude.return" } +> | spec_monad code code { TokenMonad $2 $3 "Prelude.>>=" "Prelude.return" } +> | spec_monad code code code { TokenMonad "()" $2 $3 $4 } +> | spec_monad code code code code { TokenMonad $2 $3 $4 $5 } +> | spec_nonassoc ids { TokenNonassoc $2 } +> | spec_right ids { TokenRight $2 } +> | spec_left ids { TokenLeft $2 } +> | spec_expect int { TokenExpect $2 } +> | spec_error code { TokenError $2 } +> | spec_errorhandlertype id { TokenErrorHandlerType $2 } +> | spec_attributetype code { TokenAttributetype $2 } +> | spec_attribute id code { TokenAttribute $2 $3 } + +> optStart :: { Maybe String } +> : id { Just $1 } +> | {- nothing -} { Nothing } + +> tokenSpecs :: { [(String,String)] } +> : tokenSpec tokenSpecs { $1:$2 } +> | tokenSpec { [$1] } + +> tokenSpec :: { (String,String) } +> : id code { ($1,$2) } + +> ids :: { [String] } +> : id ids { $1 : $2 } +> | {- nothing -} { [] } + +> optCode :: { Maybe String } +> : code { Just $1 } +> | {- nothing -} { Nothing } + +> { +> happyError :: P a +> happyError = failP (\l -> show l ++ ": Parse error\n") +> } diff --git a/src/Parser.hs b/src/Parser/Oracle.hs similarity index 99% rename from src/Parser.hs rename to src/Parser/Oracle.hs index 56717924..5542cf13 100644 --- a/src/Parser.hs +++ b/src/Parser/Oracle.hs @@ -1,6 +1,6 @@ {-# LANGUAGE RankNTypes #-} -module Parser (ourParser, AbsSyn) where +module Parser.Oracle (ourParser, AbsSyn) where #if !MIN_VERSION_base(4,8,0) import Control.Applicative From 51ed05db6f17d4322e959ab2be9ec6eaddc83b3d Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 5 Dec 2020 15:18:30 -0500 Subject: [PATCH 03/12] Get old and new versions building --- happy.cabal | 2 + src/AttrGrammarParser/Bootstrapped.ly | 7 +- src/AttrGrammarParser/Oracle.hs | 1 + src/Grammar.lhs | 2 +- src/Main.lhs | 4 +- src/ParseMonad.hs | 102 ++------------------------ src/ParseMonad/Bootstrapped.hs | 25 +++++++ src/ParseMonad/Oracle.hs | 99 +++++++++++++++++++++++++ src/Parser/Bootstrapped.ly | 5 +- src/Parser/Oracle.hs | 1 + 10 files changed, 143 insertions(+), 105 deletions(-) create mode 100644 src/ParseMonad/Bootstrapped.hs create mode 100644 src/ParseMonad/Oracle.hs diff --git a/happy.cabal b/happy.cabal index 815aee83..1072a350 100644 --- a/happy.cabal +++ b/happy.cabal @@ -177,6 +177,8 @@ executable happy LALR Lexer ParseMonad + ParseMonad.Bootstrapped + ParseMonad.Oracle Parser.Oracle Parser.Bootstrapped ProduceCode diff --git a/src/AttrGrammarParser/Bootstrapped.ly b/src/AttrGrammarParser/Bootstrapped.ly index fda8575e..7c83ea96 100644 --- a/src/AttrGrammarParser/Bootstrapped.ly +++ b/src/AttrGrammarParser/Bootstrapped.ly @@ -8,6 +8,7 @@ or a conditional statement. > {-# OPTIONS_GHC -w #-} > module AttrGrammarParser.Bootstrapped (agParser) where > import ParseMonad +> import ParseMonad.Bootstrapped > import AttrGrammar > } @@ -24,8 +25,8 @@ or a conditional statement. > rightRef { AgTok_RightmostRef _ } > unknown { AgTok_Unknown _ } > -> %monad { P AgToken } -> %lexer { (fmap (maybe AgTok_EOF) . lexToken') } { AgTok_EOF } +> %monad { P } +> %lexer { lexTokenP } { AgTok_EOF } > %% @@ -63,6 +64,6 @@ or a conditional statement. > | { [] } > { -> happyError :: P AgToken a +> happyError :: P a > happyError = failP (\l -> show l ++ ": Parse error\n") > } diff --git a/src/AttrGrammarParser/Oracle.hs b/src/AttrGrammarParser/Oracle.hs index a97e9db9..13470765 100644 --- a/src/AttrGrammarParser/Oracle.hs +++ b/src/AttrGrammarParser/Oracle.hs @@ -11,6 +11,7 @@ import Control.Applicative #endif import ParseMonad +import ParseMonad.Oracle import AttrGrammar type Parser = P AgToken diff --git a/src/Grammar.lhs b/src/Grammar.lhs index a6412059..308f7abb 100644 --- a/src/Grammar.lhs +++ b/src/Grammar.lhs @@ -424,7 +424,7 @@ So is this. first we need to parse the body of the code block -> case runP agParser (PS code 0 Nothing) of +> case runFromStartP agParser code 0 of > Left msg -> do addErr ("error in attribute grammar rules: "++msg) > return ("",[]) > Right rules -> diff --git a/src/Main.lhs b/src/Main.lhs index e596c7cb..4c5d3a8a 100644 --- a/src/Main.lhs +++ b/src/Main.lhs @@ -15,7 +15,7 @@ Path settings auto-generated by Cabal: > import AbsSyn > import Grammar > import PrettyGrammar -> import Parser +> import Parser.Oracle > import First > import LALR > import ProduceCode (produceParser) @@ -76,7 +76,7 @@ Open the file. Parse, using bootstrapping parser. -> case runP ourParser (PS file 1 Nothing) of { +> case runFromStartP ourParser file 1 of { > Left err -> die (fl_name ++ ':' : err); > Right abssyn@(AbsSyn hd _ _ tl) -> diff --git a/src/ParseMonad.hs b/src/ParseMonad.hs index cdab357d..11efaa8c 100644 --- a/src/ParseMonad.hs +++ b/src/ParseMonad.hs @@ -1,105 +1,13 @@ -{-# LANGUAGE RankNTypes #-} - module ParseMonad where -#if !MIN_VERSION_base(4,8,0) -import Control.Applicative -#endif -import Control.Monad - type Pfunc a = String -> Int -> ParseResult a -type ParseResult = Either String - -data PState token = - PS !String - !Int - !(Maybe token) - class HasLexer token where lexToken :: (token -> Pfunc r) -> Pfunc r -data Decision token = Consume | PutBack token - -type P' token a = - forall r. (a -> PState token -> ParseResult r) -> Pfunc r - -andThen :: Decision token -> P token a -> P' token a -d `andThen` m = \cont s l -> unP m cont (PS s l mTok) - where mTok = case d of Consume -> Nothing - PutBack tok -> Just tok - -andReturn :: Decision token -> a -> P' token a -d `andReturn` a = d `andThen` return a - -andThenJust :: Decision token -> P token a -> P' token (Maybe a) -d `andThenJust` p = d `andThen` fmap Just p - -infix 0 `andThen` -infix 0 `andReturn` -infix 0 `andThenJust` - -withToken :: HasLexer token => (token -> P' token a) -> P token a -withToken f = - MkP $ \cont (PS s l mTok) -> - case mTok of - Nothing -> lexToken (\tok -> f tok cont) s l - Just tok -> f tok cont s l - -newtype P token a = - MkP { unP :: forall r. (a -> PState token -> ParseResult r) -> - PState token -> ParseResult r } - -instance Functor (P token) where - fmap = liftM - -instance Applicative (P token) where - pure a = MkP ($ a) - MkP f <*> MkP v = MkP $ \cont -> f (\g -> v (cont . g)) - -instance Monad (P token) where -#if !MIN_VERSION_base(4,8,0) - return = pure -#endif - MkP m >>= k = MkP $ \cont -> m (\x -> unP (k x) cont) - -failP :: (Int -> String) -> P token a -failP mkErr = MkP $ \_ (PS _ l _) -> Left (mkErr l) - -lineP :: P token Int -lineP = MkP $ \cont pstate@(PS _ l _) -> cont l pstate - -runP :: P token a -> PState token -> ParseResult a -runP (MkP p) = p (\a _ -> Right a) - -manyP :: P token (Maybe a) -> P token [a] -manyP p = go [] - where - go acc = do - mX <- p - case mX of - Nothing -> return (reverse acc) - Just x -> go (x : acc) - -manySepByP :: HasLexer token => (token -> Bool) -> P token (Maybe a) -> P token [a] -manySepByP isSep p = go [] where - go acc = do - mX <- p - case mX of - Nothing -> return (reverse acc) - Just x -> do - let acc' = x : acc - withToken $ \tok -> - if isSep tok - then Consume `andThen` go acc' - else PutBack tok `andReturn` reverse acc' +type ParseResult = Either String -someSepByP :: HasLexer token => (token -> Bool) -> P token a -> P token [a] -someSepByP isSep p = go [] where - go acc = do - x <- p - let acc' = x : acc - withToken $ \tok -> - if isSep tok - then Consume `andThen` go acc' - else PutBack tok `andReturn` reverse acc' +class Monad p => ParseMonad p where + failP :: (Int -> String) -> p a + lineP :: p Int + runFromStartP :: p a -> String -> Int -> ParseResult a diff --git a/src/ParseMonad/Bootstrapped.hs b/src/ParseMonad/Bootstrapped.hs new file mode 100644 index 00000000..1a262460 --- /dev/null +++ b/src/ParseMonad/Bootstrapped.hs @@ -0,0 +1,25 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE TypeSynonymInstances #-} + +{-# OPTIONS_GHC -Wno-orphans #-} + +module ParseMonad.Bootstrapped where + +import Control.Monad.Reader +import ParseMonad + +type P = ReaderT (String, Int) ParseResult + +mkP :: (String -> Int -> ParseResult a) -> P a +mkP = ReaderT . uncurry + +runP :: P a -> String -> Int -> ParseResult a +runP f s l = runReaderT f (s, l) + +instance ParseMonad P where + failP mkStr = ReaderT (\(_, l) -> Left $ mkStr l) + lineP = asks snd + runFromStartP m s l = runP m s l + +lexTokenP :: HasLexer token => (token -> P r) -> P r +lexTokenP k = ReaderT $ uncurry $ lexToken (\t -> runP $ k t) diff --git a/src/ParseMonad/Oracle.hs b/src/ParseMonad/Oracle.hs new file mode 100644 index 00000000..6ecd5827 --- /dev/null +++ b/src/ParseMonad/Oracle.hs @@ -0,0 +1,99 @@ +{-# LANGUAGE RankNTypes #-} + +module ParseMonad.Oracle where + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif +import Control.Monad + +import ParseMonad + +data PState token = + PS !String + !Int + !(Maybe token) + +data Decision token = Consume | PutBack token + +type P' token a = + forall r. (a -> PState token -> ParseResult r) -> Pfunc r + +andThen :: Decision token -> P token a -> P' token a +d `andThen` m = \cont s l -> unP m cont (PS s l mTok) + where mTok = case d of Consume -> Nothing + PutBack tok -> Just tok + +andReturn :: Decision token -> a -> P' token a +d `andReturn` a = d `andThen` return a + +andThenJust :: Decision token -> P token a -> P' token (Maybe a) +d `andThenJust` p = d `andThen` fmap Just p + +infix 0 `andThen` +infix 0 `andReturn` +infix 0 `andThenJust` + +withToken :: HasLexer token => (token -> P' token a) -> P token a +withToken f = + MkP $ \cont (PS s l mTok) -> + case mTok of + Nothing -> lexToken (\tok -> f tok cont) s l + Just tok -> f tok cont s l + +newtype P token a = + MkP { unP :: forall r. (a -> PState token -> ParseResult r) -> + PState token -> ParseResult r } + +instance Functor (P token) where + fmap = liftM + +instance Applicative (P token) where + pure a = MkP ($ a) + MkP f <*> MkP v = MkP $ \cont -> f (\g -> v (cont . g)) + +instance Monad (P token) where +#if !MIN_VERSION_base(4,8,0) + return = pure +#endif + MkP m >>= k = MkP $ \cont -> m (\x -> unP (k x) cont) + +instance ParseMonad (P token) where + failP mkErr = MkP $ \_ (PS _ l _) -> Left (mkErr l) + lineP = MkP $ \cont pstate@(PS _ l _) -> cont l pstate + runFromStartP m s l = runP m (PS s l Nothing) + +runP :: P token a -> PState token -> ParseResult a +runP (MkP p) = p (\a _ -> Right a) + +manyP :: P token (Maybe a) -> P token [a] +manyP p = go [] + where + go acc = do + mX <- p + case mX of + Nothing -> return (reverse acc) + Just x -> go (x : acc) + +manySepByP :: HasLexer token => (token -> Bool) -> P token (Maybe a) -> P token [a] +manySepByP isSep p = go [] where + go acc = do + mX <- p + case mX of + Nothing -> return (reverse acc) + Just x -> do + let acc' = x : acc + withToken $ \tok -> + if isSep tok + then Consume `andThen` go acc' + else PutBack tok `andReturn` reverse acc' + +someSepByP :: HasLexer token => (token -> Bool) -> P token a -> P token [a] +someSepByP isSep p = go [] where + go acc = do + x <- p + let acc' = x : acc + withToken $ \tok -> + if isSep tok + then Consume `andThen` go acc' + else PutBack tok `andReturn` reverse acc' diff --git a/src/Parser/Bootstrapped.ly b/src/Parser/Bootstrapped.ly index f1fabc45..1bf181cb 100644 --- a/src/Parser/Bootstrapped.ly +++ b/src/Parser/Bootstrapped.ly @@ -10,6 +10,7 @@ The parser. > {-# OPTIONS_GHC -w #-} > module Parser.Bootstrapped (ourParser,AbsSyn) where > import ParseMonad +> import ParseMonad.Bootstrapped > import AbsSyn > import Lexer > } @@ -46,8 +47,8 @@ The parser. > ")" { TokenKW TokParenR } > "," { TokenKW TokComma } -> %monad { P AgToken } -> %lexer { lexer } { TokenEOF } +> %monad { P } +> %lexer { lexTokenP } { TokenEOF } > %% diff --git a/src/Parser/Oracle.hs b/src/Parser/Oracle.hs index 5542cf13..027b2dfe 100644 --- a/src/Parser/Oracle.hs +++ b/src/Parser/Oracle.hs @@ -9,6 +9,7 @@ import Control.Applicative import Control.Monad (when) import Data.Maybe (isJust) import ParseMonad +import ParseMonad.Oracle import AbsSyn import Lexer From eebb859bc03680ffa4f602cd92f43708eee4a929 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 5 Dec 2020 15:35:45 -0500 Subject: [PATCH 04/12] Add Cabal flag to configure whether we do the bootstrapped version --- happy.cabal | 28 +++++++++++++++++++++------- src/Grammar.lhs | 12 ++++++++++++ src/Main.lhs | 12 ++++++++++++ 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/happy.cabal b/happy.cabal index 1072a350..20b183ad 100644 --- a/happy.cabal +++ b/happy.cabal @@ -150,6 +150,11 @@ extra-source-files: tests/rank2.y tests/shift01.y +flag bootstrap + description: Optimize the implementation of happy using a pre-built happy + manual: True + default: False + source-repository head type: git location: https://github.com/simonmar/happy.git @@ -177,20 +182,30 @@ executable happy LALR Lexer ParseMonad - ParseMonad.Bootstrapped - ParseMonad.Oracle - Parser.Oracle - Parser.Bootstrapped ProduceCode ProduceGLRCode NameSet Target AttrGrammar - AttrGrammarParser.Oracle - AttrGrammarParser.Bootstrapped ParamRules PrettyGrammar + if flag(bootstrap) + -- TODO put this back when Cabal can use it's qualified goals to better + -- understand bootstrapping + --build-tools: happy + cpp-options: -DHAPPY_BOOTSTRAP + other-modules: + ParseMonad.Bootstrapped + Parser.Bootstrapped + AttrGrammarParser.Bootstrapped + else + other-modules: + ParseMonad.Oracle + Parser.Oracle + AttrGrammarParser.Oracle + + test-suite tests type: exitcode-stdio-1.0 main-is: test.hs @@ -199,4 +214,3 @@ test-suite tests build-depends: base, process default-language: Haskell98 - diff --git a/src/Grammar.lhs b/src/Grammar.lhs index 308f7abb..b65d8c31 100644 --- a/src/Grammar.lhs +++ b/src/Grammar.lhs @@ -22,7 +22,19 @@ Here is our mid-section datatype > import AbsSyn > import ParseMonad > import AttrGrammar + +We use the bootstrapped version if it is available + +#ifdef HAPPY_BOOTSTRAP + +> import AttrGrammarParser.Bootstrapped + +#else + > import AttrGrammarParser.Oracle + +#endif + > import ParamRules > import Data.Array diff --git a/src/Main.lhs b/src/Main.lhs index 4c5d3a8a..4cdb056a 100644 --- a/src/Main.lhs +++ b/src/Main.lhs @@ -15,7 +15,19 @@ Path settings auto-generated by Cabal: > import AbsSyn > import Grammar > import PrettyGrammar + +We use the bootstrapped version if it is available + +#ifdef HAPPY_BOOTSTRAP + +> import Parser.Bootstrapped + +#else + > import Parser.Oracle + +#endif + > import First > import LALR > import ProduceCode (produceParser) From 73e995a0020fb7e3c4efb54f826b744c330f6d88 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 24 Dec 2020 15:54:17 +0000 Subject: [PATCH 05/12] CI both ways --- Makefile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9f45db2d..da46499c 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,14 @@ sdist-test-only :: rm -rf "${SDIST_DIR}/happy-$(HAPPY_VER)/" tar -xf "${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz" -C ${SDIST_DIR}/ echo "packages: ." > "${SDIST_DIR}/happy-$(HAPPY_VER)/cabal.project" - cd "${SDIST_DIR}/happy-$(HAPPY_VER)/" && cabal v2-test --enable-tests all + echo "tests: True" >> "${SDIST_DIR}/happy-$(HAPPY_VER)/cabal.project" + cd "${SDIST_DIR}/happy-$(HAPPY_VER)/" \ + && cabal v2-build all --flag -bootstrap \ + && cabal v2-install --flag -bootstrap --installdir="./bootstrap-root" \ + && cabal v2-test all --flag -bootstrap \ + && export PATH=./bootstrap-root:$$PATH \ + && cabal v2-build all --flag +bootstrap \ + && cabal v2-test all --flag +bootstrap @echo "" @echo "Success! ${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz is ready for distribution!" @echo "" From 0724a890dad0282d9aaebbb6fe0f1e65b8464e05 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 24 Dec 2020 16:24:34 +0000 Subject: [PATCH 06/12] Make tests parallel --- Makefile | 4 ++-- test.hs | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index da46499c..2ee5ee22 100644 --- a/Makefile +++ b/Makefile @@ -40,10 +40,10 @@ sdist-test-only :: cd "${SDIST_DIR}/happy-$(HAPPY_VER)/" \ && cabal v2-build all --flag -bootstrap \ && cabal v2-install --flag -bootstrap --installdir="./bootstrap-root" \ - && cabal v2-test all --flag -bootstrap \ + && cabal v2-test all -j --flag -bootstrap \ && export PATH=./bootstrap-root:$$PATH \ && cabal v2-build all --flag +bootstrap \ - && cabal v2-test all --flag +bootstrap + && cabal v2-test all -j --flag +bootstrap @echo "" @echo "Success! ${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz is ready for distribution!" @echo "" diff --git a/test.hs b/test.hs index e452a17b..816640d3 100644 --- a/test.hs +++ b/test.hs @@ -1,4 +1,9 @@ +import Data.List (intercalate) +import GHC.Conc (numCapabilities) import System.Process (system) import System.Exit (exitWith) -main = system "make -k -C tests clean all" >>= exitWith +main = do + let jFlag = "-j" ++ show numCapabilities + let cmd = ["make", jFlag, "-k", "-C", "tests", "clean", "all"] + system (intercalate " " cmd) >>= exitWith From 7cf2ef546c9ad7c206d5ba6442b230cf5a61ccd8 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 24 Dec 2020 20:38:27 +0000 Subject: [PATCH 07/12] Fix `-Wno-orphans` for older GHCs --- src/ParseMonad/Bootstrapped.hs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ParseMonad/Bootstrapped.hs b/src/ParseMonad/Bootstrapped.hs index 1a262460..2b3c929d 100644 --- a/src/ParseMonad/Bootstrapped.hs +++ b/src/ParseMonad/Bootstrapped.hs @@ -1,7 +1,12 @@ +{-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeSynonymInstances #-} +#if __GLASGOW_HASKELL__ >= 800 {-# OPTIONS_GHC -Wno-orphans #-} +#else +{-# OPTIONS_GHC -fno-warn-orphans #-} +#endif module ParseMonad.Bootstrapped where From 2f89f002ab19d4bf909d7e0609d08fdcda425659 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 24 Dec 2020 20:40:25 +0000 Subject: [PATCH 08/12] Mention exact cabal issue in cabal file --- happy.cabal | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/happy.cabal b/happy.cabal index 20b183ad..9a5e8b00 100644 --- a/happy.cabal +++ b/happy.cabal @@ -191,8 +191,9 @@ executable happy PrettyGrammar if flag(bootstrap) - -- TODO put this back when Cabal can use it's qualified goals to better - -- understand bootstrapping + -- TODO put this back when Cabal can use it's qualified goals to better -- + -- understand bootstrapping, see + -- https://github.com/haskell/cabal/issues/7189 --build-tools: happy cpp-options: -DHAPPY_BOOTSTRAP other-modules: From 8c217cc9cb596e19abd5df05048d1594adc3b29b Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 26 Dec 2020 17:25:55 -0500 Subject: [PATCH 09/12] Fix bad word-wrap --- happy.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/happy.cabal b/happy.cabal index 9a5e8b00..08291a60 100644 --- a/happy.cabal +++ b/happy.cabal @@ -191,7 +191,7 @@ executable happy PrettyGrammar if flag(bootstrap) - -- TODO put this back when Cabal can use it's qualified goals to better -- + -- TODO put this back when Cabal can use it's qualified goals to better -- understand bootstrapping, see -- https://github.com/haskell/cabal/issues/7189 --build-tools: happy From 354a366fdff631c0b2c68ce38d2b7b5ecb5db254 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sun, 27 Dec 2020 00:33:11 -0500 Subject: [PATCH 10/12] Remove pre-built happy from CI! We boostrap from source now. Alex is still kept, but just for tests --- .appveyor.yml | 2 -- .travis.yml | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index de5d8a51..41b50a4e 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -29,8 +29,6 @@ install: - "cabal %CABOPTS% v2-update -vverbose+nowrap" - "cabal %CABOPTS% v2-install alex --bindir=/hsbin" - "alex --version" - - "cabal %CABOPTS% v2-install happy --bindir=/hsbin" - - "happy --version" build: off diff --git a/.travis.yml b/.travis.yml index 5662e3f3..11370513 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,8 +17,8 @@ env: before_install: - sudo add-apt-repository -y ppa:hvr/ghc - sudo apt-get update - - sudo apt-get install alex-3.1.7 happy-1.19.5 cabal-install-3.4 ghc-$GHCVER - - export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/alex/3.1.7/bin:/opt/happy/1.19.5/bin:$PATH + - sudo apt-get install alex-3.1.7 cabal-install-3.4 ghc-$GHCVER + - export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/alex/3.1.7/bin:$PATH install: - cabal update From 35fc9c07aa71a0ccf10003b60fa39840beccb362 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 28 Dec 2020 15:54:49 -0500 Subject: [PATCH 11/12] Use stub modules to avoid CPP imports --- examples/ErlParser.ly | 2 +- happy.cabal | 3 +++ src/AttrGrammar.lhs | 2 +- src/AttrGrammarParser.hs | 8 ++++++++ src/AttrGrammarParser/Bootstrapped.ly | 2 +- src/AttrGrammarParser/Oracle.hs | 2 +- src/Grammar.lhs | 15 ++------------- src/Lexer.lhs | 2 +- src/Main.lhs | 15 ++------------- src/ParseMonad.hs | 21 ++++++++------------- src/ParseMonad/Bootstrapped.hs | 2 +- src/ParseMonad/Class.hs | 13 +++++++++++++ src/ParseMonad/Oracle.hs | 2 +- src/Parser.hs | 8 ++++++++ src/Parser/Bootstrapped.ly | 2 +- src/Parser/Oracle.hs | 2 +- 16 files changed, 53 insertions(+), 48 deletions(-) create mode 100644 src/AttrGrammarParser.hs create mode 100644 src/ParseMonad/Class.hs create mode 100644 src/Parser.hs diff --git a/examples/ErlParser.ly b/examples/ErlParser.ly index 770bfcba..e8227944 100644 --- a/examples/ErlParser.ly +++ b/examples/ErlParser.ly @@ -13,7 +13,7 @@ Author : Simon Marlow > import Lexer > import AbsSyn > import Types -> import ParseMonad +> import ParseMonad.Class > } > %token diff --git a/happy.cabal b/happy.cabal index 4875a0b3..a8f007c5 100644 --- a/happy.cabal +++ b/happy.cabal @@ -187,11 +187,14 @@ executable happy LALR Lexer ParseMonad + ParseMonad.Class + Parser ProduceCode ProduceGLRCode NameSet Target AttrGrammar + AttrGrammarParser ParamRules PrettyGrammar diff --git a/src/AttrGrammar.lhs b/src/AttrGrammar.lhs index 378638c0..cf3513d6 100644 --- a/src/AttrGrammar.lhs +++ b/src/AttrGrammar.lhs @@ -9,7 +9,7 @@ > ) where > import Data.Char -> import ParseMonad +> import ParseMonad.Class > data AgToken > = AgTok_LBrace diff --git a/src/AttrGrammarParser.hs b/src/AttrGrammarParser.hs new file mode 100644 index 00000000..07249012 --- /dev/null +++ b/src/AttrGrammarParser.hs @@ -0,0 +1,8 @@ +module AttrGrammarParser (module X) where + +-- We use the bootstrapped version if it is available +#ifdef HAPPY_BOOTSTRAP +import AttrGrammarParser.Bootstrapped as X +#else +import AttrGrammarParser.Oracle as X +#endif diff --git a/src/AttrGrammarParser/Bootstrapped.ly b/src/AttrGrammarParser/Bootstrapped.ly index 7c83ea96..deb584d5 100644 --- a/src/AttrGrammarParser/Bootstrapped.ly +++ b/src/AttrGrammarParser/Bootstrapped.ly @@ -7,7 +7,7 @@ or a conditional statement. > { > {-# OPTIONS_GHC -w #-} > module AttrGrammarParser.Bootstrapped (agParser) where -> import ParseMonad +> import ParseMonad.Class > import ParseMonad.Bootstrapped > import AttrGrammar > } diff --git a/src/AttrGrammarParser/Oracle.hs b/src/AttrGrammarParser/Oracle.hs index 13470765..d2eba6f1 100644 --- a/src/AttrGrammarParser/Oracle.hs +++ b/src/AttrGrammarParser/Oracle.hs @@ -10,7 +10,7 @@ module AttrGrammarParser.Oracle (agParser) where import Control.Applicative #endif -import ParseMonad +import ParseMonad.Class import ParseMonad.Oracle import AttrGrammar diff --git a/src/Grammar.lhs b/src/Grammar.lhs index b65d8c31..1afcced2 100644 --- a/src/Grammar.lhs +++ b/src/Grammar.lhs @@ -20,20 +20,9 @@ Here is our mid-section datatype > import GenUtils > import AbsSyn -> import ParseMonad +> import ParseMonad.Class > import AttrGrammar - -We use the bootstrapped version if it is available - -#ifdef HAPPY_BOOTSTRAP - -> import AttrGrammarParser.Bootstrapped - -#else - -> import AttrGrammarParser.Oracle - -#endif +> import AttrGrammarParser > import ParamRules diff --git a/src/Lexer.lhs b/src/Lexer.lhs index 162ac948..1e41df43 100644 --- a/src/Lexer.lhs +++ b/src/Lexer.lhs @@ -9,7 +9,7 @@ The lexer. > TokenId(..), > HasLexer(..) ) where -> import ParseMonad +> import ParseMonad.Class > import Data.Char ( isSpace, isAlphaNum, isDigit, digitToInt ) diff --git a/src/Main.lhs b/src/Main.lhs index 395cb73d..6aaeea04 100644 --- a/src/Main.lhs +++ b/src/Main.lhs @@ -11,22 +11,11 @@ Path settings auto-generated by Cabal: > import Paths_happy -> import ParseMonad +> import ParseMonad.Class > import AbsSyn > import Grammar > import PrettyGrammar - -We use the bootstrapped version if it is available - -#ifdef HAPPY_BOOTSTRAP - -> import Parser.Bootstrapped - -#else - -> import Parser.Oracle - -#endif +> import Parser > import First > import LALR diff --git a/src/ParseMonad.hs b/src/ParseMonad.hs index 11efaa8c..634de1bd 100644 --- a/src/ParseMonad.hs +++ b/src/ParseMonad.hs @@ -1,13 +1,8 @@ -module ParseMonad where - -type Pfunc a = String -> Int -> ParseResult a - -class HasLexer token where - lexToken :: (token -> Pfunc r) -> Pfunc r - -type ParseResult = Either String - -class Monad p => ParseMonad p where - failP :: (Int -> String) -> p a - lineP :: p Int - runFromStartP :: p a -> String -> Int -> ParseResult a +module ParseMonad (module X) where + +-- We use the bootstrapped version if it is available +#ifdef HAPPY_BOOTSTRAP +import ParseMonad.Bootstrapped as X +#else +import ParseMonad.Oracle as X +#endif diff --git a/src/ParseMonad/Bootstrapped.hs b/src/ParseMonad/Bootstrapped.hs index 2b3c929d..50fee187 100644 --- a/src/ParseMonad/Bootstrapped.hs +++ b/src/ParseMonad/Bootstrapped.hs @@ -11,7 +11,7 @@ module ParseMonad.Bootstrapped where import Control.Monad.Reader -import ParseMonad +import ParseMonad.Class type P = ReaderT (String, Int) ParseResult diff --git a/src/ParseMonad/Class.hs b/src/ParseMonad/Class.hs new file mode 100644 index 00000000..6824d768 --- /dev/null +++ b/src/ParseMonad/Class.hs @@ -0,0 +1,13 @@ +module ParseMonad.Class where + +type Pfunc a = String -> Int -> ParseResult a + +class HasLexer token where + lexToken :: (token -> Pfunc r) -> Pfunc r + +type ParseResult = Either String + +class Monad p => ParseMonad p where + failP :: (Int -> String) -> p a + lineP :: p Int + runFromStartP :: p a -> String -> Int -> ParseResult a diff --git a/src/ParseMonad/Oracle.hs b/src/ParseMonad/Oracle.hs index 6ecd5827..1e770db3 100644 --- a/src/ParseMonad/Oracle.hs +++ b/src/ParseMonad/Oracle.hs @@ -7,7 +7,7 @@ import Control.Applicative #endif import Control.Monad -import ParseMonad +import ParseMonad.Class data PState token = PS !String diff --git a/src/Parser.hs b/src/Parser.hs new file mode 100644 index 00000000..c6269df4 --- /dev/null +++ b/src/Parser.hs @@ -0,0 +1,8 @@ +module Parser (module X) where + +-- We use the bootstrapped version if it is available +#ifdef HAPPY_BOOTSTRAP +import Parser.Bootstrapped as X +#else +import Parser.Oracle as X +#endif diff --git a/src/Parser/Bootstrapped.ly b/src/Parser/Bootstrapped.ly index 1bf181cb..4ba44ff1 100644 --- a/src/Parser/Bootstrapped.ly +++ b/src/Parser/Bootstrapped.ly @@ -9,7 +9,7 @@ The parser. > { > {-# OPTIONS_GHC -w #-} > module Parser.Bootstrapped (ourParser,AbsSyn) where -> import ParseMonad +> import ParseMonad.Class > import ParseMonad.Bootstrapped > import AbsSyn > import Lexer diff --git a/src/Parser/Oracle.hs b/src/Parser/Oracle.hs index 027b2dfe..7dc510ea 100644 --- a/src/Parser/Oracle.hs +++ b/src/Parser/Oracle.hs @@ -8,7 +8,7 @@ import Control.Applicative import Control.Monad (when) import Data.Maybe (isJust) -import ParseMonad +import ParseMonad.Class import ParseMonad.Oracle import AbsSyn import Lexer From 7027f42a51817ce3c0a5bdcb9de5063221096b2a Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 28 Dec 2020 20:29:06 -0500 Subject: [PATCH 12/12] Attribute grammars are now only supported when bootstrapped This makes the initial boostrapping stage smaller while not affecting the second stage. --- happy.cabal | 6 +- src/AttrGrammarParser.hs | 8 -- .../Bootstrapped.ly => AttrGrammarParser.ly} | 2 +- src/AttrGrammarParser/Oracle.hs | 77 ------------------- src/Grammar.lhs | 20 ++++- tests/Makefile | 5 +- 6 files changed, 25 insertions(+), 93 deletions(-) delete mode 100644 src/AttrGrammarParser.hs rename src/{AttrGrammarParser/Bootstrapped.ly => AttrGrammarParser.ly} (97%) delete mode 100644 src/AttrGrammarParser/Oracle.hs diff --git a/happy.cabal b/happy.cabal index a8f007c5..f6bacd8a 100644 --- a/happy.cabal +++ b/happy.cabal @@ -158,7 +158,7 @@ extra-source-files: flag bootstrap description: Optimize the implementation of happy using a pre-built happy manual: True - default: False + default: True source-repository head type: git @@ -194,7 +194,6 @@ executable happy NameSet Target AttrGrammar - AttrGrammarParser ParamRules PrettyGrammar @@ -207,12 +206,11 @@ executable happy other-modules: ParseMonad.Bootstrapped Parser.Bootstrapped - AttrGrammarParser.Bootstrapped + AttrGrammarParser else other-modules: ParseMonad.Oracle Parser.Oracle - AttrGrammarParser.Oracle test-suite tests diff --git a/src/AttrGrammarParser.hs b/src/AttrGrammarParser.hs deleted file mode 100644 index 07249012..00000000 --- a/src/AttrGrammarParser.hs +++ /dev/null @@ -1,8 +0,0 @@ -module AttrGrammarParser (module X) where - --- We use the bootstrapped version if it is available -#ifdef HAPPY_BOOTSTRAP -import AttrGrammarParser.Bootstrapped as X -#else -import AttrGrammarParser.Oracle as X -#endif diff --git a/src/AttrGrammarParser/Bootstrapped.ly b/src/AttrGrammarParser.ly similarity index 97% rename from src/AttrGrammarParser/Bootstrapped.ly rename to src/AttrGrammarParser.ly index deb584d5..a00df123 100644 --- a/src/AttrGrammarParser/Bootstrapped.ly +++ b/src/AttrGrammarParser.ly @@ -6,7 +6,7 @@ or a conditional statement. > { > {-# OPTIONS_GHC -w #-} -> module AttrGrammarParser.Bootstrapped (agParser) where +> module AttrGrammarParser (agParser) where > import ParseMonad.Class > import ParseMonad.Bootstrapped > import AttrGrammar diff --git a/src/AttrGrammarParser/Oracle.hs b/src/AttrGrammarParser/Oracle.hs deleted file mode 100644 index d2eba6f1..00000000 --- a/src/AttrGrammarParser/Oracle.hs +++ /dev/null @@ -1,77 +0,0 @@ --- This parser parses the contents of the attribute grammar --- into a list of rules. A rule can either be an assignment --- to an attribute of the LHS (synthesized attribute), and --- assignment to an attribute of the RHS (an inherited attribute), --- or a conditional statement. - -module AttrGrammarParser.Oracle (agParser) where - -#if !MIN_VERSION_base(4,8,0) -import Control.Applicative -#endif - -import ParseMonad.Class -import ParseMonad.Oracle -import AttrGrammar - -type Parser = P AgToken - -agParser :: Parser [AgRule] -agParser = manySepByP isSemi optRuleP - -optRuleP :: Parser (Maybe AgRule) -optRuleP = withToken match where - match (AgTok_SelfRef v) = - Consume `andThenJust` - pure (SelfAssign v) <* eqP <*> codeP - match (AgTok_SubRef v) = - Consume `andThenJust` - pure (SubAssign v) <* eqP <*> codeP - match (AgTok_RightmostRef v) = - Consume `andThenJust` - pure (RightmostAssign v) <* eqP <*> codeP - match AgTok_Where = - Consume `andThenJust` - fmap Conditional codeP - match tok = PutBack tok `andReturn` Nothing - -eqP :: Parser AgToken -eqP = withToken match where - match tok@AgTok_Eq = Consume `andReturn` tok - match tok = PutBack tok `andThen` parseError "Expected '='" - -rBraceP :: Parser AgToken -rBraceP = withToken match where - match tok@AgTok_RBrace = Consume `andReturn` tok - match tok = PutBack tok `andThen` parseError "Expected '}'" - -codeP :: Parser [AgToken] -codeP = codeP' False - -codeP' :: Bool -> Parser [AgToken] -codeP' consume_semi = withToken match where - match tok@AgTok_LBrace = - Consume `andThen` do - c1 <- codeP' True - tok' <- rBraceP - c2 <- codeP' consume_semi - return $ [tok] ++ c1 ++ [tok'] ++ c2 - match tok = - let consume = Consume `andThen` do - c <- codeP' consume_semi - return (tok : c) - in case tok of - AgTok_Semicolon | consume_semi -> consume - AgTok_Eq -> consume - AgTok_SelfRef _ -> consume - AgTok_SubRef _ -> consume - AgTok_RightmostRef _ -> consume - AgTok_Unknown _ -> consume - _ -> PutBack tok `andReturn` [] - -isSemi :: AgToken -> Bool -isSemi AgTok_Semicolon = True -isSemi _ = False - -parseError :: String -> Parser a -parseError s = failP $ \l -> show l ++ ": " ++ s ++ "\n" diff --git a/src/Grammar.lhs b/src/Grammar.lhs index 1afcced2..bf91009d 100644 --- a/src/Grammar.lhs +++ b/src/Grammar.lhs @@ -1,4 +1,4 @@ ------------------------------------------------------------------------------ +/----------------------------------------------------------------------------- The Grammar data type. (c) 1993-2001 Andy Gill, Simon Marlow @@ -20,9 +20,15 @@ Here is our mid-section datatype > import GenUtils > import AbsSyn +#ifdef HAPPY_BOOTSTRAP > import ParseMonad.Class > import AttrGrammar +#endif + +This is only supported in the bootstrapped version +#ifdef HAPPY_BOOTSTRAP > import AttrGrammarParser +#endif > import ParamRules @@ -413,13 +419,22 @@ So is this. > checkCode :: Int -> [Name] -> [Name] -> String -> [(String,String)] -> M (String,[Int]) > checkCode arity _ _ code [] = doCheckCode arity code + +#ifdef HAPPY_BOOTSTRAP > checkCode arity lhs nonterm_names code attrs = rewriteAttributeGrammar arity lhs nonterm_names code attrs +#else +> checkCode arity _ _ code (_:_) = do +> addErr "Attribute grammars are not supported in non-bootstrapped build" +> doCheckCode arity code +#endif ------------------------------------------------------------------------------ -- Special processing for attribute grammars. We re-parse the body of the code -- block and output the nasty-looking record manipulation and let binding goop -- +#ifdef HAPPY_BOOTSTRAP + > rewriteAttributeGrammar :: Int -> [Name] -> [Name] -> String -> [(String,String)] -> M (String,[Int]) > rewriteAttributeGrammar arity lhs nonterm_names code attrs = @@ -471,7 +486,6 @@ So is this. > checkArity x = when (x > arity) $ addErr (show x++" out of range") - ------------------------------------------------------------------------------------ -- Actually emit the code for the record bindings and conditionals -- @@ -540,6 +554,8 @@ So is this. > formatToken (AgTok_Unknown x) = x++" " > formatToken AgTok_EOF = error "formatToken AgTok_EOF" +#endif + ----------------------------------------------------------------------------- -- Check for every $i that i is <= the arity of the rule. diff --git a/tests/Makefile b/tests/Makefile index 93130715..b839425d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -27,11 +27,14 @@ TESTS = Test.ly TestMulti.ly TestPrecedence.ly bug001.ly \ monad001.y monad002.ly precedence001.ly precedence002.y \ bogus-token.y bug002.y Partial.ly \ issue91.y issue93.y issue94.y issue95.y \ - AttrGrammar001.y AttrGrammar002.y \ test_rules.y monaderror.y monaderror-explist.y \ typeclass_monad001.y typeclass_monad002.ly typeclass_monad_lexer.y \ rank2.y shift01.y +ifdef HAPPY_BOOTSTRAP +TESTS += AttrGrammar001.y AttrGrammar002.y +endif + ERROR_TESTS = error001.y # NOTE: `cabal` will set the `happy_datadir` env-var accordingly before invoking the test-suite