Skip to content

Commit fcaca24

Browse files
Optional bootstrapping (#175)
Before this patch, building 'happy' required a pre-built binary of 'happy'. This was elegant in the same way a self-hosting compiler is elegant. But it also made building purely from source more complicated than needed. Now, we have a subset of the functionality also written with parser combinators. One can build that minimal happy from source, and then bootstrap the regular one from it. This simplifies the build process considerably, along with helping with various (albeit mostly theoretical) trust issues. The parser combinators are bespoke now, but since they need not be performant we will probably just switch to `ReadP` from base, later. Co-authored-by: Vladislav Zavialov <[email protected]>
1 parent 75dbfd2 commit fcaca24

19 files changed

+512
-125
lines changed

.appveyor.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ install:
2929
- "cabal %CABOPTS% v2-update -vverbose+nowrap"
3030
- "cabal %CABOPTS% v2-install alex --bindir=/hsbin"
3131
- "alex --version"
32-
- "cabal %CABOPTS% v2-install happy --bindir=/hsbin"
33-
- "happy --version"
3432

3533
build: off
3634

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ env:
1717
before_install:
1818
- sudo add-apt-repository -y ppa:hvr/ghc
1919
- sudo apt-get update
20-
- sudo apt-get install alex-3.1.7 happy-1.19.5 cabal-install-3.4 ghc-$GHCVER
21-
- export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/alex/3.1.7/bin:/opt/happy/1.19.5/bin:$PATH
20+
- sudo apt-get install alex-3.1.7 cabal-install-3.4 ghc-$GHCVER
21+
- export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/alex/3.1.7/bin:$PATH
2222

2323
install:
2424
- cabal update

Makefile

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,12 @@
11
CABAL = cabal
22

3-
HAPPY = happy
4-
HAPPY_OPTS = -agc
53
HAPPY_VER = `awk '/^version:/ { print $$2 }' happy.cabal`
64

75
ALEX = alex
86
ALEX_OPTS = -g
97

108
SDIST_DIR=dist-newstyle/sdist
119

12-
GEN = src/gen/Parser.hs src/gen/AttrGrammarParser.hs
13-
14-
all : $(GEN)
15-
16-
src/gen/%.hs : src/boot/%.ly
17-
$(HAPPY) $(HAPPYFLAGS) $< -o $@
18-
1910
sdist ::
2011
@case "`$(CABAL) --numeric-version`" in \
2112
2.[2-9].* | [3-9].* ) ;; \
@@ -25,10 +16,6 @@ sdist ::
2516
echo "Error: Tree is not clean"; \
2617
exit 1; \
2718
fi
28-
$(HAPPY) $(HAPPY_OPTS) src/Parser.ly -o src/Parser.hs
29-
$(HAPPY) $(HAPPY_OPTS) src/AttrGrammarParser.ly -o src/AttrGrammarParser.hs
30-
mv src/Parser.ly src/Parser.ly.boot
31-
mv src/AttrGrammarParser.ly src/AttrGrammarParser.ly.boot
3219
$(CABAL) v2-run gen-happy-sdist
3320
$(CABAL) v2-sdist
3421
@if [ ! -f "${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz" ]; then \
@@ -49,7 +36,14 @@ sdist-test-only ::
4936
rm -rf "${SDIST_DIR}/happy-$(HAPPY_VER)/"
5037
tar -xf "${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz" -C ${SDIST_DIR}/
5138
echo "packages: ." > "${SDIST_DIR}/happy-$(HAPPY_VER)/cabal.project"
52-
cd "${SDIST_DIR}/happy-$(HAPPY_VER)/" && cabal v2-test --enable-tests all
39+
echo "tests: True" >> "${SDIST_DIR}/happy-$(HAPPY_VER)/cabal.project"
40+
cd "${SDIST_DIR}/happy-$(HAPPY_VER)/" \
41+
&& cabal v2-build all --flag -bootstrap \
42+
&& cabal v2-install --flag -bootstrap --installdir="./bootstrap-root" \
43+
&& cabal v2-test all -j --flag -bootstrap \
44+
&& export PATH=./bootstrap-root:$$PATH \
45+
&& cabal v2-build all --flag +bootstrap \
46+
&& cabal v2-test all -j --flag +bootstrap
5347
@echo ""
5448
@echo "Success! ${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz is ready for distribution!"
5549
@echo ""

examples/ErlParser.ly

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ Author : Simon Marlow <[email protected]>
1313
> import Lexer
1414
> import AbsSyn
1515
> import Types
16-
> import ParseMonad
16+
> import ParseMonad.Class
1717
> }
1818

1919
> %token

happy.cabal

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ extra-source-files:
155155
tests/rank2.y
156156
tests/shift01.y
157157

158+
flag bootstrap
159+
description: Optimize the implementation of happy using a pre-built happy
160+
manual: True
161+
default: True
162+
158163
source-repository head
159164
type: git
160165
location: https://github.com/simonmar/happy.git
@@ -182,16 +187,32 @@ executable happy
182187
LALR
183188
Lexer
184189
ParseMonad
190+
ParseMonad.Class
185191
Parser
186192
ProduceCode
187193
ProduceGLRCode
188194
NameSet
189195
Target
190196
AttrGrammar
191-
AttrGrammarParser
192197
ParamRules
193198
PrettyGrammar
194199

200+
if flag(bootstrap)
201+
-- TODO put this back when Cabal can use its qualified goals to better
202+
-- understand bootstrapping, see
203+
-- https://github.com/haskell/cabal/issues/7189
204+
--build-tools: happy
205+
cpp-options: -DHAPPY_BOOTSTRAP
206+
other-modules:
207+
ParseMonad.Bootstrapped
208+
Parser.Bootstrapped
209+
AttrGrammarParser
210+
else
211+
other-modules:
212+
ParseMonad.Oracle
213+
Parser.Oracle
214+
215+
195216
test-suite tests
196217
type: exitcode-stdio-1.0
197218
main-is: test.hs
@@ -200,4 +221,3 @@ test-suite tests
200221

201222
build-depends: base, process
202223
default-language: Haskell98
203-

src/AttrGrammar.lhs

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
> module AttrGrammar
22
> ( AgToken (..)
33
> , AgRule (..)
4+
> , HasLexer (..)
45
> , agLexAll
5-
> , agLexer
66
> , subRefVal
77
> , selfRefVal
88
> , rightRefVal
99
> ) where
1010

1111
> import Data.Char
12-
> import ParseMonad
12+
> import ParseMonad.Class
1313

1414
> data AgToken
1515
> = AgTok_LBrace
@@ -62,32 +62,30 @@
6262
-- will wreck column alignment so attribute grammar specifications must
6363
-- not rely on layout.
6464
65-
> type Pfunc a = String -> Int -> ParseResult a
66-
67-
> agLexAll :: P [AgToken]
68-
> agLexAll = mkP $ aux []
65+
> agLexAll :: String -> Int -> ParseResult [AgToken]
66+
> agLexAll = aux []
6967
> where aux toks [] _ = Right (reverse toks)
70-
> aux toks s l = agLexer' (\t -> aux (t:toks)) s l
68+
> aux toks s l = agLexer (\t -> aux (t:toks)) s l
7169
72-
> agLexer :: (AgToken -> P a) -> P a
73-
> agLexer m = mkP $ agLexer' (\x -> runP (m x))
70+
> instance HasLexer AgToken where
71+
> lexToken = agLexer
7472
75-
> agLexer' :: (AgToken -> Pfunc a) -> Pfunc a
76-
> agLexer' cont [] = cont AgTok_EOF []
77-
> agLexer' cont ('{':rest) = cont AgTok_LBrace rest
78-
> agLexer' cont ('}':rest) = cont AgTok_RBrace rest
79-
> agLexer' cont (';':rest) = cont AgTok_Semicolon rest
80-
> agLexer' cont ('=':rest) = cont AgTok_Eq rest
81-
> agLexer' cont ('w':'h':'e':'r':'e':rest) = cont AgTok_Where rest
82-
> agLexer' cont ('$':'$':rest) = agLexAttribute cont (\a -> AgTok_SelfRef a) rest
83-
> agLexer' cont ('$':'>':rest) = agLexAttribute cont (\a -> AgTok_RightmostRef a) rest
84-
> agLexer' cont s@('$':rest) =
73+
> agLexer :: (AgToken -> Pfunc a) -> Pfunc a
74+
> agLexer cont [] = cont AgTok_EOF []
75+
> agLexer cont ('{':rest) = cont AgTok_LBrace rest
76+
> agLexer cont ('}':rest) = cont AgTok_RBrace rest
77+
> agLexer cont (';':rest) = cont AgTok_Semicolon rest
78+
> agLexer cont ('=':rest) = cont AgTok_Eq rest
79+
> agLexer cont ('w':'h':'e':'r':'e':rest) = cont AgTok_Where rest
80+
> agLexer cont ('$':'$':rest) = agLexAttribute cont (\a -> AgTok_SelfRef a) rest
81+
> agLexer cont ('$':'>':rest) = agLexAttribute cont (\a -> AgTok_RightmostRef a) rest
82+
> agLexer cont s@('$':rest) =
8583
> let (n,rest') = span isDigit rest
8684
> in if null n
8785
> then agLexUnknown cont s
8886
> else agLexAttribute cont (\a -> AgTok_SubRef (read n,a)) rest'
89-
> agLexer' cont s@(c:rest)
90-
> | isSpace c = agLexer' cont (dropWhile isSpace rest)
87+
> agLexer cont s@(c:rest)
88+
> | isSpace c = agLexer cont (dropWhile isSpace rest)
9189
> | otherwise = agLexUnknown cont s
9290
9391
> agLexUnknown :: (AgToken -> Pfunc a) -> Pfunc a

src/AttrGrammarParser.ly

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ or a conditional statement.
77
> {
88
> {-# OPTIONS_GHC -w #-}
99
> module AttrGrammarParser (agParser) where
10-
> import ParseMonad
10+
> import ParseMonad.Class
11+
> import ParseMonad.Bootstrapped
1112
> import AttrGrammar
1213
> }
1314

@@ -25,7 +26,7 @@ or a conditional statement.
2526
> unknown { AgTok_Unknown _ }
2627
>
2728
> %monad { P }
28-
> %lexer { agLexer } { AgTok_EOF }
29+
> %lexer { lexTokenP } { AgTok_EOF }
2930

3031
> %%
3132

@@ -64,5 +65,5 @@ or a conditional statement.
6465

6566
> {
6667
> happyError :: P a
67-
> happyError = failP ("Parse error\n")
68+
> happyError = failP (\l -> show l ++ ": Parse error\n")
6869
> }

src/Grammar.lhs

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
-----------------------------------------------------------------------------
1+
-----------------------------------------------------------------------------
22
The Grammar data type.
33

44
(c) 1993-2001 Andy Gill, Simon Marlow
@@ -20,9 +20,16 @@ Here is our mid-section datatype
2020

2121
> import GenUtils
2222
> import AbsSyn
23-
> import ParseMonad
23+
#ifdef HAPPY_BOOTSTRAP
24+
> import ParseMonad.Class
2425
> import AttrGrammar
26+
#endif
27+
28+
The attribute grammar parser is only supported in the bootstrapped version
29+
#ifdef HAPPY_BOOTSTRAP
2530
> import AttrGrammarParser
31+
#endif
32+
2633
> import ParamRules
2734

2835
> import Data.Array
@@ -412,19 +419,28 @@ So is this.
412419

413420
> checkCode :: Int -> [Name] -> [Name] -> String -> [(String,String)] -> M (String,[Int])
414421
> checkCode arity _ _ code [] = doCheckCode arity code
422+
423+
#ifdef HAPPY_BOOTSTRAP
415424
> checkCode arity lhs nonterm_names code attrs = rewriteAttributeGrammar arity lhs nonterm_names code attrs
425+
#else
426+
> checkCode arity _ _ code (_:_) = do
427+
> addErr "Attribute grammars are not supported in non-bootstrapped build"
428+
> doCheckCode arity code
429+
#endif
416430

417431
------------------------------------------------------------------------------
418432
-- Special processing for attribute grammars. We re-parse the body of the code
419433
-- block and output the nasty-looking record manipulation and let binding goop
420434
--
421435

436+
#ifdef HAPPY_BOOTSTRAP
437+
422438
> rewriteAttributeGrammar :: Int -> [Name] -> [Name] -> String -> [(String,String)] -> M (String,[Int])
423439
> rewriteAttributeGrammar arity lhs nonterm_names code attrs =
424440

425441
first we need to parse the body of the code block
426442

427-
> case runP agParser code 0 of
443+
> case runFromStartP agParser code 0 of
428444
> Left msg -> do addErr ("error in attribute grammar rules: "++msg)
429445
> return ("",[])
430446
> Right rules ->
@@ -470,7 +486,6 @@ So is this.
470486
> checkArity x = when (x > arity) $ addErr (show x++" out of range")
471487

472488

473-
474489
------------------------------------------------------------------------------------
475490
-- Actually emit the code for the record bindings and conditionals
476491
--
@@ -539,6 +554,8 @@ So is this.
539554
> formatToken (AgTok_Unknown x) = x++" "
540555
> formatToken AgTok_EOF = error "formatToken AgTok_EOF"
541556

557+
#endif
558+
542559

543560
-----------------------------------------------------------------------------
544561
-- Check for every $i that i is <= the arity of the rule.

0 commit comments

Comments
 (0)