Skip to content

Commit 1f24a71

Browse files
authored
bpo-35808: Retire pgen and use pgen2 to generate the parser (GH-11814)
Pgen is the oldest piece of technology in the CPython repository; building it requires various #if[n]def PGEN hacks in other parts of the code, and it also depends more and more on CPython internals. This commit removes the old pgen C code and replaces it with a new version implemented in pure Python. This is a modified and adapted version of lib2to3/pgen2 that can generate grammar files compatible with the current parser. This commit also eliminates all the #ifdef and code branches related to pgen, simplifying the code and making it more maintainable. The regen-grammar step now uses $(PYTHON_FOR_REGEN), which can be any version of the interpreter, so the new pgen code maintains compatibility with older versions of the interpreter (this also allows regenerating the grammar with the current CI solution, which uses Python 3.5). The new pgen Python module also makes use of the Grammar/Tokens file that holds the token specification, so it is always kept in sync and avoids having to maintain duplicate token definitions.
1 parent 7eebbbd commit 1f24a71

27 files changed

+1480
-2684
lines changed

.gitignore

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,6 @@ PCbuild/arm32/
7373
PCbuild/obj/
7474
PCbuild/win32/
7575
.purify
76-
Parser/pgen
77-
Parser/pgen.exe
7876
__pycache__
7977
autom4te.cache
8078
build/

Include/metagrammar.h

Lines changed: 0 additions & 18 deletions
This file was deleted.

Include/parsetok.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,7 @@ extern "C" {
1212

1313
typedef struct {
1414
int error;
15-
#ifndef PGEN
16-
/* The filename is useless for pgen, see comment in tok_state structure */
1715
PyObject *filename;
18-
#endif
1916
int lineno;
2017
int offset;
2118
char *text; /* UTF-8-encoded string */

Include/pgen.h

Lines changed: 0 additions & 18 deletions
This file was deleted.

Makefile.pre.in

Lines changed: 4 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -290,40 +290,21 @@ LIBFFI_INCLUDEDIR= @LIBFFI_INCLUDEDIR@
290290

291291
##########################################################################
292292
# Parser
293-
PGEN= Parser/pgen$(EXE)
294-
295293
POBJS= \
296294
Parser/acceler.o \
297295
Parser/grammar1.o \
298296
Parser/listnode.o \
299297
Parser/node.o \
300298
Parser/parser.o \
301-
Parser/bitset.o \
302-
Parser/metagrammar.o \
303-
Parser/firstsets.o \
304-
Parser/grammar.o \
305299
Parser/token.o \
306-
Parser/pgen.o
307300

308301
PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o
309302

310-
PGOBJS= \
311-
Objects/obmalloc.o \
312-
Python/dynamic_annotations.o \
313-
Python/mysnprintf.o \
314-
Python/pyctype.o \
315-
Parser/tokenizer_pgen.o \
316-
Parser/printgrammar.o \
317-
Parser/parsetok_pgen.o \
318-
Parser/pgenmain.o
319-
320303
PARSER_HEADERS= \
321304
$(srcdir)/Parser/parser.h \
322305
$(srcdir)/Include/parsetok.h \
323306
$(srcdir)/Parser/tokenizer.h
324307

325-
PGENOBJS= $(POBJS) $(PGOBJS)
326-
327308
##########################################################################
328309
# Python
329310

@@ -802,31 +783,18 @@ Python/sysmodule.o: $(srcdir)/Python/sysmodule.c Makefile
802783

803784
$(IO_OBJS): $(IO_H)
804785

805-
$(PGEN): $(PGENOBJS)
806-
$(CC) $(OPT) $(PY_CORE_LDFLAGS) $(PGENOBJS) $(LIBS) -o $(PGEN)
807-
808786
.PHONY: regen-grammar
809-
regen-grammar: $(PGEN)
787+
regen-grammar: regen-token
810788
# Regenerate Include/graminit.h and Python/graminit.c
811789
# from Grammar/Grammar using pgen
812790
@$(MKDIR_P) Include
813-
$(PGEN) $(srcdir)/Grammar/Grammar \
791+
$(PYTHON_FOR_REGEN) -m Parser.pgen $(srcdir)/Grammar/Grammar \
792+
$(srcdir)/Grammar/Tokens \
814793
$(srcdir)/Include/graminit.h.new \
815794
$(srcdir)/Python/graminit.c.new
816795
$(UPDATE_FILE) $(srcdir)/Include/graminit.h $(srcdir)/Include/graminit.h.new
817796
$(UPDATE_FILE) $(srcdir)/Python/graminit.c $(srcdir)/Python/graminit.c.new
818797

819-
Parser/grammar.o: $(srcdir)/Parser/grammar.c \
820-
$(srcdir)/Include/token.h \
821-
$(srcdir)/Include/grammar.h
822-
Parser/metagrammar.o: $(srcdir)/Parser/metagrammar.c
823-
824-
Parser/tokenizer_pgen.o: $(srcdir)/Parser/tokenizer.c
825-
Parser/parsetok_pgen.o: $(srcdir)/Parser/parsetok.c
826-
Parser/printgrammar.o: $(srcdir)/Parser/printgrammar.c
827-
828-
Parser/pgenmain.o: $(srcdir)/Include/parsetok.h
829-
830798
.PHONY=regen-ast
831799
regen-ast:
832800
# Regenerate Include/Python-ast.h using Parser/asdl_c.py -h
@@ -1016,7 +984,6 @@ PYTHON_HEADERS= \
1016984
$(srcdir)/Include/longobject.h \
1017985
$(srcdir)/Include/marshal.h \
1018986
$(srcdir)/Include/memoryobject.h \
1019-
$(srcdir)/Include/metagrammar.h \
1020987
$(srcdir)/Include/methodobject.h \
1021988
$(srcdir)/Include/modsupport.h \
1022989
$(srcdir)/Include/moduleobject.h \
@@ -1028,7 +995,6 @@ PYTHON_HEADERS= \
1028995
$(srcdir)/Include/osdefs.h \
1029996
$(srcdir)/Include/osmodule.h \
1030997
$(srcdir)/Include/patchlevel.h \
1031-
$(srcdir)/Include/pgen.h \
1032998
$(srcdir)/Include/pgenheaders.h \
1033999
$(srcdir)/Include/pyarena.h \
10341000
$(srcdir)/Include/pycapsule.h \
@@ -1771,7 +1737,7 @@ profile-removal:
17711737
rm -f profile-run-stamp
17721738

17731739
clobber: clean profile-removal
1774-
-rm -f $(BUILDPYTHON) $(PGEN) $(LIBRARY) $(LDLIBRARY) $(DLLLIBRARY) \
1740+
-rm -f $(BUILDPYTHON) $(LIBRARY) $(LDLIBRARY) $(DLLLIBRARY) \
17751741
tags TAGS \
17761742
config.cache config.log pyconfig.h Modules/config.c
17771743
-rm -rf build platform
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Retire pgen and use a modified version of pgen2 to generate the parser.
2+
Patch by Pablo Galindo.

Misc/coverity_model.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,6 @@ wchar_t *Py_DecodeLocale(const char* arg, size_t *size)
9292
return w;
9393
}
9494

95-
/* Parser/pgenmain.c */
96-
grammar *getgrammar(const char *filename)
97-
{
98-
grammar *g;
99-
__coverity_tainted_data_sink__(filename);
100-
return g;
101-
}
102-
10395
/* Python/marshal.c */
10496

10597
static Py_ssize_t r_string(char *s, Py_ssize_t n, RFILE *p)

PCbuild/pythoncore.vcxproj

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,6 @@
161161
<ClInclude Include="..\Include\longobject.h" />
162162
<ClInclude Include="..\Include\marshal.h" />
163163
<ClInclude Include="..\Include\memoryobject.h" />
164-
<ClInclude Include="..\Include\metagrammar.h" />
165164
<ClInclude Include="..\Include\methodobject.h" />
166165
<ClInclude Include="..\Include\modsupport.h" />
167166
<ClInclude Include="..\Include\moduleobject.h" />
@@ -175,7 +174,6 @@
175174
<ClInclude Include="..\Include\osmodule.h" />
176175
<ClInclude Include="..\Include\parsetok.h" />
177176
<ClInclude Include="..\Include\patchlevel.h" />
178-
<ClInclude Include="..\Include\pgen.h" />
179177
<ClInclude Include="..\Include\pgenheaders.h" />
180178
<ClInclude Include="..\Include\pyhash.h" />
181179
<ClInclude Include="..\Include\py_curses.h" />
@@ -372,12 +370,8 @@
372370
<ClCompile Include="..\Objects\unicodeobject.c" />
373371
<ClCompile Include="..\Objects\weakrefobject.c" />
374372
<ClCompile Include="..\Parser\acceler.c" />
375-
<ClCompile Include="..\Parser\bitset.c" />
376-
<ClCompile Include="..\Parser\firstsets.c" />
377-
<ClCompile Include="..\Parser\grammar.c" />
378373
<ClCompile Include="..\Parser\grammar1.c" />
379374
<ClCompile Include="..\Parser\listnode.c" />
380-
<ClCompile Include="..\Parser\metagrammar.c" />
381375
<ClCompile Include="..\Parser\myreadline.c" />
382376
<ClCompile Include="..\Parser\node.c" />
383377
<ClCompile Include="..\Parser\parser.c" />

PCbuild/pythoncore.vcxproj.filters

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,6 @@
234234
<ClInclude Include="..\Include\memoryobject.h">
235235
<Filter>Include</Filter>
236236
</ClInclude>
237-
<ClInclude Include="..\Include\metagrammar.h">
238-
<Filter>Include</Filter>
239-
</ClInclude>
240237
<ClInclude Include="..\Include\methodobject.h">
241238
<Filter>Include</Filter>
242239
</ClInclude>
@@ -270,9 +267,6 @@
270267
<ClInclude Include="..\Include\patchlevel.h">
271268
<Filter>Include</Filter>
272269
</ClInclude>
273-
<ClInclude Include="..\Include\pgen.h">
274-
<Filter>Include</Filter>
275-
</ClInclude>
276270
<ClInclude Include="..\Include\pgenheaders.h">
277271
<Filter>Include</Filter>
278272
</ClInclude>
@@ -836,24 +830,12 @@
836830
<ClCompile Include="..\Parser\acceler.c">
837831
<Filter>Parser</Filter>
838832
</ClCompile>
839-
<ClCompile Include="..\Parser\bitset.c">
840-
<Filter>Parser</Filter>
841-
</ClCompile>
842-
<ClCompile Include="..\Parser\firstsets.c">
843-
<Filter>Parser</Filter>
844-
</ClCompile>
845-
<ClCompile Include="..\Parser\grammar.c">
846-
<Filter>Parser</Filter>
847-
</ClCompile>
848833
<ClCompile Include="..\Parser\grammar1.c">
849834
<Filter>Parser</Filter>
850835
</ClCompile>
851836
<ClCompile Include="..\Parser\listnode.c">
852837
<Filter>Parser</Filter>
853838
</ClCompile>
854-
<ClCompile Include="..\Parser\metagrammar.c">
855-
<Filter>Parser</Filter>
856-
</ClCompile>
857839
<ClCompile Include="..\Parser\myreadline.c">
858840
<Filter>Parser</Filter>
859841
</ClCompile>

Parser/bitset.c

Lines changed: 0 additions & 66 deletions
This file was deleted.

0 commit comments

Comments
 (0)