Skip to content

Commit 29c4ba9

Browse files
author
Fredrik Lundh
committed
SRE 0.9.8: passes the entire test suite
-- reverted REPEAT operator to use "repeat context" strategy (from 0.8.X), but done right this time.
-- got rid of backtracking stack; use nested SRE_MATCH calls instead (should probably put it back again in 0.9.9 ;-)
-- properly reset state in scanner mode
-- don't use aggressive inlining by default
1 parent 19c6afb commit 29c4ba9

File tree

7 files changed

+370
-536
lines changed

7 files changed

+370
-536
lines changed

Lib/sre.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@
55
#
66
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
77
#
8+
# This version of the SRE library can be redistributed under CNRI's
9+
# Python 1.6 license. For any other use, please contact Secret Labs
10+
11+
#
812
# Portions of this engine have been developed in cooperation with
9-
# CNRI. Hewlett-Packard provided funding for 2.0 integration and
13+
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
1014
# other compatibility work.
1115
#
1216

@@ -24,7 +28,7 @@
2428
S = DOTALL = sre_compile.SRE_FLAG_DOTALL
2529
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
2630

27-
# sre extensions (may or may not be in 2.0 final)
31+
# sre extensions (may or may not be in 1.6/2.0 final)
2832
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
2933
U = UNICODE = sre_compile.SRE_FLAG_UNICODE
3034

@@ -168,15 +172,14 @@ def _pickle(p):
168172

169173
class Scanner:
170174
def __init__(self, lexicon):
171-
from sre_constants import BRANCH, SUBPATTERN, INDEX
175+
from sre_constants import BRANCH, SUBPATTERN
172176
self.lexicon = lexicon
173177
# combine phrases into a compound pattern
174178
p = []
175179
s = sre_parse.Pattern()
176180
for phrase, action in lexicon:
177181
p.append(sre_parse.SubPattern(s, [
178-
(SUBPATTERN, (None, sre_parse.parse(phrase))),
179-
(INDEX, len(p))
182+
(SUBPATTERN, (len(p), sre_parse.parse(phrase))),
180183
]))
181184
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
182185
s.groups = len(p)

Lib/sre_compile.py

+29-35
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@
55
#
66
# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
77
#
8-
# Portions of this engine have been developed in cooperation with
9-
# CNRI. Hewlett-Packard provided funding for 2.0 integration and
10-
# other compatibility work.
8+
# See the sre.py file for information on usage and redistribution.
119
#
1210

1311
import _sre
@@ -124,6 +122,7 @@ def fixup(literal, flags=flags):
124122
emit(CHCODES[CATEGORY_NOT_LINEBREAK])
125123
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
126124
if flags & SRE_FLAG_TEMPLATE:
125+
raise error, "internal: unsupported template operator"
127126
emit(OPCODES[REPEAT])
128127
skip = len(code); emit(0)
129128
emit(av[0])
@@ -136,39 +135,33 @@ def fixup(literal, flags=flags):
136135
if lo == 0:
137136
raise error, "nothing to repeat"
138137
if 0 and lo == hi == 1 and op is MAX_REPEAT:
139-
# FIXME: <fl> need a better way to figure out when
140-
# it's safe to use this one (in the parser, probably)
141-
emit(OPCODES[MAX_REPEAT_ONE])
138+
# FIXME: <fl> fast and wrong (but we'll fix that)
139+
emit(OPCODES[REPEAT_ONE])
142140
skip = len(code); emit(0)
143141
emit(av[0])
144142
emit(av[1])
145143
_compile(code, av[2], flags)
146144
emit(OPCODES[SUCCESS])
147145
code[skip] = len(code) - skip
148146
else:
149-
emit(OPCODES[op])
147+
emit(OPCODES[REPEAT])
150148
skip = len(code); emit(0)
151149
emit(av[0])
152150
emit(av[1])
153-
mark = MAXCODE
154-
if av[2][0][0] == SUBPATTERN:
155-
# repeated subpattern
156-
gid, foo = av[2][0][1]
157-
if gid:
158-
mark = (gid-1)*2
159-
emit(mark)
160151
_compile(code, av[2], flags)
161-
emit(OPCODES[SUCCESS])
162152
code[skip] = len(code) - skip
153+
if op == MAX_REPEAT:
154+
emit(OPCODES[MAX_UNTIL])
155+
else:
156+
emit(OPCODES[MIN_UNTIL])
163157
elif op is SUBPATTERN:
164-
gid = av[0]
165-
if gid:
158+
if av[0]:
166159
emit(OPCODES[MARK])
167-
emit((gid-1)*2)
160+
emit((av[0]-1)*2)
168161
_compile(code, av[1], flags)
169-
if gid:
162+
if av[0]:
170163
emit(OPCODES[MARK])
171-
emit((gid-1)*2+1)
164+
emit((av[0]-1)*2+1)
172165
elif op in (SUCCESS, FAILURE):
173166
emit(OPCODES[op])
174167
elif op in (ASSERT, ASSERT_NOT):
@@ -197,11 +190,10 @@ def fixup(literal, flags=flags):
197190
else:
198191
emit(ATCODES[av])
199192
elif op is BRANCH:
193+
emit(OPCODES[op])
200194
tail = []
201195
for av in av[1]:
202-
emit(OPCODES[op])
203196
skip = len(code); emit(0)
204-
emit(MAXCODE) # save mark
205197
_compile(code, av, flags)
206198
emit(OPCODES[JUMP])
207199
tail.append(len(code)); emit(0)
@@ -223,9 +215,6 @@ def fixup(literal, flags=flags):
223215
else:
224216
emit(OPCODES[op])
225217
emit(av-1)
226-
elif op in (MARK, INDEX):
227-
emit(OPCODES[op])
228-
emit(av)
229218
else:
230219
raise ValueError, ("unsupported operand type", op)
231220

@@ -294,16 +283,7 @@ def _compile_info(code, pattern, flags):
294283
except NameError:
295284
pass
296285

297-
def compile(p, flags=0):
298-
# internal: convert pattern list to internal format
299-
300-
# compile, as necessary
301-
if type(p) in STRING_TYPES:
302-
import sre_parse
303-
pattern = p
304-
p = sre_parse.parse(p, flags)
305-
else:
306-
pattern = None
286+
def _compile1(p, flags):
307287

308288
flags = p.pattern.flags | flags
309289
code = []
@@ -316,6 +296,20 @@ def compile(p, flags=0):
316296

317297
code.append(OPCODES[SUCCESS])
318298

299+
return code
300+
301+
def compile(p, flags=0):
302+
# internal: convert pattern list to internal format
303+
304+
if type(p) in STRING_TYPES:
305+
import sre_parse
306+
pattern = p
307+
p = sre_parse.parse(p, flags)
308+
else:
309+
pattern = None
310+
311+
code = _compile1(p, flags)
312+
319313
# print code
320314

321315
# FIXME: <fl> get rid of this limitation!

Lib/sre_constants.py

+8-10
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
#
77
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
88
#
9-
# Portions of this engine have been developed in cooperation with
10-
# CNRI. Hewlett-Packard provided funding for 2.0 integration and
11-
# other compatibility work.
9+
# See the sre.py file for information on usage and redistribution.
1210
#
1311

1412
# should this really be here?
@@ -33,15 +31,15 @@ class error(Exception):
3331
GROUPREF_IGNORE = "groupref_ignore"
3432
IN = "in"
3533
IN_IGNORE = "in_ignore"
36-
INDEX = "index"
3734
INFO = "info"
3835
JUMP = "jump"
3936
LITERAL = "literal"
4037
LITERAL_IGNORE = "literal_ignore"
4138
MARK = "mark"
4239
MAX_REPEAT = "max_repeat"
43-
MAX_REPEAT_ONE = "max_repeat_one"
40+
MAX_UNTIL = "max_until"
4441
MIN_REPEAT = "min_repeat"
42+
MIN_UNTIL = "min_until"
4543
NEGATE = "negate"
4644
NOT_LITERAL = "not_literal"
4745
NOT_LITERAL_IGNORE = "not_literal_ignore"
@@ -91,19 +89,19 @@ class error(Exception):
9189
CATEGORY,
9290
CHARSET,
9391
GROUPREF, GROUPREF_IGNORE,
94-
INDEX,
9592
IN, IN_IGNORE,
9693
INFO,
9794
JUMP,
9895
LITERAL, LITERAL_IGNORE,
9996
MARK,
100-
MAX_REPEAT,
101-
MAX_REPEAT_ONE,
102-
MIN_REPEAT,
97+
MAX_UNTIL,
98+
MIN_UNTIL,
10399
NOT_LITERAL, NOT_LITERAL_IGNORE,
104100
NEGATE,
105101
RANGE,
106-
REPEAT
102+
REPEAT,
103+
REPEAT_ONE,
104+
SUBPATTERN
107105

108106
]
109107

Lib/sre_parse.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@
55
#
66
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
77
#
8-
# Portions of this engine have been developed in cooperation with
9-
# CNRI. Hewlett-Packard provided funding for 2.0 integration and
10-
# other compatibility work.
8+
# See the sre.py file for information on usage and redistribution.
119
#
1210

1311
import string, sys
@@ -536,8 +534,6 @@ def _parse(source, state):
536534
group = state.getgroup(name)
537535
p = _parse_sub(source, state)
538536
subpattern.append((SUBPATTERN, (group, p)))
539-
if group is not None:
540-
p.append((INDEX, group))
541537
else:
542538
while 1:
543539
char = source.get()

0 commit comments

Comments
 (0)