Skip to content

Commit e9ac890

Browse files
gh-98740: Fix validation of conditional expressions in RE (GH-98764)
In very rare circumstances the JUMP opcode could be confused with the argument of the opcode in the "then" part which doesn't end with the JUMP opcode. This led to incorrect detection of the final JUMP opcode and incorrect calculation of the size of the subexpression. NOTE: Changed return value of functions _validate_inner() and _validate_charset() in Modules/_sre/sre.c. Now they return 0 on success, -1 on failure, and 1 if the last op is JUMP (which usually is a failure). Previously they returned 1 on success and 0 on failure.
1 parent 41bc101 commit e9ac890

File tree

4 files changed

+40
-27
lines changed

4 files changed

+40
-27
lines changed

Doc/library/re.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,9 @@ The special characters are:
483483
some fixed length. Patterns which start with negative lookbehind assertions may
484484
match at the beginning of the string being searched.
485485

486+
.. _re-conditional-expression:
487+
.. index:: single: (?(; in regular expressions
488+
486489
``(?(id/name)yes-pattern|no-pattern)``
487490
Will try to match with ``yes-pattern`` if the group with given *id* or
488491
*name* exists, and with ``no-pattern`` if it doesn't. ``no-pattern`` is

Lib/test/test_re.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,11 @@ def test_re_groupref_exists_errors(self):
630630
self.checkPatternError(r'()(?(2)a)',
631631
"invalid group reference 2", 5)
632632

633+
def test_re_groupref_exists_validation_bug(self):
634+
for i in range(256):
635+
with self.subTest(code=i):
636+
re.compile(r'()(?(1)\x%02x?)' % i)
637+
633638
def test_re_groupref_overflow(self):
634639
from re._constants import MAXGROUPS
635640
self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix internal error in the :mod:`re` module which in very rare circumstances
2+
prevented compilation of a regular expression containing a :ref:`conditional
3+
expression <re-conditional-expression>` without the "else" branch.

Modules/_sre/sre.c

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1623,7 +1623,7 @@ _sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template)
16231623
#endif
16241624

16251625
/* Report failure */
1626-
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
1626+
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)
16271627

16281628
/* Extract opcode, argument, or skip count from code array */
16291629
#define GET_OP \
@@ -1647,7 +1647,7 @@ _sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template)
16471647
skip = *code; \
16481648
VTRACE(("%lu (skip to %p)\n", \
16491649
(unsigned long)skip, code+skip)); \
1650-
if (skip-adj > (uintptr_t)(end - code)) \
1650+
if (skip-adj > (uintptr_t)(end - code)) \
16511651
FAIL; \
16521652
code++; \
16531653
} while (0)
@@ -1736,9 +1736,10 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
17361736
}
17371737
}
17381738

1739-
return 1;
1739+
return 0;
17401740
}
17411741

1742+
/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
17421743
static int
17431744
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
17441745
{
@@ -1816,7 +1817,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
18161817
case SRE_OP_IN_LOC_IGNORE:
18171818
GET_SKIP;
18181819
/* Stop 1 before the end; we check the FAILURE below */
1819-
if (!_validate_charset(code, code+skip-2))
1820+
if (_validate_charset(code, code+skip-2))
18201821
FAIL;
18211822
if (code[skip-2] != SRE_OP_FAILURE)
18221823
FAIL;
@@ -1870,7 +1871,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
18701871
}
18711872
/* Validate the charset */
18721873
if (flags & SRE_INFO_CHARSET) {
1873-
if (!_validate_charset(code, newcode-1))
1874+
if (_validate_charset(code, newcode-1))
18741875
FAIL;
18751876
if (newcode[-1] != SRE_OP_FAILURE)
18761877
FAIL;
@@ -1891,7 +1892,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
18911892
if (skip == 0)
18921893
break;
18931894
/* Stop 2 before the end; we check the JUMP below */
1894-
if (!_validate_inner(code, code+skip-3, groups))
1895+
if (_validate_inner(code, code+skip-3, groups))
18951896
FAIL;
18961897
code += skip-3;
18971898
/* Check that it ends with a JUMP, and that each JUMP
@@ -1905,6 +1906,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
19051906
else if (code+skip-1 != target)
19061907
FAIL;
19071908
}
1909+
if (code != target)
1910+
FAIL;
19081911
}
19091912
break;
19101913

@@ -1920,7 +1923,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
19201923
FAIL;
19211924
if (max > SRE_MAXREPEAT)
19221925
FAIL;
1923-
if (!_validate_inner(code, code+skip-4, groups))
1926+
if (_validate_inner(code, code+skip-4, groups))
19241927
FAIL;
19251928
code += skip-4;
19261929
GET_OP;
@@ -1940,7 +1943,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
19401943
FAIL;
19411944
if (max > SRE_MAXREPEAT)
19421945
FAIL;
1943-
if (!_validate_inner(code, code+skip-3, groups))
1946+
if (_validate_inner(code, code+skip-3, groups))
19441947
FAIL;
19451948
code += skip-3;
19461949
GET_OP;
@@ -1958,7 +1961,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
19581961
case SRE_OP_ATOMIC_GROUP:
19591962
{
19601963
GET_SKIP;
1961-
if (!_validate_inner(code, code+skip-2, groups))
1964+
if (_validate_inner(code, code+skip-2, groups))
19621965
FAIL;
19631966
code += skip-2;
19641967
GET_OP;
@@ -2010,24 +2013,17 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
20102013
to allow arbitrary jumps anywhere in the code; so we just look
20112014
for a JUMP opcode preceding our skip target.
20122015
*/
2013-
if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
2014-
code[skip-3] == SRE_OP_JUMP)
2015-
{
2016-
VTRACE(("both then and else parts present\n"));
2017-
if (!_validate_inner(code+1, code+skip-3, groups))
2018-
FAIL;
2016+
VTRACE(("then part:\n"));
2017+
int rc = _validate_inner(code+1, code+skip-1, groups);
2018+
if (rc == 1) {
2019+
VTRACE(("else part:\n"));
20192020
code += skip-2; /* Position after JUMP, at <skipno> */
20202021
GET_SKIP;
2021-
if (!_validate_inner(code, code+skip-1, groups))
2022-
FAIL;
2023-
code += skip-1;
2024-
}
2025-
else {
2026-
VTRACE(("only a then part present\n"));
2027-
if (!_validate_inner(code+1, code+skip-1, groups))
2028-
FAIL;
2029-
code += skip-1;
2022+
rc = _validate_inner(code, code+skip-1, groups);
20302023
}
2024+
if (rc)
2025+
FAIL;
2026+
code += skip-1;
20312027
break;
20322028

20332029
case SRE_OP_ASSERT:
@@ -2038,22 +2034,28 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
20382034
if (arg & 0x80000000)
20392035
FAIL; /* Width too large */
20402036
/* Stop 1 before the end; we check the SUCCESS below */
2041-
if (!_validate_inner(code+1, code+skip-2, groups))
2037+
if (_validate_inner(code+1, code+skip-2, groups))
20422038
FAIL;
20432039
code += skip-2;
20442040
GET_OP;
20452041
if (op != SRE_OP_SUCCESS)
20462042
FAIL;
20472043
break;
20482044

2045+
case SRE_OP_JUMP:
2046+
if (code + 1 != end)
2047+
FAIL;
2048+
VTRACE(("JUMP: %d\n", __LINE__));
2049+
return 1;
2050+
20492051
default:
20502052
FAIL;
20512053

20522054
}
20532055
}
20542056

20552057
VTRACE(("okay\n"));
2056-
return 1;
2058+
return 0;
20572059
}
20582060

20592061
static int
@@ -2068,7 +2070,7 @@ _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
20682070
static int
20692071
_validate(PatternObject *self)
20702072
{
2071-
if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
2073+
if (_validate_outer(self->code, self->code+self->codesize, self->groups))
20722074
{
20732075
PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
20742076
return 0;

0 commit comments

Comments
 (0)