diff --git a/Doc/howto/regex.rst b/Doc/howto/regex.rst index 5e2f9a9d1837fe..e543f6d5657d79 100644 --- a/Doc/howto/regex.rst +++ b/Doc/howto/regex.rst @@ -738,9 +738,12 @@ given location, they can obviously be matched an infinite number of times. different: ``\A`` still matches only at the beginning of the string, but ``^`` may match at any location inside the string that follows a newline character. -``\Z`` +``\z`` Matches only at the end of the string. +``\Z`` + The same as ``\z``. For compatibility with old Python versions. + ``\b`` Word boundary. This is a zero-width assertion that matches only at the beginning or end of a word. A word is defined as a sequence of alphanumeric diff --git a/Doc/library/re.rst b/Doc/library/re.rst index a91bac53fb4e75..0ee2d68bcbe006 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -266,7 +266,7 @@ The special characters are: not a word boundary as outside a set, and numeric escapes such as ``\1`` are always octal escapes, not group references. Special sequences which do not match a single character such as ``\A`` - and ``\Z`` are not allowed. + and ``\z`` are not allowed. .. index:: single: ^ (caret); in regular expressions @@ -661,11 +661,17 @@ character ``'$'``. matches characters which are neither alphanumeric in the current locale nor the underscore. -.. index:: single: \Z; in regular expressions +.. index:: single: \z; in regular expressions + single: \Z; in regular expressions -``\Z`` +``\z`` Matches only at the end of the string. + .. versionadded:: next + +``\Z`` + The same as ``\z``. For compatibility with old Python versions. + .. index:: single: \a; in regular expressions single: \b; in regular expressions diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 62dd0551483e97..640642b6e90f6e 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -607,6 +607,11 @@ Other language changes ASCII :class:`bytes` and :term:`bytes-like objects <bytes-like object>`. (Contributed by Daniel Pope in :gh:`129349`.) 
+* Support ``\z`` as a synonym for ``\Z`` in :mod:`regular expressions <re>`. + It is interpreted unambiguously in many other regular expression engines, + unlike ``\Z``, which has subtly different behavior. + (Contributed by Serhiy Storchaka in :gh:`133306`.) + * ``\B`` in :mod:`regular expression <re>` now matches empty input string. Now it is always the opposite of ``\b``. (Contributed by Serhiy Storchaka in :gh:`124130`.) diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index 7e8abbf6ffe155..af2808a77da691 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -61,7 +61,7 @@ resulting RE will match the second character. \number Matches the contents of the group of the same number. \A Matches only at the start of the string. - \Z Matches only at the end of the string. + \z Matches only at the end of the string. \b Matches the empty string, but only at the start or end of a word. \B Matches the empty string, but not at the start or end of a word. \d Matches any decimal digit; equivalent to the set [0-9] in diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py index 0990255b22c219..35ab7ede2a75a9 100644 --- a/Lib/re/_parser.py +++ b/Lib/re/_parser.py @@ -49,7 +49,8 @@ r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]), r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), - r"\Z": (AT, AT_END_STRING), # end of string + r"\z": (AT, AT_END_STRING), # end of string + r"\Z": (AT, AT_END_STRING), # end of string (obsolete) } FLAGS = { diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py index 85b026736caac8..e50f5d52bbdec0 100755 --- a/Lib/test/re_tests.py +++ b/Lib/test/re_tests.py @@ -531,7 +531,7 @@ (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'), (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'), # bug 127259: \Z shouldn't depend on multiline mode - (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''), + (r'(?ms).*?x\s*\z(.*)','xx\nx\n', SUCCEED, 'g1', ''), # bug 128899: uppercase literals under the ignorecase flag 
(r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'), (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'), diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index cf8525ed901ad3..f79a6149078996 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -619,6 +619,7 @@ def test_re_fullmatch(self): self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4)) self.assertIsNone(re.fullmatch(r"a+", "ab")) self.assertIsNone(re.fullmatch(r"abc$", "abc\n")) + self.assertIsNone(re.fullmatch(r"abc\z", "abc\n")) self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n")) self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n")) self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4)) @@ -802,6 +803,8 @@ def test_special_escapes(self): self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd", re.ASCII).group(1), "bx") self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc") + self.assertEqual(re.search(r"^\Aabc\z$", "abc", re.M).group(0), "abc") + self.assertIsNone(re.search(r"^\Aabc\z$", "\nabc\n", re.M)) self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc") self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M)) self.assertEqual(re.search(br"\b(b.)\b", @@ -813,6 +816,8 @@ def test_special_escapes(self): self.assertEqual(re.search(br"\B(b.)\B", b"abc bcd bc abxd", re.LOCALE).group(1), b"bx") self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc") + self.assertEqual(re.search(br"^\Aabc\z$", b"abc", re.M).group(0), b"abc") + self.assertIsNone(re.search(br"^\Aabc\z$", b"\nabc\n", re.M)) self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc") self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M)) self.assertEqual(re.search(r"\d\D\w\W\s\S", @@ -836,7 +841,7 @@ def test_other_escapes(self): self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^') self.assertIsNone(re.match(r"[\^a]+", 'b')) re.purge() # for warnings - for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY': + for c in 'ceghijklmopqyCEFGHIJKLMNOPQRTVXY': 
with self.subTest(c): self.assertRaises(re.PatternError, re.compile, '\\%c' % c) for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ': @@ -2608,8 +2613,8 @@ def test_findall_atomic_grouping(self): self.assertEqual(re.findall(r'(?>(?:ab){1,3})', 'ababc'), ['abab']) def test_bug_gh91616(self): - self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer - self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt")) + self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\z', "a.txt")) # reproducer + self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\z', "a.txt")) def test_bug_gh100061(self): # gh-100061 diff --git a/Misc/NEWS.d/next/Library/2025-05-02-21-35-03.gh-issue-133306.-vBye5.rst b/Misc/NEWS.d/next/Library/2025-05-02-21-35-03.gh-issue-133306.-vBye5.rst new file mode 100644 index 00000000000000..d0973af5ffc352 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-02-21-35-03.gh-issue-133306.-vBye5.rst @@ -0,0 +1 @@ +Support ``\z`` as a synonym for ``\Z`` in :mod:`regular expressions <re>`.