diff --git a/Doc/library/re.rst b/Doc/library/re.rst index cbb2f439d15391..8c15462871b8f1 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -200,6 +200,20 @@ The special characters are: place it at the beginning of the set. For example, both ``[()[\]{}]`` and ``[]()[{}]`` will both match a parenthesis. + * Support of nested sets and set operations as in `Unicode Technical + Standard #18`_ might be added in the future. This would change the + syntax, so to facilitate this change a :exc:`FutureWarning` will be raised + in ambiguous cases for the time being. + That includes sets starting with a literal ``'['`` or containing literal + character sequences ``'--'``, ``'&&'``, ``'~~'``, and ``'||'``. To + avoid a warning, escape them with a backslash. + + .. _Unicode Technical Standard #18: https://unicode.org/reports/tr18/ + + .. versionchanged:: 3.7 + :exc:`FutureWarning` is raised if a character set contains constructs + that will change semantically in the future. + ``|`` ``A|B``, where *A* and *B* can be arbitrary REs, creates a regular expression that will match either *A* or *B*. An arbitrary number of REs can be separated by the @@ -829,7 +843,7 @@ form. 
>>> legal_chars = string.ascii_lowercase + string.digits + "!#$%&'*+-.^_`|~:" >>> print('[%s]+' % re.escape(legal_chars)) - [abcdefghijklmnopqrstuvwxyz0123456789!\#\$%&'\*\+\-\.\^_`\|~:]+ + [abcdefghijklmnopqrstuvwxyz0123456789!\#\$%\&'\*\+\-\.\^_`\|\~:]+ >>> operators = ['+', '-', '*', '/', '**'] >>> print('|'.join(map(re.escape, sorted(operators, reverse=True)))) diff --git a/Doc/tools/susp-ignored.csv b/Doc/tools/susp-ignored.csv index 2b3ccf3ac60700..d52f81b76b52f0 100644 --- a/Doc/tools/susp-ignored.csv +++ b/Doc/tools/susp-ignored.csv @@ -300,7 +300,7 @@ whatsnew/3.2,,:gz,">>> with tarfile.open(name='myarchive.tar.gz', mode='w:gz') a whatsnew/3.2,,:location,zope9-location = ${zope9:location} whatsnew/3.2,,:prefix,zope-conf = ${custom:prefix}/etc/zope.conf library/re,,`,!#$%&'*+-.^_`|~: -library/re,,`,!\#\$%&'\*\+\-\.\^_`\|~: +library/re,,`,!\#\$%\&'\*\+\-\.\^_`\|\~: library/tarfile,,:xz,'x:xz' library/xml.etree.elementtree,,:sometag,prefix:sometag library/xml.etree.elementtree,,:fictional,"@_`~' + LITERAL_CHARS = string.ascii_letters + string.digits + '!"%\',/:;<=>@_`' def test_re_escape(self): p = ''.join(chr(i) for i in range(256)) diff --git a/Misc/NEWS.d/next/Library/2017-10-05-12-45-29.bpo-30349.6zKJsF.rst b/Misc/NEWS.d/next/Library/2017-10-05-12-45-29.bpo-30349.6zKJsF.rst new file mode 100644 index 00000000000000..6862e02502ab80 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-10-05-12-45-29.bpo-30349.6zKJsF.rst @@ -0,0 +1,3 @@ +FutureWarning is now emitted if a regular expression contains character set +constructs that will change semantically in the future (nested sets and set +operations).