From adb881257c4ea8d36263ed574a5fd9b832f3eee8 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Mon, 24 Jan 2022 21:20:57 -0500 Subject: [PATCH 1/2] bpo-46503: Prevent an assert from firing. Also fix one nearby tiny PEP-7 nit. --- Lib/test/test_fstring.py | 4 ++++ Parser/string_parser.c | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index bd1ca943c7c094..d0b1ade15137ba 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -746,12 +746,16 @@ def test_misformed_unicode_character_name(self): # differently inside f-strings. self.assertAllRaise(SyntaxError, r"\(unicode error\) 'unicodeescape' codec can't decode bytes in position .*: malformed \\N character escape", [r"f'\N'", + r"f'\N '", + r"f'\N '", # See bpo-46503. r"f'\N{'", r"f'\N{GREEK CAPITAL LETTER DELTA'", # Here are the non-f-string versions, # which should give the same errors. r"'\N'", + r"'\N '", + r"'\N '", r"'\N{'", r"'\N{GREEK CAPITAL LETTER DELTA'", ]) diff --git a/Parser/string_parser.c b/Parser/string_parser.c index 57d9b9ed3fdbbd..0b5e30ba2ca6a4 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -442,12 +442,23 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw, if (!raw && ch == '\\' && s < end) { ch = *s++; if (ch == 'N') { + /* We need to look at and skip matching braces for "\N{name}" + sequences because otherwise we'll think the opening '{' + starts an expression, which is not the case with "\N". + Keep looking for either a matched '{' '}' pair, or the end + of the string. */ + if (s < end && *s++ == '{') { while (s < end && *s++ != '}') { } continue; } - break; + + /* This is an invalid "\N" sequence, since it's a "\N" not + followed by a "{". Just keep parsing this literal. This + error will be caught later by + decode_unicode_with_escapes(). 
*/ + continue; } if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) { return -1; @@ -491,7 +502,8 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw, *literal = PyUnicode_DecodeUTF8Stateful(literal_start, s - literal_start, NULL, NULL); - } else { + } + else { *literal = decode_unicode_with_escapes(p, literal_start, s - literal_start, t); } From 0607094cde1c0c18bf7ef483c434ffaf7472841e Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Mon, 24 Jan 2022 21:24:56 -0500 Subject: [PATCH 2/2] Added blurb. --- .../Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst new file mode 100644 index 00000000000000..e48028d72ca8e9 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-24-21-24-41.bpo-46503.4UrPsE.rst @@ -0,0 +1 @@ +Fix an assertion failure when parsing some invalid ``\N`` escape sequences in f-strings.