From 870a86b6dc5b19d55cf0cc6429aca72d00ee0f9d Mon Sep 17 00:00:00 2001 From: Aaryn Tonita Date: Tue, 26 Feb 2019 13:59:26 +0100 Subject: [PATCH 1/2] Create test illustrating bug 36041 For display names that are longer than the policy's maximum line length, the folding process removes the quotes from the display name which may violate the structure of an address header if the content of the display name is specifically formed. --- .../test_email/test__header_value_parser.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 676732bb3d0261..6975c900ad7d8b 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2770,5 +2770,25 @@ def test_long_filename_attachment(self): " filename*1*=_TEST_TES.txt\n", ) + def test_long_display_name(self): + display_name = ( + # An address as first part of display name is a security issue. + 'spoofed@example.com' + + ' ' + # This triggers the BareQuotedString folding recursion. + + 'a'*self.policy.max_line_length + # These two quoted-pairs become the unescaped " \ outside. + + ' ' + + '\\" \\\\' + ) + addr_spec = 'actual.sender@example.com' + address = '"' + display_name + '" <' + addr_spec + '>\n' + self._test(parser.get_address(address)[0], + '"spoofed@example.com\n' + ' ' + 'a'*self.policy.max_line_length + '\n' + ' \\" \\\\" \n', + ) + + if __name__ == '__main__': unittest.main() From 686a713a84c267d3172950bf0d64ebe33619243b Mon Sep 17 00:00:00 2001 From: Aaryn Tonita Date: Tue, 26 Feb 2019 14:15:44 +0100 Subject: [PATCH 2/2] Fix email bug 36041: don't unescape bare quoted strings during refold Since the _refold_parse_tree is context unaware, the function is unable to determine if we can directly encode the text into the header. In the language of RFC 5322, _refold_parse_tree is unaware if it is folding a structured header or an unstructured header. Since a quoted string will always be acceptable for both header types, when a quoted string needs to be refolded (because it would make the line overflow) prepare quoted children instead of the usual semantic children of the BareQuotedString. --- Lib/email/_header_value_parser.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 922daa2560f006..0fb97a6cb2107f 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2661,6 +2661,24 @@ def _refold_parse_tree(parse_tree, *, policy): if newline or part.startswith_fws(): lines.append(newline + tstr) continue + # Do not strip the quotes from BareQuotedString's by iterating over the + # children. Doing so might break a structured header and this function + # is context unaware. Instead prepare quoted children. + if isinstance(part, BareQuotedString): + subparts = list(part) + quoted_subparts = [] + dquote = ValueTerminal('"', 'ptext') + quoted_subparts.append(dquote) + for subpart in subparts: + quoted_without_quotes = quote_string(subpart)[1:-1] + quoted_terminal = ValueTerminal(quoted_without_quotes, 'ptext') + quoted_subparts.append(quoted_terminal) + quoted_subparts.append(dquote) + if not part.as_ew_allowed: + wrap_as_ew_blocked += 1 + quoted_subparts.append(end_ew_not_allowed) + parts = quoted_subparts + parts + continue if not hasattr(part, 'encode'): # It's not a terminal, try folding the subparts. newparts = list(part)