From 81098edfcad52b876bfe4d7bcc811df8dd87038e Mon Sep 17 00:00:00 2001 From: Thomas Dwyer Date: Tue, 30 May 2023 21:05:44 -0500 Subject: [PATCH 1/8] gh-102988: Detect email address parsing errors and return empty tuple to indicate the parsing error (old API) --- Lib/email/utils.py | 60 ++++++++++++++++++++--- Lib/test/test_email/test_email.py | 81 +++++++++++++++++++++++++++++-- 2 files changed, 132 insertions(+), 9 deletions(-) diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 81da5394ea1695..5bcbb1da0c18a8 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -106,12 +106,51 @@ def formataddr(pair, charset='utf-8'): return address +def _pre_parse_validation(email_header_fields): + accepted_values = [] + for v in email_header_fields: + s = v.replace('\\(', '').replace('\\)', '') + if s.count('(') != s.count(')'): + v = "('', '')" + accepted_values.append(v) + + return accepted_values + + +def _post_parse_validation(parsed_email_header_tuples): + accepted_values = [] + # The parser would have parsed a correctly formatted domain-literal + # The existence of an [ after parsing indicates a parsing failure + for v in parsed_email_header_tuples: + if '[' in v[1]: + v = ('', '') + accepted_values.append(v) + + return accepted_values + def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" - all = COMMASPACE.join(str(v) for v in fieldvalues) + """Return a list of (REALNAME, EMAIL) for each fieldvalue, + unless the parse fails, in which case return a 2-tuple of ('', '') + will be returned in it's place. + + If the resulting list is greater than number of items in the fieldvalues + list, a list containing a single empty 2-tuple [('', '')] will be returned. 
+ """ + fieldvalues = [str(v) for v in fieldvalues] + fieldvalues = _pre_parse_validation(fieldvalues) + all = COMMASPACE.join(v for v in fieldvalues) a = _AddressList(all) - return a.addresslist + result = _post_parse_validation(a.addresslist) + + n = 0 + for v in fieldvalues: + n += v.count(',') + 1 + + if len(result) != n: + return [('', '')] + + return result def _format_timetuple_and_zone(timetuple, zone): @@ -212,9 +251,18 @@ def parseaddr(addr): Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', ''). """ - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' + if isinstance(addr, list): + addr = addr[0] + + if not isinstance(addr, str): + return ('', '') + + addr = _pre_parse_validation([addr])[0] + addrs = _post_parse_validation(_AddressList(addr).addresslist) + + if not addrs or len(addrs) > 1: + return ('', '') + return addrs[0] diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 44b405740c4403..f7f5a55e138750 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -3319,15 +3319,90 @@ def test_getaddresses(self): [('Al Person', 'aperson@dom.ain'), ('Bud Person', 'bperson@dom.ain')]) + def test_getaddresses_parsing_errors(self): + """Test for parsing errors from CVE-2023-27043""" + eq = self.assertEqual + eq(utils.getaddresses(['alice@example.org(<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org)<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org<<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org><bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org@<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org,<bob@example.com>']), + [('', 'alice@example.org'), ('', 'bob@example.com')]) + eq(utils.getaddresses(['alice@example.org;<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org:<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org.<bob@example.com>']), + [('', '')]) + 
eq(utils.getaddresses(['alice@example.org"<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org[<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org]<bob@example.com>']), + [('', '')]) + + def test_parseaddr_parsing_errors(self): + """Test for parsing errors from CVE-2023-27043""" + eq = self.assertEqual + eq(utils.parseaddr(['alice@example.org(<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org)<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org<<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org><bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org@<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org,<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org;<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org:<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org.<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org"<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org[<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org]<bob@example.com>']), + ('', '')) + def test_getaddresses_nasty(self): eq = self.assertEqual eq(utils.getaddresses(['foo: ;']), [('', '')]) - eq(utils.getaddresses( - ['[]*-- =~$']), - [('', ''), ('', ''), ('', '*--')]) + eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) eq(utils.getaddresses( ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']), [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) + eq(utils.getaddresses( + ['Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']), + [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) + eq(utils.getaddresses( + ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), + [('', '')]) + eq(utils.getaddresses( + ['Mary <@machine.tld:mary@example.net>, , jdoe@test . 
example']), + [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) + eq(utils.getaddresses( + ['John Doe <jdoe@machine(comment). example>']), + [('John Doe (comment)', 'jdoe@machine.example')]) + eq(utils.getaddresses( + ['"Mary Smith: Personal Account" <smith@home.example>']), + [('Mary Smith: Personal Account', 'smith@home.example')]) + eq(utils.getaddresses( + ['Undisclosed recipients:;']), + [('', '')]) + eq(utils.getaddresses( + ['<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']), + [('', 'boss@nil.test'), ('Giant; Big Box', 'bob@example.net')]) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" From ecbb31ce4f743aa38718f744dcabfd5d1ced041b Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Tue, 30 May 2023 20:29:08 -0700 Subject: [PATCH 2/8] r-prefix strs with \ in the tests --- Lib/test/test_email/test_email.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index f7f5a55e138750..e86ae087955d0b 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -3383,7 +3383,7 @@ def test_getaddresses_nasty(self): ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']), [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) eq(utils.getaddresses( - ['Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']), + [r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']), [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) eq(utils.getaddresses( ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), @@ -3401,7 +3401,7 @@ def test_getaddresses_nasty(self): ['Undisclosed recipients:;']), [('', '')]) eq(utils.getaddresses( - ['<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']), + [r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']), [('', 'boss@nil.test'), ('Giant; Big Box', 'bob@example.net')]) def test_getaddresses_embedded_comment(self): From d8d283103938bf90bd8b904dec34251d9f7403e1 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Tue, 30 May 2023 20:33:03 -0700 Subject: [PATCH 3/8] reword getaddresses docstring --- Lib/email/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 5bcbb1da0c18a8..62599d05cfdd1a 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -130,9 +130,10 @@ def _post_parse_validation(parsed_email_header_tuples): def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) for each fieldvalue, - unless the parse fails, in which case return a 2-tuple of ('', '') - will be returned in it's place. + """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. + + When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in + its place. If the resulting list is greater than number of items in the fieldvalues list, a list containing a single empty 2-tuple [('', '')] will be returned. From b44a0b7bf94acfb3e78ff2d2d4a3614a9db0e19b Mon Sep 17 00:00:00 2001 From: Thomas Dwyer Date: Tue, 6 Jun 2023 20:17:09 -0500 Subject: [PATCH 4/8] Update comment for getaddresses() and fix broken email test --- Lib/email/utils.py | 12 +++++++++--- Lib/test/test_email/test_email.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 62599d05cfdd1a..1760b8040dfe12 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -131,12 +131,18 @@ def _post_parse_validation(parsed_email_header_tuples): def getaddresses(fieldvalues): """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. - + When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in its place. - If the resulting list is greater than number of items in the fieldvalues - list, a list containing a single empty 2-tuple [('', '')] will be returned. 
+ If the resulting list of parsed address is greater than number of + fieldvalues in the input list a parsing error has occurred, so a list + containing a single empty 2-tuple [('', '')] is returned in its place. + This is done to avoid invalid output. + + Malformed input: getaddresses(['alice@example.com <bob@example.com>']) + Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] + Safe output: [('', '')] """ fieldvalues = [str(v) for v in fieldvalues] fieldvalues = _pre_parse_validation(fieldvalues) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index e86ae087955d0b..5238944d6b4788 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -3402,7 +3402,7 @@ def test_getaddresses_nasty(self): [('', '')]) eq(utils.getaddresses( [r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']), - [('', 'boss@nil.test'), ('Giant; Big Box', 'bob@example.net')]) + [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" From 40002d978b52351e3ffeea609a0a55c105a90790 Mon Sep 17 00:00:00 2001 From: Thomas Dwyer Date: Tue, 13 Jun 2023 21:25:27 -0500 Subject: [PATCH 5/8] Add NEWS --- .../Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst diff --git a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst new file mode 100644 index 00000000000000..c36fa3b1210c3a --- /dev/null +++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst @@ -0,0 +1,4 @@ +CVE-2023-27043: Prevent :func:`email.utils.parseaddr` +and :func:`email.utils.getaddresses` from returning the realname portion of an +invalid RFC2822 email header in the email address portion of the 2-tuple +returned after being parsed by 
:mod:`email._parseaddr`. From 302ee76de4468b4b4c5372ec0590365b01c7eba1 Mon Sep 17 00:00:00 2001 From: Thomas Dwyer Date: Wed, 21 Jun 2023 17:15:35 -0500 Subject: [PATCH 6/8] Fix NEWS entry. It was not a Module its Class email._parseaddr.AddressList --- .../Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst index c36fa3b1210c3a..e0434ccd2ccab5 100644 --- a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst +++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst @@ -1,4 +1,4 @@ CVE-2023-27043: Prevent :func:`email.utils.parseaddr` and :func:`email.utils.getaddresses` from returning the realname portion of an invalid RFC2822 email header in the email address portion of the 2-tuple -returned after being parsed by :mod:`email._parseaddr`. +returned after being parsed by :class:`email._parseaddr.AddressList`. From 48d9f5861316673ef73a088d5936e667e6c0121d Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Mon, 10 Jul 2023 15:05:42 -0700 Subject: [PATCH 7/8] Add email.utils docs, narrow the docstring. --- Doc/library/email.utils.rst | 21 ++++++++++++++++++++- Lib/email/utils.py | 8 ++------ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst index 345b64001c1ace..18515597e85ac3 100644 --- a/Doc/library/email.utils.rst +++ b/Doc/library/email.utils.rst @@ -87,7 +87,7 @@ of the new API. This method returns a list of 2-tuples of the form returned by ``parseaddr()``. *fieldvalues* is a sequence of header field values as might be returned by :meth:`Message.get_all `. 
Here's a simple - example that gets all the recipients of a message:: + example that gets all the recipients of a message: from email.utils import getaddresses @@ -97,6 +97,25 @@ of the new API. resent_ccs = msg.get_all('resent-cc', []) all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) + When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` + is returned in its place. Other errors in parsing the list of + addresses, such as a fieldvalue seemingly parsing into multiple + addresses, may result in a list containing a single empty 2-tuple + ``[('', '')]`` being returned rather than returning potentially + invalid output. + + Example malformed input parsing: + + .. doctest:: + + >>> from email.utils import getaddresses + >>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com']) + [('', '')] + + .. versionchanged:: 3.12 + The 2-tuple of ``('', '')`` in the returned values when parsing + fails was added to address a security issue. + .. function:: parsedate(date) diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 1760b8040dfe12..11ad75e94e9345 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -135,14 +135,10 @@ def getaddresses(fieldvalues): When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in its place. - If the resulting list of parsed address is greater than number of - fieldvalues in the input list a parsing error has occurred, so a list + If the number of parsed addresses is not the same as the number of + fieldvalues in the input list, a parsing error has occurred. A list containing a single empty 2-tuple [('', '')] is returned in its place. This is done to avoid invalid output. 
- - Malformed input: getaddresses(['alice@example.com ']) - Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] - Safe output: [('', '')] """ fieldvalues = [str(v) for v in fieldvalues] fieldvalues = _pre_parse_validation(fieldvalues) From f96a3ff86a6d1ebadc2ee30096bd2df2deebc6bc Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Mon, 10 Jul 2023 15:23:09 -0700 Subject: [PATCH 8/8] Add versionchanged to parseaddr, add What's New. --- Doc/library/email.utils.rst | 5 +++++ Doc/whatsnew/3.12.rst | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst index 18515597e85ac3..a87a0bd2e7de6b 100644 --- a/Doc/library/email.utils.rst +++ b/Doc/library/email.utils.rst @@ -65,6 +65,11 @@ of the new API. *email address* parts. Returns a tuple of that information, unless the parse fails, in which case a 2-tuple of ``('', '')`` is returned. + .. versionchanged:: 3.12 + For security reasons, addresses that were ambiguous and could parse into + multiple different addresses now cause ``('', '')`` to be returned + instead of only one of the *potential* addresses. + .. function:: formataddr(pair, charset='utf-8') diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 1a50bce0b65e0b..8d033189f17e82 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -564,6 +564,14 @@ dis :data:`~dis.hasarg` collection instead. (Contributed by Irit Katriel in :gh:`94216`.) +email +----- + +* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return + ``('', '')`` 2-tuples in more situations where invalid email addresses are + encountered instead of potentially inaccurate values. + (Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043.) + fractions ---------