Skip to content

gh-102988: Detect email address parsing errors and return empty tuple to indicate the parsing error (old API) #105128

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 54 additions & 6 deletions Lib/email/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,51 @@ def formataddr(pair, charset='utf-8'):
return address


def _pre_parse_validation(email_header_fields):
accepted_values = []
for v in email_header_fields:
s = v.replace('\\(', '').replace('\\)', '')
if s.count('(') != s.count(')'):
v = "('', '')"
accepted_values.append(v)

return accepted_values


def _post_parse_validation(parsed_email_header_tuples):
accepted_values = []
# The parser would have parsed a correctly formatted domain-literal
# The existence of an [ after parsing indicates a parsing failure
for v in parsed_email_header_tuples:
if '[' in v[1]:
v = ('', '')
accepted_values.append(v)

return accepted_values


def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) tuples parsed from fieldvalues.

    Every item of fieldvalues is converted with str(), screened by
    _pre_parse_validation(), joined with COMMASPACE and parsed as a single
    address list.  The parsed tuples are then screened by
    _post_parse_validation(), which replaces rejected entries with
    ('', '').

    If the number of parsed addresses differs from the number expected
    (one per field value, plus one for every comma it contains), the whole
    input is treated as a parsing error and a list containing a single
    empty 2-tuple, [('', '')], is returned.
    """
    fieldvalues = _pre_parse_validation([str(v) for v in fieldvalues])
    joined = COMMASPACE.join(fieldvalues)
    result = _post_parse_validation(_AddressList(joined).addresslist)

    # Expected address count: each (pre-validated) field value contributes
    # one address plus one more per comma.  A mismatch means the parser
    # split the input differently than the separators suggest.
    expected = sum(v.count(',') + 1 for v in fieldvalues)
    if len(result) != expected:
        return [('', '')]

    return result


def _format_timetuple_and_zone(timetuple, zone):
Expand Down Expand Up @@ -212,9 +251,18 @@ def parseaddr(addr):
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
"""
addrs = _AddressList(addr).addresslist
if not addrs:
return '', ''
if isinstance(addr, list):
addr = addr[0]

if not isinstance(addr, str):
return ('', '')

addr = _pre_parse_validation([addr])[0]
addrs = _post_parse_validation(_AddressList(addr).addresslist)

if not addrs or len(addrs) > 1:
return ('', '')

return addrs[0]


Expand Down
81 changes: 78 additions & 3 deletions Lib/test/test_email/test_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -3319,15 +3319,90 @@ def test_getaddresses(self):
[('Al Person', '[email protected]'),
('Bud Person', '[email protected]')])

def test_getaddresses_parsing_errors(self):
    """Test for parsing errors from CVE-2023-27043"""
    # NOTE(review): the addresses appear as the placeholder
    # '[email protected]' in this copy -- presumably redacted; confirm
    # against the original test data.
    parse_error = [('', '')]
    # (header value, expected getaddresses() result); a stray special
    # character between two addresses must be reported as a parse error,
    # while a comma is a legitimate separator.
    cases = [
        ('[email protected](<[email protected]>', parse_error),
        ('[email protected])<[email protected]>', parse_error),
        ('[email protected]<<[email protected]>', parse_error),
        ('[email protected]><[email protected]>', parse_error),
        ('[email protected]@<[email protected]>', parse_error),
        ('[email protected],<[email protected]>',
         [('', '[email protected]'), ('', '[email protected]')]),
        ('[email protected];<[email protected]>', parse_error),
        ('[email protected]:<[email protected]>', parse_error),
        ('[email protected].<[email protected]>', parse_error),
        ('[email protected]"<[email protected]>', parse_error),
        ('[email protected][<[email protected]>', parse_error),
        ('[email protected]]<[email protected]>', parse_error),
    ]
    for header, expected in cases:
        self.assertEqual(utils.getaddresses([header]), expected)

def test_parseaddr_parsing_errors(self):
    """Test for parsing errors from CVE-2023-27043"""
    # NOTE(review): the addresses appear as the placeholder
    # '[email protected]' in this copy -- presumably redacted; confirm
    # against the original test data.
    # Every header below holds two addresses glued together by a single
    # character; parseaddr() is expected to report each as ('', '').
    malformed_headers = [
        '[email protected](<[email protected]>',
        '[email protected])<[email protected]>',
        '[email protected]<<[email protected]>',
        '[email protected]><[email protected]>',
        '[email protected]@<[email protected]>',
        '[email protected],<[email protected]>',
        '[email protected];<[email protected]>',
        '[email protected]:<[email protected]>',
        '[email protected].<[email protected]>',
        '[email protected]"<[email protected]>',
        '[email protected][<[email protected]>',
        '[email protected]]<[email protected]>',
    ]
    for header in malformed_headers:
        # The value is passed wrapped in a one-element list.
        self.assertEqual(utils.parseaddr([header]), ('', ''))

def test_getaddresses_nasty(self):
    # Awkward but mostly well-formed address lists: groups, comments,
    # routes, quoted display names and empty list members.
    # NOTE(review): some addresses appear as the placeholder
    # '[email protected]' in this copy -- presumably redacted; confirm
    # against the original test data.
    eq = self.assertEqual
    eq(utils.getaddresses(['foo: ;']), [('', '')])
    # A single field value that parses into several addresses is a
    # count mismatch, which getaddresses() reports as [('', '')].
    eq(utils.getaddresses(['[]*-- =~$']), [('', '')])
    eq(utils.getaddresses(
        ['foo: ;', '"Jason R. Mastaler" <[email protected]>']),
        [('', ''), ('Jason R. Mastaler', '[email protected]')])
    # '\\)' spells the backslash explicitly; the runtime string still
    # contains a backslash followed by ')'.
    eq(utils.getaddresses(
        ['Pete(A nice \\) chap) <pete(his account)@silly.test(his host)>']),
        [('Pete (A nice ) chap his account his host)', '[email protected]')])
    eq(utils.getaddresses(
        ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']),
        [('', '')])
    eq(utils.getaddresses(
        ['Mary <@machine.tld:[email protected]>, , jdoe@test . example']),
        [('Mary', '[email protected]'), ('', ''), ('', '[email protected]')])
    eq(utils.getaddresses(
        ['John Doe <jdoe@machine(comment). example>']),
        [('John Doe (comment)', '[email protected]')])
    eq(utils.getaddresses(
        ['"Mary Smith: Personal Account" <[email protected]>']),
        [('Mary Smith: Personal Account', '[email protected]')])
    eq(utils.getaddresses(
        ['Undisclosed recipients:;']),
        [('', '')])
    eq(utils.getaddresses(
        ['<[email protected]>, "Giant; \"Big\" Box" <[email protected]>']),
        [('', '[email protected]'), ('Giant; Big Box', '[email protected]')])

def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
Expand Down