# Address-parsing helpers for Lib/email/utils.py, hardened against the
# malformed header values covered by CVE-2023-27043.


def _count_separators(value):
    """Count the commas in *value* that separate one address from the next.

    A comma inside a quoted string ("Doe, John") or inside a (possibly
    nested) parenthesised comment belongs to a display name and is not a
    separator, so it is not counted.  Inside a quoted string or a comment
    a backslash escapes the character that follows it.

    Return the number of separating commas, or None when the comment
    parentheses are unbalanced: a ")" with no matching "(", or a "(" that
    is never closed.
    """
    commas = 0
    depth = 0           # nesting level of (...) comments
    in_quotes = False
    escaped = False     # previous character was an escaping backslash
    for ch in value:
        if escaped:
            escaped = False
        elif in_quotes:
            if ch == '\\':
                escaped = True
            elif ch == '"':
                in_quotes = False
        elif depth:
            # Quotes have no special meaning inside a comment.
            if ch == '\\':
                escaped = True
            elif ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
        elif ch == '"':
            in_quotes = True
        elif ch == '(':
            depth = 1
        elif ch == ')':
            return None
        elif ch == ',':
            commas += 1
    if depth:
        return None
    return commas


def _pre_parse_validation(fieldvalues):
    """Validate the field values are syntactically correct.

    Return a new list, the same length and order as *fieldvalues*, in
    which every value whose comment parentheses are unbalanced has been
    replaced by ''.  The argument itself is never modified, so any
    iterable is accepted.
    """
    return ['' if _count_separators(str(v)) is None else v
            for v in fieldvalues]


def _post_parse_validation(parsedvalues):
    """Validate the parsed values are syntactically correct.

    Return a new list holding only the (realname, email) pairs whose
    email part contains no '['.  *parsedvalues* is left untouched.
    """
    # NOTE(review): this drops every address with a '[' in it, which
    # presumably includes well-formed domain-literals (user@[192.0.2.1])
    # -- confirm that is intended.
    return [pair for pair in parsedvalues if '[' not in pair[1]]


def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    *fieldvalues* is an iterable of header values; each one is converted
    with str() and may itself hold several comma-separated addresses.

    The number of parsed addresses must equal the number of addresses
    announced by the separating commas.  On any mismatch the input is
    treated as malformed and [('', '')] is returned.
    """
    # Materialise once: the values are needed for joining and counting,
    # and the argument may be a one-shot iterator.
    values = _pre_parse_validation([str(v) for v in fieldvalues])
    parsed = _AddressList(COMMASPACE.join(values)).addresslist
    result = _post_parse_validation(parsed)

    # Every value announces one address plus one per separating comma.
    # Values that survived pre-validation are balanced, so the count is
    # never None here.
    expected = sum(1 + _count_separators(v) for v in values)
    if len(result) != expected:
        return [('', '')]

    return result


def parseaddr(addr):
    """Parse addr into its constituent realname and email address parts.

    Return a tuple of realname and email address, unless the parse
    fails, in which case return a 2-tuple of ('', '').

    *addr* is a string, or a list whose first item is used.  An empty
    list, a non-string value, or input that does not parse to exactly
    one address counts as a failed parse.
    """
    if isinstance(addr, list):
        if not addr:
            return ('', '')
        addr = addr[0]

    if not isinstance(addr, str):
        return ('', '')

    addr = _pre_parse_validation([addr])[0]
    addrs = _post_parse_validation(_AddressList(addr).addresslist)

    if len(addrs) != 1:
        return ('', '')

    return addrs[0]