diff --git a/Lib/base64.py b/Lib/base64.py
index eb8f258a2d1977..37e3baa0291473 100755
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -48,34 +48,43 @@ def _bytes_from_decode_data(s):
 
 # Base64 encoding/decoding uses binascii
 
-def b64encode(s, altchars=None):
+def b64encode(s, altchars=None, padded=True):
     """Encode the bytes-like object s using Base64 and return a bytes object.
 
     Optional altchars should be a byte string of length 2 which specifies an
     alternative alphabet for the '+' and '/' characters.  This allows an
     application to e.g. generate url or filesystem safe Base64 strings.
+
+    If padded is True (the default), padding will be applied to the
+    result bytes.  If padded is False, trailing '=' padding is stripped.
     """
     encoded = binascii.b2a_base64(s, newline=False)
     if altchars is not None:
         assert len(altchars) == 2, repr(altchars)
-        return encoded.translate(bytes.maketrans(b'+/', altchars))
+        encoded = encoded.translate(bytes.maketrans(b'+/', altchars))
+    if not padded:
+        encoded = encoded.rstrip(b'=')
     return encoded
 
 
-def b64decode(s, altchars=None, validate=False):
+def b64decode(s, altchars=None, validate=False, padded=True):
     """Decode the Base64 encoded bytes-like object or ASCII string s.
 
     Optional altchars must be a bytes-like object or ASCII string of length 2
     which specifies the alternative alphabet used instead of the '+' and '/'
     characters.
 
-    The result is returned as a bytes object.  A binascii.Error is raised if
-    s is incorrectly padded.
+    The result is returned as a bytes object.
 
     If validate is False (the default), characters that are neither in the
     normal base-64 alphabet nor the alternative alphabet are discarded prior
     to the padding check.  If validate is True, these non-alphabet characters
     in the input result in a binascii.Error.
+
+    If padded is True (the default), a binascii.Error is raised if s is
+    incorrectly padded.  If padded is False and validate is True, a
+    binascii.Error is raised if s contains any padding.  If both padded and
+    validate are False, padding present in s is ignored.
     """
     s = _bytes_from_decode_data(s)
     if altchars is not None:
@@ -84,6 +93,13 @@ def b64decode(s, altchars=None, validate=False):
         s = s.translate(bytes.maketrans(altchars, b'+/'))
     if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
         raise binascii.Error('Non-base64 digit found')
+    if not padded:
+        if validate and not re.match(b'^[A-Za-z0-9+/]*$', s):
+            raise binascii.Error('Padding found in supposedly non-padded input')
+        # a2b_base64() still expects padding; '==' is enough for any valid
+        # unpadded length.  Rebind rather than use +=, which would extend s
+        # in place if it is still the caller's own bytearray.
+        s = s + b'=='
     return binascii.a2b_base64(s)
 
 
@@ -108,29 +124,34 @@ def standard_b64decode(s):
 _urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
 _urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
 
-def urlsafe_b64encode(s):
+def urlsafe_b64encode(s, padded=True):
     """Encode bytes using the URL- and filesystem-safe Base64 alphabet.
 
     Argument s is a bytes-like object to encode.  The result is returned as a
     bytes object.  The alphabet uses '-' instead of '+' and '_' instead of
     '/'.
+
+    If padded is True (the default), the result is padded.  If padded
+    is False, the result will be left unpadded.
     """
-    return b64encode(s).translate(_urlsafe_encode_translation)
+    return b64encode(s, padded=padded).translate(_urlsafe_encode_translation)
 
-def urlsafe_b64decode(s):
+def urlsafe_b64decode(s, validate=False, padded=True):
     """Decode bytes using the URL- and filesystem-safe Base64 alphabet.
 
     Argument s is a bytes-like object or ASCII string to decode.  The result
-    is returned as a bytes object.  A binascii.Error is raised if the input
-    is incorrectly padded.  Characters that are not in the URL-safe base-64
-    alphabet, and are not a plus '+' or slash '/', are discarded prior to the
-    padding check.
+    is returned as a bytes object.  Characters that are not in the URL-safe
+    base-64 alphabet, and are not a plus '+' or slash '/', are discarded prior
+    to the padding check.
 
     The alphabet uses '-' instead of '+' and '_' instead of '/'.
+
+    Arguments validate and padded behave the same as in b64decode(): with
+    validate=True, characters outside the alphabet raise binascii.Error.
     """
     s = _bytes_from_decode_data(s)
     s = s.translate(_urlsafe_decode_translation)
-    return b64decode(s)
+    return b64decode(s, validate=validate, padded=padded)
 
 
 
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
index 47547396b8cb54..f999dc28447162 100644
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -179,6 +179,15 @@ def test_b64encode(self):
         self.check_other_types(base64.urlsafe_b64encode,
                                b'\xd3V\xbeo\xf7\x1d', b'01a-b_cd')
         self.check_encode_type_errors(base64.urlsafe_b64encode)
+        # Test unpadded encoding
+        eq(base64.b64encode(b'abcd', padded=False), b'YWJjZA')
+        eq(base64.urlsafe_b64encode(b"abcdefghijklmnopqrstuvwxyz"
+                                    b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                    b"0123456789!@#0^&*();:<>,. []{}",
+                                    padded=False),
+           b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
+           b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
+           b"Y3ODkhQCMwXiYqKCk7Ojw-LC4gW117fQ")
 
     def test_b64decode(self):
         eq = self.assertEqual
@@ -235,6 +244,11 @@ def test_b64decode(self):
         self.check_other_types(base64.urlsafe_b64decode, b'01a-b_cd',
                                b'\xd3V\xbeo\xf7\x1d')
         self.check_decode_type_errors(base64.urlsafe_b64decode)
+        # Test unpadded decoding
+        eq(base64.b64decode(b'abc', padded=False), b'i\xb7')
+        eq(base64.b64decode('abc', padded=False), b'i\xb7')
+        eq(base64.urlsafe_b64decode('abcd_-', padded=False), b'i\xb7\x1d\xff')
+        eq(base64.b64decode(b'ab=', padded=False), b'i')
 
     def test_b64decode_padding_error(self):
         self.assertRaises(binascii.Error, base64.b64decode, b'abc')
@@ -270,6 +284,36 @@ def test_b64decode_invalid_chars(self):
         res = b'\xFB\xEF\xBE\xFF\xFF\xFF'
         self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res)
         self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res)
+
+    def test_b64decode_unpadded_invalid_chars(self):
+        # Invalid characters must be handled the same way for unpadded input.
+        tests = ((b'%3d', b'\xdd'),
+                 (b'$3d', b'\xdd'),
+                 (b'[', b''),
+                 (b'YW]3', b'am'),
+                 (b'3{d', b'\xdd'),
+                 (b'3d}', b'\xdd'),
+                 (b'@@', b''),
+                 (b'!', b''),
+                 (b'YWJj\nYWI', b'abcab'))
+        funcs = (
+            base64.b64decode,
+            base64.urlsafe_b64decode,
+        )
+        for bstr, res in tests:
+            for func in funcs:
+                with self.subTest(bstr=bstr, func=func):
+                    self.assertEqual(func(bstr, padded=False), res)
+                    self.assertEqual(func(bstr.decode('ascii'), padded=False), res)
+                    with self.assertRaises(binascii.Error):
+                        func(bstr, validate=True, padded=False)
+                    with self.assertRaises(binascii.Error):
+                        func(bstr.decode('ascii'), validate=True, padded=False)
+
+        # Normal alphabet characters not discarded when alternative given
+        res = b'\xFB\xEF\xBE\xFF\xFF\xFF'
+        self.assertEqual(base64.b64decode(b'++[[//]]', b'[]', padded=False), res)
+        self.assertEqual(base64.urlsafe_b64decode(b'++--//__', padded=False), res)
 
     def test_b32encode(self):
         eq = self.assertEqual
diff --git a/Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst b/Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst
new file mode 100644
index 00000000000000..4f76caf4d76685
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst
@@ -0,0 +1,3 @@
+Add a *padded* parameter to :func:`~base64.b64encode`, :func:`~base64.b64decode`,
+:func:`~base64.urlsafe_b64encode` and :func:`~base64.urlsafe_b64decode` to produce
+or accept unpadded Base64; :func:`~base64.urlsafe_b64decode` also gains *validate*.