From d6d4d93388539a949471b3a1ba83732f58b30f23 Mon Sep 17 00:00:00 2001 From: Guillaume Pujol Date: Wed, 23 May 2018 17:10:24 +0200 Subject: [PATCH 1/8] allow parsing and base64 Allow base64.b64decode() to accept unpadded input instead of throwing a binascii.Error. Padding check is switchable using the new `padded` boolean argument, and is enabled by default to maintain backward compatibility. In a symmetric fashion, allow base64.b64encode() to produce unpadded output, using the `padded` argument. Default output will include padding for backward compatibility. --- Lib/base64.py | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index eb8f258a2d1977..0eb198b390122a 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -48,34 +48,42 @@ def _bytes_from_decode_data(s): # Base64 encoding/decoding uses binascii -def b64encode(s, altchars=None): +def b64encode(s, altchars=None, padded=True): """Encode the bytes-like object s using Base64 and return a bytes object. Optional altchars should be a byte string of length 2 which specifies an alternative alphabet for the '+' and '/' characters. This allows an application to e.g. generate url or filesystem safe Base64 strings. + + If padded is True (the default), padding will be be applied to the + result bytes. If padding if False, no padding is applied. """ encoded = binascii.b2a_base64(s, newline=False) if altchars is not None: assert len(altchars) == 2, repr(altchars) - return encoded.translate(bytes.maketrans(b'+/', altchars)) + encoded = encoded.translate(bytes.maketrans(b'+/', altchars)) + if not padded: + encoded = encoded.rstrip(b'=') return encoded -def b64decode(s, altchars=None, validate=False): +def b64decode(s, altchars=None, validate=False, padded=True): """Decode the Base64 encoded bytes-like object or ASCII string s. 
Optional altchars must be a bytes-like object or ASCII string of length 2 which specifies the alternative alphabet used instead of the '+' and '/' characters. - The result is returned as a bytes object. A binascii.Error is raised if - s is incorrectly padded. + The result is returned as a bytes object. If validate is False (the default), characters that are neither in the normal base-64 alphabet nor the alternative alphabet are discarded prior to the padding check. If validate is True, these non-alphabet characters in the input result in a binascii.Error. + + If padded is True (the default), a binascii.Error is raised if s is + incorrectly padded. If padded is False, no exception will be thrown if s + is missing padding. """ s = _bytes_from_decode_data(s) if altchars is not None: @@ -84,6 +92,8 @@ def b64decode(s, altchars=None, validate=False): s = s.translate(bytes.maketrans(altchars, b'+/')) if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s): raise binascii.Error('Non-base64 digit found') + if not padded and len(s) % 4: + s = s.ljust(4 * (len(s)//4 + 1), b'=') return binascii.a2b_base64(s) @@ -108,29 +118,35 @@ def standard_b64decode(s): _urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_') _urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/') -def urlsafe_b64encode(s): +def urlsafe_b64encode(s, padded=True): """Encode bytes using the URL- and filesystem-safe Base64 alphabet. Argument s is a bytes-like object to encode. The result is returned as a bytes object. The alphabet uses '-' instead of '+' and '_' instead of '/'. + + If padded is True (the default), the result is left padded. If padded + is False, the result will be left unpadded. """ - return b64encode(s).translate(_urlsafe_encode_translation) + return b64encode(s, padded=padded).translate(_urlsafe_encode_translation) -def urlsafe_b64decode(s): +def urlsafe_b64decode(s, padded=True): """Decode bytes using the URL- and filesystem-safe Base64 alphabet. 
Argument s is a bytes-like object or ASCII string to decode. The result - is returned as a bytes object. A binascii.Error is raised if the input - is incorrectly padded. Characters that are not in the URL-safe base-64 - alphabet, and are not a plus '+' or slash '/', are discarded prior to the - padding check. + is returned as a bytes object. Characters that are not in the URL-safe + base-64 alphabet, and are not a plus '+' or slash '/', are discarded prior + to the padding check. The alphabet uses '-' instead of '+' and '_' instead of '/'. + + If padded is True (the default), a binascii.Error is raised if the input + is incorrectly padded. If padded is False, a missing padding will not + raise an exception. """ s = _bytes_from_decode_data(s) s = s.translate(_urlsafe_decode_translation) - return b64decode(s) + return b64decode(s, padded=padded) From dd69e8529df1e064bd78a693fec6fb85e2b3faa3 Mon Sep 17 00:00:00 2001 From: Guillaume Pujol Date: Wed, 23 May 2018 17:38:35 +0200 Subject: [PATCH 2/8] updated base64 unittests for unpadded input/output Added some test cases for base64 module, to test optional padding in b64decode() and b64encode(), urlsafe_b64encode() and urlsafe_b64decode(). --- Lib/test/test_base64.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 47547396b8cb54..2dd0fd9f53167a 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -179,6 +179,15 @@ def test_b64encode(self): self.check_other_types(base64.urlsafe_b64encode, b'\xd3V\xbeo\xf7\x1d', b'01a-b_cd') self.check_encode_type_errors(base64.urlsafe_b64encode) + # Test unpadded encoding + eq(base64.b64encode(b'abcd', padded=False), b'YWJjZA') + eq(base64.urlsafe_b64encode(b"abcdefghijklmnopqrstuvwxyz" + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" + b"0123456789!@#0^&*();:<>,. 
[]{}", + padded=False), + b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" + b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT" + b"Y3ODkhQCMwXiYqKCk7Ojw-LC4gW117fQ") def test_b64decode(self): eq = self.assertEqual @@ -235,6 +244,10 @@ def test_b64decode(self): self.check_other_types(base64.urlsafe_b64decode, b'01a-b_cd', b'\xd3V\xbeo\xf7\x1d') self.check_decode_type_errors(base64.urlsafe_b64decode) + # Test unpadded decoding + self.assertEqual(base64.b64decode(b'abc', padded=False), b'i\xb7') + self.assertEqual(base64.b64decode('abc', padded=False), b'i\xb7') + self.assertEqual(base64.urlsafe_b64decode('abcd_-', padded=False), b'i\xb7\x1d\xff') def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') From 698ec4983e96c436d30bc8dd4cc86937997f658d Mon Sep 17 00:00:00 2001 From: Guillaume Pujol Date: Wed, 23 May 2018 18:07:37 +0200 Subject: [PATCH 3/8] added NEWS.d entry for bpo-29427 --- .../NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst diff --git a/Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst b/Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst new file mode 100644 index 00000000000000..f6758caf0aaef2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst @@ -0,0 +1 @@ +Allow b64encode() and b64decode() (and derived urlsafe_* methods) from base64 module to produce or accept unpadded input or output. From 8eb9823130591bad69cf08f19418f6dbeca2e91a Mon Sep 17 00:00:00 2001 From: Guillaume Pujol Date: Thu, 24 May 2018 13:29:41 +0200 Subject: [PATCH 4/8] fixed b64decode on non padded strings Fixes b64decode on non padded input if that input contains invalid characters. Implements validate=True for non padded input. 
--- Lib/base64.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index 0eb198b390122a..924a9c544ec2fd 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -82,8 +82,9 @@ def b64decode(s, altchars=None, validate=False, padded=True): in the input result in a binascii.Error. If padded is True (the default), a binascii.Error is raised if s is - incorrectly padded. If padded is False, no exception will be thrown if s - is missing padding. + incorrectly padded. If padded is False and validate is True, a + binascii.Error will be raised if s contains padding. If both padded and + validate are False, any eventual padding will be ignored. """ s = _bytes_from_decode_data(s) if altchars is not None: @@ -92,8 +93,10 @@ def b64decode(s, altchars=None, validate=False, padded=True): s = s.translate(bytes.maketrans(altchars, b'+/')) if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s): raise binascii.Error('Non-base64 digit found') - if not padded and len(s) % 4: - s = s.ljust(4 * (len(s)//4 + 1), b'=') + if not padded: + if validate and not re.match(b'^[A-Za-z0-9+/]*$', s): + raise binascii.Error('Padding found in supposedly non-padded input') + s += b'==' return binascii.a2b_base64(s) @@ -118,14 +121,14 @@ def standard_b64decode(s): _urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_') _urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/') -def urlsafe_b64encode(s, padded=True): +def urlsafe_b64encode(s, validate=False, padded=True): """Encode bytes using the URL- and filesystem-safe Base64 alphabet. Argument s is a bytes-like object to encode. The result is returned as a bytes object. The alphabet uses '-' instead of '+' and '_' instead of '/'. - If padded is True (the default), the result is left padded. If padded + If padded is True (the default), the result is padded. If padded is False, the result will be left unpadded. 
""" return b64encode(s, padded=padded).translate(_urlsafe_encode_translation) @@ -140,9 +143,7 @@ def urlsafe_b64decode(s, padded=True): The alphabet uses '-' instead of '+' and '_' instead of '/'. - If padded is True (the default), a binascii.Error is raised if the input - is incorrectly padded. If padded is False, a missing padding will not - raise an exception. + Arguments padded and validate behave the same as in b64decode(). """ s = _bytes_from_decode_data(s) s = s.translate(_urlsafe_decode_translation) From df1102c42a5ddd06da85d0e65b2cd13f08de173f Mon Sep 17 00:00:00 2001 From: Guillaume Pujol Date: Thu, 24 May 2018 13:31:48 +0200 Subject: [PATCH 5/8] more base64 test cases for unpadded input --- Lib/test/test_base64.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 2dd0fd9f53167a..bbb2e48f379a47 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -248,6 +248,7 @@ def test_b64decode(self): self.assertEqual(base64.b64decode(b'abc', padded=False), b'i\xb7') self.assertEqual(base64.b64decode('abc', padded=False), b'i\xb7') self.assertEqual(base64.urlsafe_b64decode('abcd_-', padded=False), b'i\xb7\x1d\xff') + self.assertEquals(base64.b64decode(b'ab=', padded=False), b'i') def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') @@ -283,6 +284,36 @@ def test_b64decode_invalid_chars(self): res = b'\xFB\xEF\xBE\xFF\xFF\xFF' self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res) self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res) + + def test_b64decode_unpadded_invalid_chars(self): + # issue 1466065: Test some invalid characters. 
+ tests = ((b'%3d', b'\xdd'), + (b'$3d', b'\xdd'), + (b'[', b''), + (b'YW]3', b'am'), + (b'3{d', b'\xdd'), + (b'3d}', b'\xdd'), + (b'@@', b''), + (b'!', b''), + (b'YWJj\nYWI', b'abcab')) + funcs = ( + base64.b64decode, + base64.urlsafe_b64decode, + ) + for bstr, res in tests: + for func in funcs: + with self.subTest(bstr=bstr, func=func): + self.assertEqual(func(bstr, padded=False), res) + self.assertEqual(func(bstr.decode('ascii'), padded=False), res) + with self.assertRaises(binascii.Error): + base64.b64decode(bstr, validate=True, padded=False) + with self.assertRaises(binascii.Error): + base64.b64decode(bstr.decode('ascii'), validate=True, padded=False) + + # Normal alphabet characters not discarded when alternative given + res = b'\xFB\xEF\xBE\xFF\xFF\xFF' + self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res) + self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res) def test_b32encode(self): eq = self.assertEqual From 1b307ea685cca916a686532e2c5c2877a314b671 Mon Sep 17 00:00:00 2001 From: Guillaume Pujol Date: Thu, 24 May 2018 13:38:04 +0200 Subject: [PATCH 6/8] fixed indentation --- Lib/test/test_base64.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index bbb2e48f379a47..f999dc28447162 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -285,7 +285,7 @@ def test_b64decode_invalid_chars(self): self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res) self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res) - def test_b64decode_unpadded_invalid_chars(self): + def test_b64decode_unpadded_invalid_chars(self): # issue 1466065: Test some invalid characters. 
tests = ((b'%3d', b'\xdd'), (b'$3d', b'\xdd'), From 77618add50639c317b3ada50a6225c2f90936118 Mon Sep 17 00:00:00 2001 From: Guillaume Pujol Date: Tue, 8 Oct 2019 14:57:23 +0200 Subject: [PATCH 7/8] Update Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst updated commit description Co-Authored-By: Sergey Fedoseev --- .../next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst b/Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst index f6758caf0aaef2..4f76caf4d76685 100644 --- a/Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst +++ b/Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst @@ -1 +1,3 @@ -Allow b64encode() and b64decode() (and derived urlsafe_* methods) from base64 module to produce or accept unpadded input or output. +Allow :func:`~base64.b64encode` and :func:`~base64.b64decode` (as well as derived +:func:`~base64.urlsafe_b64encode` and :func:`~base64.urlsafe_b64decode`) from +:mod:`base64` module to produce unpadded output or accept unpadded input. From df3d3268b73c60349a3261b80b68d2e2a214eb1e Mon Sep 17 00:00:00 2001 From: Guillaume Pujol Date: Tue, 8 Oct 2019 14:58:01 +0200 Subject: [PATCH 8/8] typos Co-Authored-By: Sergey Fedoseev --- Lib/base64.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index 924a9c544ec2fd..37e3baa0291473 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -55,8 +55,8 @@ def b64encode(s, altchars=None, padded=True): alternative alphabet for the '+' and '/' characters. This allows an application to e.g. generate url or filesystem safe Base64 strings. - If padded is True (the default), padding will be be applied to the - result bytes. If padding if False, no padding is applied. + If padded is True (the default), padding will be applied to the + result bytes.
If padded is False, no padding is applied. """ encoded = binascii.b2a_base64(s, newline=False) if altchars is not None: