python · guillp · May 23, 2018 · May 23, 2018 · May 23, 2018 · May 24, 2018
diff --git a/Lib/base64.py b/Lib/base64.py
@@ -48,34 +48,43 @@ def _bytes_from_decode_data(s):
 
 # Base64 encoding/decoding uses binascii
 
-def b64encode(s, altchars=None):
+def b64encode(s, altchars=None, padded=True):
     """Encode the bytes-like object s using Base64 and return a bytes object.
 
     Optional altchars should be a byte string of length 2 which specifies an
     alternative alphabet for the '+' and '/' characters.  This allows an
     application to e.g. generate url or filesystem safe Base64 strings.
+
+    If padded is True (the default), padding will be applied to the
+    result bytes. If padding is False, no padding is applied.
     """
     encoded = binascii.b2a_base64(s, newline=False)
     if altchars is not None:
         assert len(altchars) == 2, repr(altchars)
-        return encoded.translate(bytes.maketrans(b'+/', altchars))
+        encoded = encoded.translate(bytes.maketrans(b'+/', altchars))
+    if not padded:
+        encoded = encoded.rstrip(b'=')
     return encoded
 
 
-def b64decode(s, altchars=None, validate=False):
+def b64decode(s, altchars=None, validate=False, padded=True):
     """Decode the Base64 encoded bytes-like object or ASCII string s.
 
     Optional altchars must be a bytes-like object or ASCII string of length 2
     which specifies the alternative alphabet used instead of the '+' and '/'
     characters.
 
-    The result is returned as a bytes object.  A binascii.Error is raised if
-    s is incorrectly padded.
+    The result is returned as a bytes object.
 
     If validate is False (the default), characters that are neither in the
     normal base-64 alphabet nor the alternative alphabet are discarded prior
     to the padding check.  If validate is True, these non-alphabet characters
     in the input result in a binascii.Error.
+
+    If padded is True (the default), a binascii.Error is raised if s is
+    incorrectly padded. If padded is False and validate is True, a
+    binascii.Error will be raised if s contains padding. If both padded and
+    validate are False, any eventual padding will be ignored.
     """
     s = _bytes_from_decode_data(s)
     if altchars is not None:
@@ -84,6 +93,10 @@ def b64decode(s, altchars=None, validate=False):
         s = s.translate(bytes.maketrans(altchars, b'+/'))
     if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
         raise binascii.Error('Non-base64 digit found')
+    if not padded:
+        if validate and not re.match(b'^[A-Za-z0-9+/]*$', s):
+            raise binascii.Error('Padding found in supposedly non-padded input')
+        s += b'=='
     return binascii.a2b_base64(s)
 
 
@@ -108,29 +121,33 @@ def standard_b64decode(s):
 _urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
 _urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
 
-def urlsafe_b64encode(s):
+def urlsafe_b64encode(s, validate=False, padded=True):
     """Encode bytes using the URL- and filesystem-safe Base64 alphabet.
 
     Argument s is a bytes-like object to encode.  The result is returned as a
     bytes object.  The alphabet uses '-' instead of '+' and '_' instead of
     '/'.
+
+    If padded is True (the default), the result is padded. If padded
+    is False, the result will be left unpadded.
     """
-    return b64encode(s).translate(_urlsafe_encode_translation)
+    return b64encode(s, padded=padded).translate(_urlsafe_encode_translation)
 
-def urlsafe_b64decode(s):
+def urlsafe_b64decode(s, padded=True):
     """Decode bytes using the URL- and filesystem-safe Base64 alphabet.
 
     Argument s is a bytes-like object or ASCII string to decode.  The result
-    is returned as a bytes object.  A binascii.Error is raised if the input
-    is incorrectly padded.  Characters that are not in the URL-safe base-64
-    alphabet, and are not a plus '+' or slash '/', are discarded prior to the
-    padding check.
+    is returned as a bytes object. Characters that are not in the URL-safe
+    base-64 alphabet, and are not a plus '+' or slash '/', are discarded prior
+    to the padding check.
 
     The alphabet uses '-' instead of '+' and '_' instead of '/'.
+
+    Arguments padded and validate behave the same as in b64decode().
     """
     s = _bytes_from_decode_data(s)
     s = s.translate(_urlsafe_decode_translation)
-    return b64decode(s)
+    return b64decode(s, padded=padded)
 
 
 

diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
@@ -179,6 +179,15 @@ def test_b64encode(self):
         self.check_other_types(base64.urlsafe_b64encode,
                                b'\xd3V\xbeo\xf7\x1d', b'01a-b_cd')
         self.check_encode_type_errors(base64.urlsafe_b64encode)
+        # Test unpadded encoding
+        eq(base64.b64encode(b'abcd', padded=False), b'YWJjZA')
+        eq(base64.urlsafe_b64encode(b"abcdefghijklmnopqrstuvwxyz"
+                                    b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                    b"0123456789!@#0^&*();:<>,. []{}",
+                                    padded=False),
+           b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
+           b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
+           b"Y3ODkhQCMwXiYqKCk7Ojw-LC4gW117fQ")
 
     def test_b64decode(self):
         eq = self.assertEqual
@@ -235,6 +244,11 @@ def test_b64decode(self):
         self.check_other_types(base64.urlsafe_b64decode, b'01a-b_cd',
                                b'\xd3V\xbeo\xf7\x1d')
         self.check_decode_type_errors(base64.urlsafe_b64decode)
+        # Test unpadded decoding
+        self.assertEqual(base64.b64decode(b'abc', padded=False), b'i\xb7')
+        self.assertEqual(base64.b64decode('abc', padded=False), b'i\xb7')
+        self.assertEqual(base64.urlsafe_b64decode('abcd_-', padded=False), b'i\xb7\x1d\xff')
+        self.assertEquals(base64.b64decode(b'ab=', padded=False), b'i')
 
     def test_b64decode_padding_error(self):
         self.assertRaises(binascii.Error, base64.b64decode, b'abc')
@@ -270,6 +284,36 @@ def test_b64decode_invalid_chars(self):
         res = b'\xFB\xEF\xBE\xFF\xFF\xFF'
         self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res)
         self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res)
+
+    def test_b64decode_unpadded_invalid_chars(self):
+        # issue 1466065: Test some invalid characters.
+        tests = ((b'%3d', b'\xdd'),
+                 (b'$3d', b'\xdd'),
+                 (b'[', b''),
+                 (b'YW]3', b'am'),
+                 (b'3{d', b'\xdd'),
+                 (b'3d}', b'\xdd'),
+                 (b'@@', b''),
+                 (b'!', b''),
+                 (b'YWJj\nYWI', b'abcab'))
+        funcs = (
+            base64.b64decode,
+            base64.urlsafe_b64decode,
+        )
+        for bstr, res in tests:
+            for func in funcs:
+                with self.subTest(bstr=bstr, func=func):
+                    self.assertEqual(func(bstr, padded=False), res)
+                    self.assertEqual(func(bstr.decode('ascii'), padded=False), res)
+            with self.assertRaises(binascii.Error):
+                base64.b64decode(bstr, validate=True, padded=False)
+            with self.assertRaises(binascii.Error):
+                base64.b64decode(bstr.decode('ascii'), validate=True, padded=False)
+
+        # Normal alphabet characters not discarded when alternative given
+        res = b'\xFB\xEF\xBE\xFF\xFF\xFF'
+        self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res)
+        self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res)
 
     def test_b32encode(self):
         eq = self.assertEqual

diff --git a/Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst b/Misc/NEWS.d/next/Library/2018-05-23-18-02-03.bpo-29427.82cb18.rst
@@ -0,0 +1,3 @@
+Allow :func:`~base64.b64encode` and :func:`~base64.b64decode` (as well as derived 
+:func:`~base64.urlsafe_b64encode` and :func:`~base64.urlsafe_b64decode`) from 
+:mod:`base64` module to produce or accept unpadded input or output.