Skip to content

bpo-29427: allow unpadded input and output in base64 module #7072

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 30 additions & 13 deletions Lib/base64.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,34 +48,43 @@ def _bytes_from_decode_data(s):

# Base64 encoding/decoding uses binascii

def b64encode(s, altchars=None):
def b64encode(s, altchars=None, padded=True):
"""Encode the bytes-like object s using Base64 and return a bytes object.

Optional altchars should be a byte string of length 2 which specifies an
alternative alphabet for the '+' and '/' characters. This allows an
application to e.g. generate url or filesystem safe Base64 strings.

If padded is True (the default), padding will be applied to the
result bytes. If padded is False, no padding is applied.
"""
encoded = binascii.b2a_base64(s, newline=False)
if altchars is not None:
assert len(altchars) == 2, repr(altchars)
return encoded.translate(bytes.maketrans(b'+/', altchars))
encoded = encoded.translate(bytes.maketrans(b'+/', altchars))
if not padded:
encoded = encoded.rstrip(b'=')
return encoded


def b64decode(s, altchars=None, validate=False):
def b64decode(s, altchars=None, validate=False, padded=True):
"""Decode the Base64 encoded bytes-like object or ASCII string s.

Optional altchars must be a bytes-like object or ASCII string of length 2
which specifies the alternative alphabet used instead of the '+' and '/'
characters.

The result is returned as a bytes object. A binascii.Error is raised if
s is incorrectly padded.
The result is returned as a bytes object.

If validate is False (the default), characters that are neither in the
normal base-64 alphabet nor the alternative alphabet are discarded prior
to the padding check. If validate is True, these non-alphabet characters
in the input result in a binascii.Error.

If padded is True (the default), a binascii.Error is raised if s is
incorrectly padded. If padded is False and validate is True, a
binascii.Error will be raised if s contains padding. If both padded and
validate are False, any padding that is present will be ignored.
"""
s = _bytes_from_decode_data(s)
if altchars is not None:
Expand All @@ -84,6 +93,10 @@ def b64decode(s, altchars=None, validate=False):
s = s.translate(bytes.maketrans(altchars, b'+/'))
if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
raise binascii.Error('Non-base64 digit found')
if not padded:
if validate and not re.match(b'^[A-Za-z0-9+/]*$', s):
raise binascii.Error('Padding found in supposedly non-padded input')
s += b'=='

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why two ==s is always the right padding. I may be missing something.

Copy link

@bitti bitti Feb 27, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What counts as "right" padding probably depends on the base64 flavor, but as far as I understand the a2b_base64 implementation, extra padding characters are simply ignored, so always appending == should be safe in this context.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, thanks. Maybe worth putting that in a comment in the code.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am using util from django.utils.http.urlsafe_base64_encode

https://docs.djangoproject.com/en/3.1/ref/utils/#django.utils.http.urlsafe_base64_encode

return binascii.a2b_base64(s)


Expand All @@ -108,29 +121,33 @@ def standard_b64decode(s):
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')

def urlsafe_b64encode(s):
def urlsafe_b64encode(s, validate=False, padded=True):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The validate arg isn't used. And if it's going to be added it should be added to urlsafe_b64decode instead.

"""Encode bytes using the URL- and filesystem-safe Base64 alphabet.

Argument s is a bytes-like object to encode. The result is returned as a
bytes object. The alphabet uses '-' instead of '+' and '_' instead of
'/'.

If padded is True (the default), the result is padded. If padded
is False, the result will be left unpadded.
"""
return b64encode(s).translate(_urlsafe_encode_translation)
return b64encode(s, padded=padded).translate(_urlsafe_encode_translation)

def urlsafe_b64decode(s):
def urlsafe_b64decode(s, padded=True):
"""Decode bytes using the URL- and filesystem-safe Base64 alphabet.

Argument s is a bytes-like object or ASCII string to decode. The result
is returned as a bytes object. A binascii.Error is raised if the input
is incorrectly padded. Characters that are not in the URL-safe base-64
alphabet, and are not a plus '+' or slash '/', are discarded prior to the
padding check.
is returned as a bytes object. Characters that are not in the URL-safe
base-64 alphabet, and are not a plus '+' or slash '/', are discarded prior
to the padding check.

The alphabet uses '-' instead of '+' and '_' instead of '/'.

Argument padded behaves the same as in b64decode().
"""
s = _bytes_from_decode_data(s)
s = s.translate(_urlsafe_decode_translation)
return b64decode(s)
return b64decode(s, padded=padded)



Expand Down
44 changes: 44 additions & 0 deletions Lib/test/test_base64.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,15 @@ def test_b64encode(self):
self.check_other_types(base64.urlsafe_b64encode,
b'\xd3V\xbeo\xf7\x1d', b'01a-b_cd')
self.check_encode_type_errors(base64.urlsafe_b64encode)
# Test unpadded encoding
eq(base64.b64encode(b'abcd', padded=False), b'YWJjZA')
eq(base64.urlsafe_b64encode(b"abcdefghijklmnopqrstuvwxyz"
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
b"0123456789!@#0^&*();:<>,. []{}",
padded=False),
b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
b"Y3ODkhQCMwXiYqKCk7Ojw-LC4gW117fQ")

def test_b64decode(self):
eq = self.assertEqual
Expand Down Expand Up @@ -235,6 +244,11 @@ def test_b64decode(self):
self.check_other_types(base64.urlsafe_b64decode, b'01a-b_cd',
b'\xd3V\xbeo\xf7\x1d')
self.check_decode_type_errors(base64.urlsafe_b64decode)
# Test unpadded decoding
self.assertEqual(base64.b64decode(b'abc', padded=False), b'i\xb7')
self.assertEqual(base64.b64decode('abc', padded=False), b'i\xb7')
self.assertEqual(base64.urlsafe_b64decode('abcd_-', padded=False), b'i\xb7\x1d\xff')
self.assertEquals(base64.b64decode(b'ab=', padded=False), b'i')

def test_b64decode_padding_error(self):
self.assertRaises(binascii.Error, base64.b64decode, b'abc')
Expand Down Expand Up @@ -270,6 +284,36 @@ def test_b64decode_invalid_chars(self):
res = b'\xFB\xEF\xBE\xFF\xFF\xFF'
self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res)
self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res)

def test_b64decode_unpadded_invalid_chars(self):
# issue 1466065: Test some invalid characters.
tests = ((b'%3d', b'\xdd'),
(b'$3d', b'\xdd'),
(b'[', b''),
(b'YW]3', b'am'),
(b'3{d', b'\xdd'),
(b'3d}', b'\xdd'),
(b'@@', b''),
(b'!', b''),
(b'YWJj\nYWI', b'abcab'))
funcs = (
base64.b64decode,
base64.urlsafe_b64decode,
)
for bstr, res in tests:
for func in funcs:
with self.subTest(bstr=bstr, func=func):
self.assertEqual(func(bstr, padded=False), res)
self.assertEqual(func(bstr.decode('ascii'), padded=False), res)
with self.assertRaises(binascii.Error):
base64.b64decode(bstr, validate=True, padded=False)
with self.assertRaises(binascii.Error):
base64.b64decode(bstr.decode('ascii'), validate=True, padded=False)

# Normal alphabet characters not discarded when alternative given
res = b'\xFB\xEF\xBE\xFF\xFF\xFF'
self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res)
self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res)

def test_b32encode(self):
eq = self.assertEqual
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Allow :func:`~base64.b64encode` and :func:`~base64.b64decode` (as well as the derived
:func:`~base64.urlsafe_b64encode` and :func:`~base64.urlsafe_b64decode`) in the
:mod:`base64` module to produce unpadded output and accept unpadded input.