From 1cae766c2d35918adcacf134062c631076f16fc9 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 8 Nov 2018 11:44:49 -0500 Subject: [PATCH 1/7] Coerce Python 2 objects via old buffer interface As Python 2 objects can be comfortably coerced to the old-style buffer interface, which can cleanly be converted to a `memoryview`, go ahead and coerce everything to an old-style buffer in Python 2. Then it is a straightforward matter to get a `memoryview` from the object and handle it the same as one might on Python 3. --- numcodecs/compat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numcodecs/compat.py b/numcodecs/compat.py index f3b6e95c..9464aa14 100644 --- a/numcodecs/compat.py +++ b/numcodecs/compat.py @@ -33,8 +33,8 @@ def buffer_tobytes(v): return v elif isinstance(v, np.ndarray): return v.tobytes(order='A') - elif PY2 and isinstance(v, array.array): # pragma: py3 no cover - return v.tostring() + elif PY2: # pragma: py3 no cover + return memoryview(buffer(v)).tobytes() else: return memoryview(v).tobytes() From 62a5df07e4fabd6813463c2b9c29aa66b895a438 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 8 Nov 2018 11:44:50 -0500 Subject: [PATCH 2/7] Consolidate code between Python 2/3 Once we have an old-style buffer object in Python 2, use the same `memoryview` handling code for both Python 2 and Python 3. 
--- numcodecs/compat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/numcodecs/compat.py b/numcodecs/compat.py index 9464aa14..668f6f8c 100644 --- a/numcodecs/compat.py +++ b/numcodecs/compat.py @@ -34,9 +34,9 @@ def buffer_tobytes(v): elif isinstance(v, np.ndarray): return v.tobytes(order='A') elif PY2: # pragma: py3 no cover - return memoryview(buffer(v)).tobytes() - else: - return memoryview(v).tobytes() + v = buffer(v) + + return memoryview(v).tobytes() def buffer_copy(buf, out=None): From 441c527b614a592c24891e13f540951eab2d6817 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 8 Nov 2018 11:44:51 -0500 Subject: [PATCH 3/7] Smooth over Python 2/3 memoryview coercion In Python 2, everything can be comfortably coerced to the old-style buffer interface, which is easily coerced to the new buffer interface. This is a nice path to proceed down as it can cleanly get a `memoryview` on Python 2 without copying. This strategy also works well on Python 3, as long as we make `buffer` a no-op. 
--- numcodecs/compat.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/numcodecs/compat.py b/numcodecs/compat.py index 668f6f8c..07dfb33b 100644 --- a/numcodecs/compat.py +++ b/numcodecs/compat.py @@ -18,6 +18,7 @@ binary_type = str integer_types = (int, long) reduce = reduce + buffer = buffer else: # pragma: py2 no cover @@ -25,6 +26,7 @@ binary_type = bytes integer_types = int, from functools import reduce + buffer = lambda a: a def buffer_tobytes(v): @@ -33,10 +35,8 @@ def buffer_tobytes(v): return v elif isinstance(v, np.ndarray): return v.tobytes(order='A') - elif PY2: # pragma: py3 no cover - v = buffer(v) - - return memoryview(v).tobytes() + else: + return memoryview(buffer(v)).tobytes() def buffer_copy(buf, out=None): From f20ddbca14e7535b1ac19082c188566b1d9175e5 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 8 Nov 2018 17:34:49 -0500 Subject: [PATCH 4/7] Revert "Smooth over Python 2/3 memoryview coercion" This reverts commit 441c527b614a592c24891e13f540951eab2d6817. 
--- numcodecs/compat.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/numcodecs/compat.py b/numcodecs/compat.py index 07dfb33b..668f6f8c 100644 --- a/numcodecs/compat.py +++ b/numcodecs/compat.py @@ -18,7 +18,6 @@ binary_type = str integer_types = (int, long) reduce = reduce - buffer = buffer else: # pragma: py2 no cover @@ -26,7 +25,6 @@ binary_type = bytes integer_types = int, from functools import reduce - buffer = lambda a: a def buffer_tobytes(v): @@ -35,8 +33,10 @@ def buffer_tobytes(v): return v elif isinstance(v, np.ndarray): return v.tobytes(order='A') - else: - return memoryview(buffer(v)).tobytes() + elif PY2: # pragma: py3 no cover + v = buffer(v) + + return memoryview(v).tobytes() def buffer_copy(buf, out=None): From eb254540c66a7959342da9d152c1039515a8fb26 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 8 Nov 2018 22:08:33 -0500 Subject: [PATCH 5/7] Coerce data to old-style buffers on Python 2 In the GZip and Zlib encoders, coerce data to old-style buffers on Python 2 as both GZip and Zlib can handle these inputs. By doing this, we are able to avoid inducing a copy as would have been the case when converting the data to `bytes`. We also are able to handle a wide variety of inputs, including `bytearray`s and `array`s. 
--- numcodecs/gzip.py | 10 +++++----- numcodecs/zlib.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/numcodecs/gzip.py b/numcodecs/gzip.py index 802d9fcd..babc1b3b 100644 --- a/numcodecs/gzip.py +++ b/numcodecs/gzip.py @@ -8,7 +8,7 @@ from .abc import Codec -from .compat import buffer_copy, handle_datetime, buffer_tobytes, PY2 +from .compat import buffer_copy, handle_datetime, PY2 class GZip(Codec): @@ -42,8 +42,8 @@ def encode(self, buf): buf = buf.tobytes(order='A') if PY2: # pragma: py3 no cover - # ensure bytes, PY2 cannot handle things like bytearray - buf = buffer_tobytes(buf) + # ensure buffer, PY2 cannot handle things like bytearray + buf = buffer(buf) # do compression compressed = io.BytesIO() @@ -59,8 +59,8 @@ def encode(self, buf): def decode(self, buf, out=None): if PY2: # pragma: py3 no cover - # ensure bytes, PY2 cannot handle things like bytearray - buf = buffer_tobytes(buf) + # ensure buffer, PY2 cannot handle things like bytearray + buf = buffer(buf) # do decompression buf = io.BytesIO(buf) diff --git a/numcodecs/zlib.py b/numcodecs/zlib.py index 0ab68d98..4c2c999b 100644 --- a/numcodecs/zlib.py +++ b/numcodecs/zlib.py @@ -7,7 +7,7 @@ from .abc import Codec -from .compat import buffer_copy, handle_datetime, buffer_tobytes, PY2 +from .compat import buffer_copy, handle_datetime, PY2 class Zlib(Codec): @@ -41,8 +41,8 @@ def encode(self, buf): buf = buf.tobytes(order='A') if PY2: # pragma: py3 no cover - # ensure bytes, PY2 cannot handle things like bytearray - buf = buffer_tobytes(buf) + # ensure buffer, PY2 cannot handle things like bytearray + buf = buffer(buf) # do compression return _zlib.compress(buf, self.level) @@ -51,8 +51,8 @@ def encode(self, buf): def decode(self, buf, out=None): if PY2: # pragma: py3 no cover - # ensure bytes, PY2 cannot handle things like bytearray - buf = buffer_tobytes(buf) + # ensure buffer, PY2 cannot handle things like bytearray + buf = buffer(buf) # do decompression dec = 
_zlib.decompress(buf) From eb1b32430bc3bd1951a60225a83c415974de9be1 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 8 Nov 2018 23:22:11 -0500 Subject: [PATCH 6/7] Make flake8 happy by "defining" `buffer` Basically just define `buffer` in the Python 2 world as itself so that `flake8` thinks it is defined and doesn't raise false positives. --- numcodecs/compat.py | 1 + numcodecs/gzip.py | 2 +- numcodecs/zlib.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/numcodecs/compat.py b/numcodecs/compat.py index 668f6f8c..6a4149d4 100644 --- a/numcodecs/compat.py +++ b/numcodecs/compat.py @@ -14,6 +14,7 @@ if PY2: # pragma: py3 no cover + buffer = buffer text_type = unicode binary_type = str integer_types = (int, long) diff --git a/numcodecs/gzip.py b/numcodecs/gzip.py index babc1b3b..c6bb1af6 100644 --- a/numcodecs/gzip.py +++ b/numcodecs/gzip.py @@ -8,7 +8,7 @@ from .abc import Codec -from .compat import buffer_copy, handle_datetime, PY2 +from .compat import buffer, buffer_copy, handle_datetime, PY2 class GZip(Codec): diff --git a/numcodecs/zlib.py b/numcodecs/zlib.py index 4c2c999b..dfa29f02 100644 --- a/numcodecs/zlib.py +++ b/numcodecs/zlib.py @@ -7,7 +7,7 @@ from .abc import Codec -from .compat import buffer_copy, handle_datetime, PY2 +from .compat import buffer, buffer_copy, handle_datetime, PY2 class Zlib(Codec): From a69c5c29a8de42cf3e4581cfeb675c20fc76b056 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 8 Nov 2018 23:22:35 -0500 Subject: [PATCH 7/7] Alias `buffer` as `memoryview` on Python 3 This is sort of a lie. That said, they are pretty close to one another and most compat documentation suggests doing this to migrate Python 2 code to Python 3. We only need this so importing `buffer` to appease `flake8` doesn't become even more of a mess, which seems a bit odd. 
--- numcodecs/compat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/numcodecs/compat.py b/numcodecs/compat.py index 6a4149d4..82528a7a 100644 --- a/numcodecs/compat.py +++ b/numcodecs/compat.py @@ -22,6 +22,7 @@ else: # pragma: py2 no cover + buffer = memoryview text_type = str binary_type = bytes integer_types = int,