Bump Numcodecs requirement to 0.6.2 #352

Status: Merged · 15 commits · Dec 4, 2018
5 changes: 5 additions & 0 deletions docs/release.rst
@@ -22,6 +22,11 @@ Enhancements
 Maintenance
 ~~~~~~~~~~~
 
+* The required version of the `numcodecs <http://numcodecs.rtfd.io>`_ package has been upgraded
+  to 0.6.2, which has enabled some code simplification and fixes a failing test involving
+  msgpack encoding. By :user:`John Kirkham <jakirkham>`, :issue:`352`, :issue:`355`,
+  :issue:`324`.
+
 * CI and test environments have been upgraded to include Python 3.7, drop Python 3.4, and
   upgrade all pinned package requirements. :issue:`308`.
 
4 changes: 2 additions & 2 deletions docs/tutorial.rst
@@ -178,8 +178,8 @@ print some diagnostics, e.g.::
 :                  blocksize=0)
 Store type : builtins.dict
 No. bytes : 400000000 (381.5M)
-No. bytes stored : 3242241 (3.1M)
-Storage ratio : 123.4
+No. bytes stored : 3379344 (3.2M)
+Storage ratio : 118.4
 Chunks initialized : 100/100
 
 If you don't specify a compressor, by default Zarr uses the Blosc
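
For context, the diagnostics above come from the tutorial's array example. A hedged reconstruction of that snippet (the shape, chunks and dtype are inferred from the "No. bytes" and "Chunks initialized" values shown; the exact "No. bytes stored" and "Storage ratio" figures depend on the installed zarr/numcodecs/Blosc versions, which is why this diff updates them)::

    import zarr

    z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4')
    z[:] = 42
    # prints a report like the one above, including the "No. bytes stored"
    # and "Storage ratio" lines whose values shift with the compressor version
    print(z.info)
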
2 changes: 1 addition & 1 deletion requirements_dev.txt
@@ -1,3 +1,3 @@
 asciitree==0.3.3
 fasteners==0.14.1
-numcodecs==0.5.5
+numcodecs==0.6.2
2 changes: 1 addition & 1 deletion setup.py
@@ -26,7 +26,7 @@
         'asciitree',
         'numpy>=1.7',
         'fasteners',
-        'numcodecs>=0.5.3',
+        'numcodecs>=0.6.2',
     ],
     package_dir={'': '.'},
     packages=['zarr', 'zarr.tests'],
27 changes: 16 additions & 11 deletions zarr/core.py
@@ -8,6 +8,7 @@
 
 
 import numpy as np
+from numcodecs.compat import ensure_ndarray
 
 
 from zarr.util import (is_total_slice, human_readable_size, normalize_resize_args,
@@ -1743,18 +1744,22 @@ def _decode_chunk(self, cdata):
             for f in self._filters[::-1]:
                 chunk = f.decode(chunk)
 
-        # view as correct dtype
-        if self._dtype == object:
-            if isinstance(chunk, np.ndarray):
-                chunk = chunk.astype(self._dtype)
-            else:
-                raise RuntimeError('cannot read object array without object codec')
-        elif isinstance(chunk, np.ndarray):
+        # view as numpy array with correct dtype
+        chunk = ensure_ndarray(chunk)
+        # special case object dtype, because incorrect handling can lead to
+        # segfaults and other bad things happening
+        if self._dtype != object:
             chunk = chunk.view(self._dtype)
-        else:
-            chunk = np.frombuffer(chunk, dtype=self._dtype)
-
-        # reshape
+        elif chunk.dtype != object:
+            # If we end up here, someone must have hacked around with the filters.
+            # We cannot deal with object arrays unless there is an object
+            # codec in the filter chain, i.e., a filter that converts from object
+            # array to something else during encoding, and converts back to object
+            # array during decoding.
+            raise RuntimeError('cannot read object array without object codec')
+
+        # ensure correct chunk shape
+        chunk = chunk.reshape(-1, order='A')
         chunk = chunk.reshape(self._chunks, order=self._order)
 
         return chunk
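
A minimal sketch of why ensure_ndarray lets the dtype handling collapse into one path (illustrative only, not part of the diff; the '<i4' dtype and the (2, 3) shape are made-up stand-ins for an array's dtype and chunk shape): it wraps any buffer-like payload as a NumPy array without copying, so the same view/reshape sequence works for bytes, bytearray, memoryview and ndarray inputs::

    import numpy as np
    from numcodecs.compat import ensure_ndarray

    raw = bytes(np.arange(6, dtype='<i4'))    # stand-in for a decoded chunk payload
    chunk = ensure_ndarray(raw)               # zero-copy uint8 view of the buffer
    chunk = chunk.view('<i4')                 # reinterpret with the array's dtype
    chunk = chunk.reshape(-1, order='A')      # flatten without copying
    chunk = chunk.reshape((2, 3), order='C')  # then apply the chunk shape
    print(chunk)
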
14 changes: 5 additions & 9 deletions zarr/meta.py
@@ -5,24 +5,20 @@
 
 
 import numpy as np
+from numcodecs.compat import ensure_bytes
 
 
-from zarr.compat import PY2, binary_type, Mapping
+from zarr.compat import PY2, Mapping
 from zarr.errors import MetadataError
 
 
 ZARR_FORMAT = 2
 
 
 def ensure_str(s):
-    if PY2:  # pragma: py3 no cover
-        # noinspection PyUnresolvedReferences
-        if isinstance(s, buffer):  # noqa
-            s = str(s)
-    else:  # pragma: py2 no cover
-        if isinstance(s, memoryview):
-            s = s.tobytes()
-        if isinstance(s, binary_type):
+    if not isinstance(s, str):
+        s = ensure_bytes(s)
+        if not PY2:  # pragma: py2 no cover
             s = s.decode('ascii')
     return s

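A hedged usage sketch of the simplified helper (Python 3 branch only; the function is redefined here purely for illustration): metadata read back from a store may arrive as bytes or a memoryview and is normalised to str before JSON parsing::

    import json
    from numcodecs.compat import ensure_bytes

    def ensure_str(s):
        # mirrors the Python 3 path of the helper above
        if not isinstance(s, str):
            s = ensure_bytes(s).decode('ascii')
        return s

    meta = ensure_str(memoryview(b'{"zarr_format": 2}'))
    print(json.loads(meta)['zarr_format'])   # prints 2
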
30 changes: 5 additions & 25 deletions zarr/storage.py
@@ -31,15 +31,13 @@
 import warnings
 
 
-import numpy as np
-
-
 from zarr.util import (normalize_shape, normalize_chunks, normalize_order,
                        normalize_storage_path, buffer_size,
                        normalize_fill_value, nolock, normalize_dtype)
 from zarr.meta import encode_array_metadata, encode_group_metadata
-from zarr.compat import PY2, binary_type, OrderedDict_move_to_end
+from zarr.compat import PY2, OrderedDict_move_to_end
 from numcodecs.registry import codec_registry
+from numcodecs.compat import ensure_bytes, ensure_contiguous_ndarray
 from zarr.errors import (err_contains_group, err_contains_array, err_bad_compressor,
                          err_fspath_exists_notdir, err_read_only, MetadataError)
 
@@ -444,23 +442,6 @@ def _init_group_metadata(store, overwrite=False, path=None, chunk_store=None):
     store[key] = encode_group_metadata(meta)
 
 
-def ensure_bytes(s):
-    if isinstance(s, binary_type):
-        return s
-    if isinstance(s, np.ndarray):
-        if PY2:  # pragma: py3 no cover
-            # noinspection PyArgumentList
-            return s.tostring(order='A')
-        else:  # pragma: py2 no cover
-            # noinspection PyArgumentList
-            return s.tobytes(order='A')
-    if hasattr(s, 'tobytes'):
-        return s.tobytes()
-    if PY2 and hasattr(s, 'tostring'):  # pragma: py3 no cover
-        return s.tostring()
-    return memoryview(s).tobytes()
-
-
 def _dict_store_keys(d, prefix='', cls=dict):
     for k in d.keys():
         v = d[k]
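
The local ensure_bytes helper removed above is superseded by the version imported from numcodecs.compat. A small sketch of the cases it is assumed to cover (assumption based on numcodecs 0.6.2, not spelled out in the diff)::

    import numpy as np
    from numcodecs.compat import ensure_bytes

    print(ensure_bytes(b'abc'))                    # bytes pass through unchanged
    print(ensure_bytes(memoryview(b'xyz')))        # buffer-like objects are copied to bytes
    print(ensure_bytes(np.arange(3, dtype='u1')))  # arrays are serialised via their buffer
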
@@ -741,9 +722,8 @@ def __getitem__(self, key):
 
     def __setitem__(self, key, value):
 
-        # handle F-contiguous numpy arrays
-        if isinstance(value, np.ndarray) and value.flags.f_contiguous:
-            value = ensure_bytes(value)
+        # coerce to flat, contiguous array (ideally without copying)
+        value = ensure_contiguous_ndarray(value)
 
         # destination path for key
        file_path = os.path.join(self.path, key)
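
The behaviour relied on here is an assumption based on numcodecs 0.6.2 rather than something stated in the diff: ensure_contiguous_ndarray exposes any buffer-like value as a flat ndarray without copying, flattening C- or F-contiguous arrays in 'A' order and rejecting non-contiguous input, so the old F-contiguous special case and its byte copy are no longer needed::

    import numpy as np
    from numcodecs.compat import ensure_contiguous_ndarray

    c = np.arange(6, dtype='<i4').reshape(2, 3)  # C-contiguous
    f = np.asfortranarray(c)                     # F-contiguous copy of the same data
    flat = ensure_contiguous_ndarray(f)          # flat view over f's memory, no copy
    print(flat.shape, flat.tobytes() == f.tobytes(order='A'))   # (6,) True
    print(ensure_contiguous_ndarray(b'abc'))     # bytes are wrapped as a uint8 array
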
@@ -1192,7 +1172,7 @@ def __getitem__(self, key):
     def __setitem__(self, key, value):
         if self.mode == 'r':
             err_read_only()
-        value = ensure_bytes(value)
+        value = ensure_contiguous_ndarray(value)
         with self.mutex:
             self.zf.writestr(key, value)
 
2 changes: 1 addition & 1 deletion zarr/tests/test_core.py
@@ -982,7 +982,7 @@ def test_object_arrays(self):
         z[0] = 'foo'
         assert z[0] == 'foo'
         z[1] = b'bar'
-        assert z[1] == 'bar'  # msgpack gets this wrong
+        assert z[1] == b'bar'
         z[2] = 1
         assert z[2] == 1
         z[3] = [2, 4, 6, 'baz']
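
The assertion change reflects the msgpack fix mentioned in the release note: with numcodecs 0.6.2 the object codec round-trips bytes and str distinctly, so b'bar' no longer comes back as 'bar'. A hedged illustration outside of zarr itself (assumes the MsgPack codec and the msgpack package are available; the test constructs the array through a zarr object array instead)::

    import numpy as np
    from numcodecs import MsgPack

    codec = MsgPack()
    arr = np.array(['foo', b'bar', 1], dtype=object)
    out = codec.decode(codec.encode(arr))
    print(out[1], out[1] == b'bar')   # expected: b'bar' True with numcodecs >= 0.6.2
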
16 changes: 3 additions & 13 deletions zarr/util.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, print_function, division
-import operator
 from textwrap import TextWrapper, dedent
 import numbers
 import uuid
@@ -10,10 +9,11 @@
 from asciitree import BoxStyle, LeftAligned
 from asciitree.traversal import Traversal
 import numpy as np
+from numcodecs.compat import ensure_ndarray
 from numcodecs.registry import codec_registry
 
 
-from zarr.compat import PY2, reduce, text_type, binary_type
+from zarr.compat import PY2, text_type, binary_type
 
 
 # codecs to use for object dtype convenience API
@@ -314,17 +314,7 @@ def normalize_storage_path(path):
 
 
 def buffer_size(v):
-    from array import array as _stdlib_array
-    if PY2 and isinstance(v, _stdlib_array):  # pragma: py3 no cover
-        # special case array.array because does not support buffer
-        # interface in PY2
-        return v.buffer_info()[1] * v.itemsize
-    else:  # pragma: py2 no cover
-        v = memoryview(v)
-        if v.shape:
-            return reduce(operator.mul, v.shape) * v.itemsize
-        else:
-            return v.itemsize
+    return ensure_ndarray(v).nbytes


def info_text_report(items):
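
A quick sketch of what the simplified helper computes (illustrative only): ensure_ndarray gives a no-copy ndarray view of a buffer-like object, so .nbytes replaces the manual memoryview and array.array size arithmetic::

    import array
    import numpy as np
    from numcodecs.compat import ensure_ndarray

    print(ensure_ndarray(b'abcd').nbytes)                        # 4
    print(ensure_ndarray(np.zeros((3, 5), dtype='f8')).nbytes)   # 120
    print(ensure_ndarray(array.array('d', [1.0, 2.0])).nbytes)   # 16
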