Skip to content

[DRAFT] V3 spec implementation. #568

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ before_script:

install:
- pip install -U pip setuptools wheel tox-travis coveralls mypy
- pip install trio pytest-trio pytest-asyncio
- |
if [[ "$TRAVIS_PYTHON_VERSION" == "3.7" ]] || [[ "$TRAVIS_PYTHON_VERSION" == "3.8" ]]; then
pip install -U pip redio
fi

script:
- tox
Expand Down
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Contents
spec
release
contributing
v3

Projects using Zarr
-------------------
Expand Down
28 changes: 28 additions & 0 deletions docs/v3.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
Zarr Spec V3
============

See `zarr v3 specification <https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/>`__

Using the current development branch, you can import the new stores and utilities from ``zarr.v3``


V3 stores
---------

- SyncV3RedisStore
- SyncV3MemoryStore
- SyncV3DirectoryStore

These three stores can be used to talk directly to a v3 archive using the v3 API.

``V2from3Adapter`` can be used to wrap a v3 store instance to expose a v2 API, for libraries that might directly manipulate a v2 store::

zarr.open(V2from3Adapter(SyncV3DirectoryStore('v3.zarr')))


``StoreComparer`` can be used to wrap two stores and check that all operations on the resulting object give identical results::

mystore = StoreComparer(MemoryStore(), V2from3Adapter(SyncV3MemoryStore()))
mystore['group']

The first store is assumed to be the reference store and the second the store under test.
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
[pytest]
doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS IGNORE_EXCEPTION_DETAIL
addopts = --durations=10
trio_mode = true
filterwarnings =
ignore:DictStore has been renamed to MemoryStore and will be removed in.*:DeprecationWarning
error::DeprecationWarning:zarr.*
ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning
8 changes: 7 additions & 1 deletion requirements_dev_optional.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ pytest-cov==2.7.1
pytest-doctestplus==0.4.0
pytest-remotedata==0.3.2
h5py==2.10.0
s3fs==0.5.0; python_version > '3.6'
moto>=1.3.14; python_version > '3.6'
flask
s3fs==0.5.0; python_version > '3.6'
# all async features in v3
pytest-trio ; python_version >= '3.6'
trio ; python_version >= '3.6'
redio ; python_version >= '3.7' and sys_platform != 'win32'
xarray ; python_version >= '3.8'
netCDF4 ; python_version >= '3.8'
3 changes: 2 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ commands =
# run doctests in the tutorial and spec
py38: python -m doctest -o NORMALIZE_WHITESPACE -o ELLIPSIS docs/tutorial.rst docs/spec/v2.rst
# pep8 checks
py38: flake8 zarr
# temporarily disable that.
# py38: flake8 zarr
# print environment for debugging
pip freeze
deps =
Expand Down
18 changes: 16 additions & 2 deletions zarr/attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from collections.abc import MutableMapping

from zarr.meta import parse_metadata
from zarr.util import json_dumps
from zarr.util import json_dumps, json_loads


class Attributes(MutableMapping):
Expand All @@ -27,6 +27,14 @@ class Attributes(MutableMapping):

def __init__(self, store, key='.zattrs', read_only=False, cache=True,
synchronizer=None):

assert not key.endswith("root/.group")
self._version = getattr(store, "_store_version", 2)
assert key

if self._version == 3 and ".z" in key:
raise ValueError("nop, this is v3")

self.store = store
self.key = key
self.read_only = read_only
Expand All @@ -40,7 +48,12 @@ def _get_nosync(self):
except KeyError:
d = dict()
else:
d = parse_metadata(data)
if self._version == 3:
assert isinstance(data, bytes)
d = json_loads(data)["attributes"]
else:
d = parse_metadata(data)
assert isinstance(d, dict)
return d

def asdict(self):
Expand Down Expand Up @@ -110,6 +123,7 @@ def put(self, d):
self._write_op(self._put_nosync, d)

def _put_nosync(self, d):
    """Replace the stored attribute set with *d*, without synchronization.

    Parameters
    ----------
    d : dict
        Attribute dictionary serialized as JSON and written to the store
        under ``self.key``.
    """
    # In v3, attributes live inside the group/array metadata document rather
    # than under a standalone key, so this v2-style write must not run there.
    assert self._version != 3, "attributes are stored on group/arrays in v3."
    self.store[self.key] = json_dumps(d)
    # Keep the in-memory cache consistent with what was just written so
    # subsequent reads can skip the store lookup.
    if self.cache:
        self._cached_asdict = d
Expand Down
71 changes: 54 additions & 17 deletions zarr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,26 @@
from zarr.attrs import Attributes
from zarr.codecs import AsType, get_codec
from zarr.errors import ArrayNotFoundError, ReadOnlyError
from zarr.indexing import (BasicIndexer, CoordinateIndexer, MaskIndexer,
OIndex, OrthogonalIndexer, VIndex, check_fields,
check_no_multi_fields, ensure_tuple,
err_too_many_indices, is_contiguous_selection,
is_scalar, pop_fields)
from zarr.meta import decode_array_metadata, encode_array_metadata
from zarr.indexing import (
BasicIndexer,
CoordinateIndexer,
MaskIndexer,
OIndex,
OrthogonalIndexer,
VIndex,
check_fields,
check_no_multi_fields,
ensure_tuple,
err_too_many_indices,
is_contiguous_selection,
is_scalar,
pop_fields,
)
from zarr.meta import (
decode_array_metadata,
encode_array_metadata,
decode_array_metadata_v3,
)
from zarr.storage import array_meta_key, attrs_key, getsize, listdir
from zarr.util import (InfoReporter, check_array_shape, human_readable_size,
is_total_slice, nolock, normalize_chunks,
Expand Down Expand Up @@ -111,6 +125,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
self._store = store
self._chunk_store = chunk_store
self._path = normalize_storage_path(path)
self._version = getattr(store, "_store_version", 2)
if self._path:
self._key_prefix = self._path + '/'
else:
Expand All @@ -124,7 +139,14 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
self._load_metadata()

# initialize attributes
akey = self._key_prefix + attrs_key
if self._version == 2:
akey = self._key_prefix + attrs_key
else:
if self._key_prefix:
mkey = "meta/root/" + self._key_prefix + ".array"
else:
mkey = "meta/root.array"
akey = mkey
self._attrs = Attributes(store, key=akey, read_only=read_only,
synchronizer=synchronizer, cache=cache_attrs)

Expand All @@ -146,20 +168,32 @@ def _load_metadata(self):

def _load_metadata_nosync(self):
try:
mkey = self._key_prefix + array_meta_key
if self._version == 2:
mkey = self._key_prefix + array_meta_key
elif self._version == 3:
mkey = "meta/root/" + self._key_prefix + ".array"
meta_bytes = self._store[mkey]
except KeyError:
raise ArrayNotFoundError(self._path)
else:

# decode and store metadata as instance members
meta = decode_array_metadata(meta_bytes)
self._meta = meta
self._shape = meta['shape']
self._chunks = meta['chunks']
self._dtype = meta['dtype']
self._fill_value = meta['fill_value']
self._order = meta['order']
if self._version == 2:
meta = decode_array_metadata(meta_bytes)
self._meta = meta
self._shape = meta["shape"]
self._dtype = meta["dtype"]
self._chunks = meta["chunks"]
self._fill_value = meta["fill_value"]
self._order = meta["order"]
elif self._version == 3:
meta = decode_array_metadata_v3(meta_bytes)
self._meta = meta
self._shape = meta["shape"]
self._chunks = meta["chunk_grid"]
self._dtype = meta["data_type"]
self._fill_value = meta["fill_value"]
self._order = meta["chunk_memory_layout"]

# setup compressor
config = meta['compressor']
Expand All @@ -169,7 +203,7 @@ def _load_metadata_nosync(self):
self._compressor = get_codec(config)

# setup filters
filters = meta['filters']
filters = meta.get("filters", [])
if filters:
filters = [get_codec(config) for config in filters]
self._filters = filters
Expand Down Expand Up @@ -1583,7 +1617,10 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection,

try:
# obtain compressed data for chunk
cdata = self.chunk_store[ckey]
if self._version == 2:
cdata = self.chunk_store[ckey]
elif self._version == 3:
cdata = self.chunk_store["data/root/" + ckey]

except KeyError:
# chunk not initialized
Expand Down
Loading