From a61828fb34f00145fb200ca3dcc1e20d14518e5d Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Mon, 28 Sep 2020 11:40:47 -0700 Subject: [PATCH 1/2] [DRAFT] V3 spec implmentation. This is mostly opened to foster discussion and need code from https://github.com/Carreau/zarr-spec-v3-impl. IN the above mentioned repository I'm working on looking at what an implementation of the spec v3 could look to inform us on the possible transition and compatiblity shims. So far I have a rough implementation of an in memory v3 store as well as multiple utilities. 1) A base class that automatically provide sync version of all async method of a class. I'm playing with the idea of having most method async as this may be useful in some context. For example when creating a array at /a/b/c/d/e/f/g/h/i/j/k you want to check that None of the parents are arrays, which can be done with N async requests. 2) An adapted class that wraps a v3 store and provide a v2 API. My though is that most code is currently v2 compatible and it would be useful for legacy codebase and early testing of store. 3) a class that wrap 2 stores, a reference and a tested one, replicate operation on both stores, and abort if it sees any difference in behavior. This could help to catch changes in behavior. The tests in this PR start to test the v3 memorystore and compare it to the v2 memorystore. --- .travis.yml | 5 + docs/index.rst | 1 + docs/v3.rst | 28 ++ pytest.ini | 2 + requirements_dev_optional.txt | 8 +- tox.ini | 3 +- zarr/attrs.py | 18 +- zarr/core.py | 73 +++- zarr/hierarchy.py | 102 ++++-- zarr/meta.py | 94 ++++- zarr/storage.py | 69 +++- zarr/tests/test_hierarchy.py | 86 +++++ zarr/tests/test_storage.py | 19 + zarr/tests/test_xarray.py | 41 +++ zarr/v3/__init__.py | 642 ++++++++++++++++++++++++++++++++++ zarr/v3/comparer.py | 97 +++++ zarr/v3/protocol.py | 0 zarr/v3/storage.py | 68 ++++ zarr/v3/test_protocol.py | 87 +++++ zarr/v3/utils.py | 71 ++++ 20 files changed, 1455 insertions(+), 59 deletions(-) create mode 100644 docs/v3.rst create mode 100644 zarr/tests/test_xarray.py create mode 100644 zarr/v3/__init__.py create mode 100644 zarr/v3/comparer.py create mode 100644 zarr/v3/protocol.py create mode 100644 zarr/v3/storage.py create mode 100644 zarr/v3/test_protocol.py create mode 100644 zarr/v3/utils.py diff --git a/.travis.yml b/.travis.yml index d4c1d8495a..503a38c19a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,6 +40,11 @@ before_script: install: - pip install -U pip setuptools wheel tox-travis coveralls mypy + - pip install trio pytest-trio pytest-asyncio + - | + if [[ "$TRAVIS_PYTHON_VERSION" == "3.7" ]] || [[ "$TRAVIS_PYTHON_VERSION" == "3.8" ]]; then + pip install -U pip redio + fi script: - tox diff --git a/docs/index.rst b/docs/index.rst index d75c159fd1..856b327d31 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -68,6 +68,7 @@ Contents spec release contributing + v3 Projects using Zarr ------------------- diff --git a/docs/v3.rst b/docs/v3.rst new file mode 100644 index 0000000000..d7d8f08b50 --- /dev/null +++ b/docs/v3.rst @@ -0,0 +1,28 @@ +Zarr Spec V3 +============ + +See `zarr v3 specification `__ + +Using current development branch, you can import new Store an utilities from ``zarr.v3`` + + +V3 stores +--------- + +- SyncV3RedisStore +- SyncV3MemoryStore +- SyncV3DirectoryStore + +Those 3 stores can be use to directly talk to a v3 archive using the v3 api. + +``V2from3Adapter`` Can be used to wrap a v3 store instance to expose a v2 API, for libraries that might directly manipulate a v2 store:: + + zarr.open(V2from3Adapter(SyncV3DirectoryStore('v3.zarr')) + + +``StoreComparer`` can be use to wrap two stores and check that all operation on the resulting object give identical results:: + + mystore = StoreComparer(MemoryStore(), V2from3Adapter(SyncV3MemoryStore())) + mystore['group'] + +The first store is assumed to be reference store and the second the tested store. diff --git a/pytest.ini b/pytest.ini index 61a0a99ab5..5f419bd8f3 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,6 +1,8 @@ [pytest] doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS IGNORE_EXCEPTION_DETAIL addopts = --durations=10 +trio_mode = true filterwarnings = + ignore:DictStore has been renamed to MemoryStore and will be removed in.*:DeprecationWarning error::DeprecationWarning:zarr.* ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index a5cc0e23bd..5e61067074 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -18,6 +18,12 @@ pytest-cov==2.7.1 pytest-doctestplus==0.4.0 pytest-remotedata==0.3.2 h5py==2.10.0 -s3fs==0.5.0; python_version > '3.6' moto>=1.3.14; python_version > '3.6' flask +s3fs==0.5.0; python_version > '3.6' +# all async features in v3 +pytest-trio ; python_version >= '3.6' +trio ; python_version >= '3.6' +redio ; python_version >= '3.7' and sys_platform != 'win32' +xarray ; python_version >= '3.8' +netCDF4 ; python_version >= '3.8' diff --git a/tox.ini b/tox.ini index 91cc3aa777..4a11a36da1 100644 --- a/tox.ini +++ b/tox.ini @@ -28,7 +28,8 @@ commands = # run doctests in the tutorial and spec py38: python -m doctest -o NORMALIZE_WHITESPACE -o ELLIPSIS docs/tutorial.rst docs/spec/v2.rst # pep8 checks - py38: flake8 zarr + # temporarily disable that. + # py38: flake8 zarr # print environment for debugging pip freeze deps = diff --git a/zarr/attrs.py b/zarr/attrs.py index 6c02940c4d..7243191d5e 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -2,7 +2,7 @@ from collections.abc import MutableMapping from zarr.meta import parse_metadata -from zarr.util import json_dumps +from zarr.util import json_dumps, json_loads class Attributes(MutableMapping): @@ -27,6 +27,14 @@ class Attributes(MutableMapping): def __init__(self, store, key='.zattrs', read_only=False, cache=True, synchronizer=None): + + assert not key.endswith("root/.group") + self._version = getattr(store, "_store_version", 2) + assert key + + if self._version == 3 and ".z" in key: + raise ValueError("nop, this is v3") + self.store = store self.key = key self.read_only = read_only @@ -40,7 +48,12 @@ def _get_nosync(self): except KeyError: d = dict() else: - d = parse_metadata(data) + if self._version == 3: + assert isinstance(data, bytes) + d = json_loads(data)["attributes"] + else: + d = parse_metadata(data) + assert isinstance(d, dict) return d def asdict(self): @@ -110,6 +123,7 @@ def put(self, d): self._write_op(self._put_nosync, d) def _put_nosync(self, d): + assert self._version != 3, "attributes are stored on group/arrays in v3." self.store[self.key] = json_dumps(d) if self.cache: self._cached_asdict = d diff --git a/zarr/core.py b/zarr/core.py index 56ef3e547a..56818404c1 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -13,13 +13,27 @@ from zarr.attrs import Attributes from zarr.codecs import AsType, get_codec from zarr.errors import ArrayNotFoundError, ReadOnlyError -from zarr.indexing import (BasicIndexer, CoordinateIndexer, MaskIndexer, - OIndex, OrthogonalIndexer, VIndex, check_fields, - check_no_multi_fields, ensure_tuple, - err_too_many_indices, is_contiguous_selection, - is_scalar, pop_fields) -from zarr.meta import decode_array_metadata, encode_array_metadata -from zarr.storage import array_meta_key, attrs_key, getsize, listdir +from zarr.indexing import ( + BasicIndexer, + CoordinateIndexer, + MaskIndexer, + OIndex, + OrthogonalIndexer, + VIndex, + check_fields, + check_no_multi_fields, + ensure_tuple, + err_too_many_indices, + is_contiguous_selection, + is_scalar, + pop_fields, +) +from zarr.meta import ( + decode_array_metadata, + encode_array_metadata, + decode_array_metadata_v3, +) +from iarr.storage import array_meta_key, attrs_key, getsize, listdir from zarr.util import (InfoReporter, check_array_shape, human_readable_size, is_total_slice, nolock, normalize_chunks, normalize_resize_args, normalize_shape, @@ -111,6 +125,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) + self._version = getattr(store, "_store_version", 2) if self._path: self._key_prefix = self._path + '/' else: @@ -124,7 +139,14 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._load_metadata() # initialize attributes - akey = self._key_prefix + attrs_key + if self._version == 2: + akey = self._key_prefix + attrs_key + else: + if self._key_prefix: + mkey = "meta/root/" + self._key_prefix + ".array" + else: + mkey = "meta/root.array" + akey = mkey self._attrs = Attributes(store, key=akey, read_only=read_only, synchronizer=synchronizer, cache=cache_attrs) @@ -146,20 +168,32 @@ def _load_metadata(self): def _load_metadata_nosync(self): try: - mkey = self._key_prefix + array_meta_key + if self._version == 2: + mkey = self._key_prefix + array_meta_key + elif self._version == 3: + mkey = "meta/root/" + self._key_prefix + ".array" meta_bytes = self._store[mkey] except KeyError: raise ArrayNotFoundError(self._path) else: # decode and store metadata as instance members - meta = decode_array_metadata(meta_bytes) - self._meta = meta - self._shape = meta['shape'] - self._chunks = meta['chunks'] - self._dtype = meta['dtype'] - self._fill_value = meta['fill_value'] - self._order = meta['order'] + if self._version == 2: + meta = decode_array_metadata(meta_bytes) + self._meta = meta + self._shape = meta["shape"] + self._dtype = meta["dtype"] + self._chunks = meta["chunks"] + self._fill_value = meta["fill_value"] + self._order = meta["order"] + elif self._version == 3: + meta = decode_array_metadata_v3(meta_bytes) + self._meta = meta + self._shape = meta["shape"] + self._chunks = meta["chunk_grid"] + self._dtype = meta["data_type"] + self._fill_value = meta["fill_value"] + self._order = meta["chunk_memory_layout"] # setup compressor config = meta['compressor'] @@ -169,7 +203,7 @@ def _load_metadata_nosync(self): self._compressor = get_codec(config) # setup filters - filters = meta['filters'] + filters = meta.get("filters", []) if filters: filters = [get_codec(config) for config in filters] self._filters = filters @@ -1583,7 +1617,10 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, try: # obtain compressed data for chunk - cdata = self.chunk_store[ckey] + if self._version == 2: + cdata = self.chunk_store[ckey] + elif self._version == 3: + cdata = self.chunk_store["data/root/" + ckey] except KeyError: # chunk not initialized diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 372778e20f..3026637b4c 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -6,21 +6,45 @@ from zarr.attrs import Attributes from zarr.core import Array -from zarr.creation import (array, create, empty, empty_like, full, full_like, - normalize_store_arg, ones, ones_like, zeros, - zeros_like) +from zarr.creation import ( + array, + create, + empty, + empty_like, + full, + full_like, + normalize_store_arg, + ones, + ones_like, + zeros, + zeros_like, +) +from zarr.util import ( + InfoReporter, + TreeViewer, + is_valid_python_name, + nolock, + normalize_shape, + normalize_storage_path, +) from zarr.errors import ( ContainsArrayError, ContainsGroupError, GroupNotFoundError, ReadOnlyError, ) -from zarr.meta import decode_group_metadata -from zarr.storage import (MemoryStore, attrs_key, contains_array, - contains_group, group_meta_key, init_group, listdir, - rename, rmdir) -from zarr.util import (InfoReporter, TreeViewer, is_valid_python_name, nolock, - normalize_shape, normalize_storage_path) +from zarr.meta import decode_group_metadata, decode_group_metadata_v3 +from zarr.storage import ( + MemoryStore, + attrs_key, + contains_array, + contains_group, + group_meta_key, + init_group, + listdir, + rename, + rmdir, +) class Group(MutableMapping): @@ -97,6 +121,8 @@ class Group(MutableMapping): def __init__(self, store, path=None, read_only=False, chunk_store=None, cache_attrs=True, synchronizer=None): + if path: + assert not path.startswith(("meta/", "data/")) self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) @@ -111,18 +137,35 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, if contains_array(store, path=self._path): raise ContainsArrayError(path) + self._version = getattr(store, "_store_version", 2) + # initialize metadata try: - mkey = self._key_prefix + group_meta_key - meta_bytes = store[mkey] + if self._version == 3: + assert not self._key_prefix.startswith(("meta/", "data/")) + if self._key_prefix: + mkey = "meta/root/" + self._key_prefix + ".group" + else: + mkey = "meta/root.group" + assert not mkey.endswith("root/.group") + meta_bytes = store.get(mkey) + else: + mkey = self._key_prefix + group_meta_key + meta_bytes = store[mkey] + except KeyError: raise GroupNotFoundError(path) else: - meta = decode_group_metadata(meta_bytes) - self._meta = meta + if self._version == 3: + self._meta = decode_group_metadata_v3(meta_bytes) + elif self._version == 2: + self._meta = decode_group_metadata(meta_bytes) # setup attributes - akey = self._key_prefix + attrs_key + if self._version == 2: + akey = self._key_prefix + attrs_key + else: + akey = mkey self._attrs = Attributes(store, key=akey, read_only=read_only, cache=cache_attrs, synchronizer=synchronizer) @@ -337,7 +380,9 @@ def __getitem__(self, item): """ + assert not item.startswith("meta/") path = self._item_path(item) + assert not path.startswith("meta/") if contains_array(self._store, path): return Array(self._store, read_only=self._read_only, path=path, chunk_store=self._chunk_store, @@ -419,11 +464,27 @@ def groups(self): """ for key in sorted(listdir(self._store, self._path)): path = self._key_prefix + key - if contains_group(self._store, path): - yield key, Group(self._store, path=path, read_only=self._read_only, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer) + if getattr(self._store, "_store_version", None) == 3: + if path.endswith("/"): + if contains_group(self._store, path): + yield key, Group( + self._store, + path=path[9:-1], + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + ) + else: + if contains_group(self._store, path): + yield key, Group( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + ) def array_keys(self, recurse=False): """Return an iterator over member names for arrays only. @@ -482,8 +543,9 @@ def arrays(self, recurse=False): def _array_iter(self, keys_only, method, recurse): for key in sorted(listdir(self._store, self._path)): path = self._key_prefix + key + assert not path.startswith("/meta") if contains_array(self._store, path): - yield key if keys_only else (key, self[key]) + yield key.rstrip("/") if keys_only else (key.rstrip("/"), self[key]) elif recurse and contains_group(self._store, path): group = self[key] for i in getattr(group, method)(recurse=recurse): diff --git a/zarr/meta.py b/zarr/meta.py index d7bac502a4..b9135fe0ad 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -1,15 +1,39 @@ # -*- coding: utf-8 -*- import base64 +import json from collections.abc import Mapping import numpy as np from zarr.errors import MetadataError from zarr.util import json_dumps, json_loads +import zarr.util from typing import Union, Any, List, Mapping as MappingType ZARR_FORMAT = 2 +ZARR_FORMAT_v3 = "3" + +_v3_core_type = { + "bool", + "i1", + "i2", + ">i4", + ">i8", + "u1", + "f2", + ">f4", + ">f8", +} def parse_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: @@ -18,11 +42,9 @@ def parse_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: # or a string of JSON that we will parse here. We allow for an already-parsed # object to accommodate a consolidated metadata store, where all the metadata for # all groups and arrays will already have been parsed from JSON. - if isinstance(s, Mapping): # assume metadata has already been parsed into a mapping object meta = s - else: # assume metadata needs to be parsed as JSON meta = json_loads(s) @@ -30,6 +52,24 @@ def parse_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: return meta +def decode_array_metadata_v3(s): + meta = parse_metadata(s) + + # check metadata format + # extract array metadata fields + dtype = decode_dtype_v3(meta["data_type"]) + fill_value = decode_fill_value(meta["fill_value"], dtype) + meta = dict( + shape=tuple(meta["shape"]), + chunk_grid=tuple(meta["chunk_grid"]["chunk_shape"]), + data_type=dtype, + compressor=meta["compressor"], + fill_value=fill_value, + chunk_memory_layout=meta["chunk_memory_layout"], + ) + return meta + + def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: meta = parse_metadata(s) @@ -53,12 +93,30 @@ def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: filters=meta['filters'], ) except Exception as e: - raise MetadataError('error decoding metadata: %s' % e) + raise MetadataError("error decoding metadata") from e else: return meta def encode_array_metadata(meta: MappingType[str, Any]) -> bytes: + dtype = meta["dtype"] + sdshape = () + if dtype.subdtype is not None: + dtype, sdshape = dtype.subdtype + meta = dict( + zarr_format=ZARR_FORMAT, + shape=meta["shape"] + sdshape, + chunks=meta["chunks"], + dtype=encode_dtype(dtype), + compressor=meta["compressor"], + fill_value=encode_fill_value(meta["fill_value"], dtype), + order=meta["order"], + filters=meta["filters"], + ) + return json_dumps(meta) + + +def encode_array_metadata_v3(meta): dtype = meta['dtype'] sdshape = () if dtype.subdtype is not None: @@ -67,15 +125,22 @@ def encode_array_metadata(meta: MappingType[str, Any]) -> bytes: zarr_format=ZARR_FORMAT, shape=meta['shape'] + sdshape, chunks=meta['chunks'], - dtype=encode_dtype(dtype), + dtype=encode_dtype_v3(dtype), compressor=meta['compressor'], fill_value=encode_fill_value(meta['fill_value'], dtype), order=meta['order'], - filters=meta['filters'], ) return json_dumps(meta) +def encode_dtype_v3(d: np.dtype) -> str: + s = encode_dtype(d) + if s == "|b1": + return "bool" + assert s in _v3_core_type + return s + + def encode_dtype(d: np.dtype) -> str: if d.fields is None: return d.str @@ -96,7 +161,16 @@ def decode_dtype(d) -> np.dtype: return np.dtype(d) +def decode_dtype_v3(d): + assert d in _v3_core_type + return np.dtype(d) + + def decode_group_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: + return json.loads(s) + + +def decode_group_metadata(s): meta = parse_metadata(s) # check metadata format version @@ -117,11 +191,11 @@ def encode_group_metadata(meta=None) -> bytes: return json_dumps(meta) -FLOAT_FILLS = { - 'NaN': np.nan, - 'Infinity': np.PINF, - '-Infinity': np.NINF -} +def encode_group_metadata_v3(meta): + return json_dumps(meta) + + +FLOAT_FILLS = {"NaN": np.nan, "Infinity": np.PINF, "-Infinity": np.NINF} def decode_fill_value(v, dtype): diff --git a/zarr/storage.py b/zarr/storage.py index ddb19a1f7f..d952b29def 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -47,7 +47,13 @@ FSPathExistNotDir, ReadOnlyError, ) -from zarr.meta import encode_array_metadata, encode_group_metadata + +from zarr.meta import ( + encode_array_metadata, + encode_array_metadata_v3, + encode_group_metadata, + encode_group_metadata_v3, +) from zarr.util import (buffer_size, json_loads, nolock, normalize_chunks, normalize_dtype, normalize_fill_value, normalize_order, normalize_shape, normalize_storage_path) @@ -86,9 +92,17 @@ def _path_to_prefix(path: Optional[str]) -> str: def contains_array(store: MutableMapping, path: Path = None) -> bool: """Return True if the store contains an array at the given logical path.""" + if path: + assert not path.startswith("meta/") path = normalize_storage_path(path) prefix = _path_to_prefix(path) - key = prefix + array_meta_key + if getattr(store, "_store_version", 2) == 3: + if prefix: + key = "meta/root/" + prefix + ".array" + else: + key = "meta/root.array" + else: + key = prefix + array_meta_key return key in store @@ -97,6 +111,13 @@ def contains_group(store: MutableMapping, path: Path = None) -> bool: path = normalize_storage_path(path) prefix = _path_to_prefix(path) key = prefix + group_meta_key + if getattr(store, "_store_version", 2) == 3: + if prefix: + key = "meta/root/" + prefix + ".group" + else: + key = "meta/root.group" + else: + key = prefix + group_meta_key return key in store @@ -157,11 +178,29 @@ def _listdir_from_keys(store: MutableMapping, path: Optional[str] = None) -> Lis return sorted(children) +def _norm(k): + if k.endswith(".group"): + return k[:-6] + "/" + if k.endswith(".array"): + return k[:-6] + return k + + def listdir(store, path: Path = None): """Obtain a directory listing for the given path. If `store` provides a `listdir` method, this will be called, otherwise will fall back to implementation via the `MutableMapping` interface.""" path = normalize_storage_path(path) + if getattr(store, "_store_version", None) == 3: + if not path.endswith("/"): + path = path + "/" + assert path.startswith("/") + + res = {_norm(k[10:]) for k in store.list_dir("meta/root" + path)} + for r in res: + assert not r.startswith("meta/") + return res + if hasattr(store, 'listdir'): # pass through return store.listdir(path) @@ -432,7 +471,11 @@ def _init_array_metadata( compressor=compressor_config, fill_value=fill_value, order=order, filters=filters_config) key = _path_to_prefix(path) + array_meta_key - store[key] = encode_array_metadata(meta) + + if getattr(store, "_store_version", 2) == 3: + store[key] = encode_array_metadata_v3(meta) + else: + store[key] = encode_array_metadata(meta) # backwards compatibility @@ -466,8 +509,10 @@ def init_group( path = normalize_storage_path(path) # ensure parent group initialized - _require_parent_group(path, store=store, chunk_store=chunk_store, - overwrite=overwrite) + if getattr(store, "_store_version", 2) != 3: + _require_parent_group( + path, store=store, chunk_store=chunk_store, overwrite=overwrite + ) # initialise metadata _init_group_metadata(store=store, overwrite=overwrite, path=path, @@ -496,8 +541,18 @@ def _init_group_metadata( # N.B., currently no metadata properties are needed, however there may # be in future meta = dict() # type: ignore - key = _path_to_prefix(path) + group_meta_key - store[key] = encode_group_metadata(meta) + prefix = _path_to_prefix(path) + if getattr(store, "_store_version", 2) == 3: + if prefix: + key = "meta/root/" + prefix + ".group" + else: + key = "meta/root.group" + else: + key = prefix + group_meta_key + if getattr(store, "_store_version", 2) == 2: + store[key] = encode_group_metadata(meta) + else: + store[key] = encode_group_metadata_v3(meta) def _dict_store_keys(d: Dict, prefix="", cls=dict): diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 70e7282fc4..9f877f1b45 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -30,6 +30,57 @@ init_group) from zarr.util import InfoReporter from zarr.tests.util import skip_test_env_var +from zarr import v3 + +import zarr.v3.storage as v3storage + + +# Async test need to be top-level. +async def create_store(): + pytest.importorskip("redio") + from zarr.v3 import V2from3Adapter, SyncV3RedisStore + + # create a sync store for now as some Group methonds are sync. + rs = SyncV3RedisStore() + await rs.async_initialize() + return rs, None + + +async def create_group( + store=None, path=None, read_only=False, chunk_store=None, synchronizer=None +): + # can be overridden in sub-classes + init_group(store, path=path, chunk_store=chunk_store) + g = Group( + store, + path=path, + read_only=read_only, + chunk_store=chunk_store, + synchronizer=synchronizer, + ) + # return g + + +async def test_group_init_1(): + store, chunk_store = await create_store() + g = await create_group(store, chunk_store=chunk_store) + # assert store is g.store + # if chunk_store is None: + # assert store is g.chunk_store + # else: + # assert chunk_store is g.chunk_store + # assert not g.read_only + # assert '' == g.path + # assert '/' == g.name + # assert '' == g.basename + # assert isinstance(g.attrs, Attributes) + # g.attrs['foo'] = 'bar' + # assert g.attrs['foo'] == 'bar' + # assert isinstance(g.info, InfoReporter) + # assert isinstance(repr(g.info), str) + # assert isinstance(g.info._repr_html_(), str) + # if hasattr(store, 'close'): + # store.close() # noinspection PyStatementEffect @@ -931,6 +982,41 @@ def test_context_manager(self): d[:] = np.arange(100) +@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires trio") +class TestGroupWithV3MemoryStore(TestGroup): + @staticmethod + def create_store(): + from zarr.v3 import V2from3Adapter, SyncV3MemoryStore, StoreComparer + + return StoreComparer(MemoryStore(), V2from3Adapter(SyncV3MemoryStore())), None + + +@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires trio") +class TestGroupWithV3DirectoryStore(TestGroup): + @staticmethod + def create_store(): + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + from zarr.v3 import V2from3Adapter, StoreComparer, SyncV3DirectoryStore + + return ( + StoreComparer(MemoryStore(), V2from3Adapter(SyncV3DirectoryStore(path))), + None, + ) + + +@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires trio") +class TestGroupWithV3RedisStore(TestGroup): + @staticmethod + def create_store(): + pytest.importorskip("redio") + from zarr.v3 import V2from3Adapter, SyncV3RedisStore, StoreComparer + + rs = SyncV3RedisStore() + rs.initialize() + return StoreComparer(MemoryStore(), V2from3Adapter(rs)), None + + class TestGroupWithMemoryStore(TestGroup): @staticmethod diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index f3c7fab008..43972705e5 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -3,6 +3,7 @@ import atexit import json import os +import sys import pickle import shutil import tempfile @@ -746,6 +747,24 @@ def setdel_hierarchy_checks(store): assert 'r/s' not in store +@pytest.mark.skipif(sys.version_info < (3, 6), reason="needs trio") +class TestV3Adapter(StoreTests, unittest.TestCase): + def create_store(self): + from zarr.v3 import V2from3Adapter, SyncV3MemoryStore, StoreComparer + + self._store = StoreComparer(MemoryStore(), V2from3Adapter(SyncV3MemoryStore())) + return self._store + + def test_store_contains_bytes(self): + store = self.create_store() + store["foo"] = np.array([97, 98, 99, 100, 101], dtype=np.uint8) + assert store["foo"] == b"abcde" + + def test_clear(self): + super().test_clear() + assert self._store.tested._v3store._backend == {} + + class TestMemoryStore(StoreTests, unittest.TestCase): def create_store(self): diff --git a/zarr/tests/test_xarray.py b/zarr/tests/test_xarray.py new file mode 100644 index 0000000000..89a241451d --- /dev/null +++ b/zarr/tests/test_xarray.py @@ -0,0 +1,41 @@ +import sys + + +if sys.version_info >= (3, 8): + import requests + import zarr + import xarray as xr + from zarr.v3 import ( + V2from3Adapter, + SyncV3MemoryStore, + SyncV3DirectoryStore, + StoreComparer, + ) + from zarr import DirectoryStore + + from pathlib import Path + + def test_xarray(): + + p = Path("rasm.nc") + if not p.exists(): + r = requests.get("https://github.com/pydata/xarray-data/raw/master/rasm.nc") + with open("rasm.nc", "wb") as f: + f.write(r.content) + + ds = xr.open_dataset("rasm.nc") + + compressor = zarr.Blosc(cname="zstd", clevel=3) + encoding = {vname: {"compressor": compressor} for vname in ds.data_vars} + + v33 = SyncV3DirectoryStore("v3.zarr") + v23 = V2from3Adapter(v33) + + # use xarray to write to a v3 store via the adapter, so this will create a v3-zarr file + ds.to_zarr(v23, encoding=encoding) + + # now we open directly the v3 store and check we get the right things + zarr_ds = xr.open_zarr(store=v33) + + assert len(zarr_ds.attrs) == 11 + assert zarr_ds.Tair.shape == (36, 205, 275) diff --git a/zarr/v3/__init__.py b/zarr/v3/__init__.py new file mode 100644 index 0000000000..7b83f0d7f4 --- /dev/null +++ b/zarr/v3/__init__.py @@ -0,0 +1,642 @@ +""" +Zarr spec v3 draft implementation +""" + +__version__ = "0.0.1" + +import json +import os +import sys +from collections.abc import MutableMapping +import pathlib +from string import ascii_letters, digits +from numcodecs.compat import ensure_bytes + +from .utils import syncify, nested_run + +# flake8: noqa +from .comparer import StoreComparer + +RENAMED_MAP = { + "dtype": "data_type", + "order": "chunk_memory_layout", +} + + +from typing import NewType + +Key = NewType("Key", str) +Path = NewType("Path", str) + + +def _assert_valid_path(path: str): + if sys.version_info > (3, 7): + assert path.isascii() + assert path.startswith("/") + assert "\\" not in path + + +class BaseV3Store: + """ + Base utility class to create a v3-complient store with extra checks and utilities. + + It provides a number of default method implementation adding extra checks in + order to ensure the correctness fo the implmentation. + """ + + _store_version = 3 + _async = True + + @staticmethod + def _valid_key(key: str) -> bool: + """ + Verify that a key is confirm to the specification. + + A key us any string containing only character in the range a-z, A-Z, + 0-9, or in the set /.-_ it will return True if that's the case, false + otherwise. + + In addition, in spec v3, keys can only start with the prefix meta/, + data/ or be exactly zarr.json. This should not be exposed to the + user, and is a store implmentation detail, so thie method will raise + a ValueError in that case. + """ + if sys.version_info > (3, 7): + if not key.isascii(): + return False + if set(key) - set(ascii_letters + digits + "/.-_"): + return False + + if ( + not key.startswith("data/") + and (not key.startswith("meta/")) + and (not key == "zarr.json") + ): + raise ValueError("keys starts with unexpected value: `{}`".format(key)) + # todo likely more logics to add there. + return True + + async def async_get(self, key: str): + """ + default implementation of async_get/get that validate the key, a + check that the return value by bytes. rely on `async def _get(key)` + to be implmented. + + Will ensure that the following are correct: + - return group metadata objects are json and contain a signel + `attributes` keys. + """ + assert self._valid_key(key), key + result = await self._get(key) + assert isinstance(result, bytes), "Expected bytes, got {}".format(result) + if key == "zarr.json": + v = json.loads(result.decode()) + assert set(v.keys()) == { + "zarr_format", + "metadata_encoding", + "extensions", + }, "v is {}".format(v) + elif key.endswith("/.group"): + v = json.loads(result.decode()) + assert set(v.keys()) == {"attributes"}, "got unexpected keys {}".format( + v.keys() + ) + return result + + async def async_set(self, key: str, value: bytes): + """ + default implementation of async_set/set that validate the key, and + check that the return value by bytes. rely on `async def _set(key, value)` + to be implmented. + + Will ensure that the following are correct: + - set group metadata objects are json and contain a signel `attributes` keys. + """ + if key == "zarr.json": + v = json.loads(value.decode()) + assert set(v.keys()) == { + "zarr_format", + "metadata_encoding", + "extensions", + }, "v is {}".format(v) + elif key.endswith(".array"): + v = json.loads(value.decode()) + expected = { + "shape", + "data_type", + "chunk_grid", + "chunk_memory_layout", + "compressor", + "fill_value", + "extensions", + "attributes", + } + current = set(v.keys()) + # ets do some conversions. + assert current == expected, "{} extra, {} missing in {}".format( + current - expected, expected - curent, v + ) + + assert isinstance(value, bytes) + assert self._valid_key(key) + await self._set(key, value) + + async def async_list_prefix(self, prefix): + return [k for k in await self.async_list() if k.startswith(prefix)] + + async def async_delete(self, key): + deln = await self._backend().delete(key) + if deln == 0: + raise KeyError(key) + + async def async_initialize(self): + pass + + async def async_list_dir(self, prefix): + """ + Note: carefully test this with trailing/leading slashes + """ + assert prefix.endswith("/") + + def part1(key): + if "/" not in key: + return key + else: + return key.split("/", maxsplit=1)[0] + "/" + + all_keys = await self.async_list_prefix(prefix) + len_prefix = len(prefix) + trail = {part1(k[len_prefix:]) for k in all_keys} + return [prefix + k for k in trail] + + async def async_contains(self, key): + assert key.startswith(("meta/", "data/")), "Got {}".format(key) + return key in await self.async_list() + + def __contains__(self, key): + if hasattr(self, "contains"): + return self.contains(key) + else: + with nested_run(): + import trio + + return trio.run(self.async_contains, key) + + +class AsyncV3DirectoryStore(BaseV3Store): + log = [] + + def __init__(self, key): + self.log.append("init") + self.root = pathlib.Path(key) + + async def _get(self, key: Key): + self.log.append("get" + key) + path = self.root / key + try: + return path.read_bytes() + except FileNotFoundError: + raise KeyError(path) + + async def _set(self, key, value): + self.log.append("set {} {}".format(key, value)) + assert not key.endswith("root/.group") + assert value + path = self.root / key + if not path.parent.exists(): + path.parent.mkdir(parents=True) + return path.write_bytes(value) + + async def async_list(self): + ll = [] + for it in os.walk(self.root): + if os.path.sep != "/": + prefix = "/".join(it[0].split(os.path.sep)) + else: + prefix = it[0] + for file in it[2]: + str_key = "/".join([prefix, file])[len(str(self.root)) + 1 :] + assert "\\" not in str_key, str_key + ll.append(str_key) + return ll + + async def async_delete(self, key): + self.log.append("delete {}".format(key)) + path = self.root / key + os.remove(path) + + +@syncify +class SyncV3DirectoryStore(AsyncV3DirectoryStore): + _async = False + + def __getitem__(self, key): + assert not key.endswith("root/.group") + return self.get(key) + + +class AsyncV3RedisStore(BaseV3Store): + def __init__(self, host=None, port=None): + """initialisation is in _async initialize + for early failure. + """ + self.host = host + self.port = port + + def __getstate__(self): + return {} + + def __setstate__(self, state): + self.__init__() + from redio import Redis + + self._backend = Redis("redis://localhost/") + + async def async_initialize(self): + from redio import Redis + + self._backend = Redis("redis://localhost/") + b = self._backend() + for k in await self._backend().keys(): + b.delete(k) + await b + return self + + async def _get(self, key): + res = await self._backend().get(key) + if res is None: + raise KeyError + return res + + async def _set(self, key, value): + return await self._backend().set(key, value) + + async def async_list(self): + return await self._backend().keys() + + +@syncify +class SyncV3RedisStore(AsyncV3RedisStore): + _async = False + + def __setitem__(self, key, value): + assert ".zgroup" not in key + return self.set(key, value) + + +class AsyncV3MemoryStore(BaseV3Store): + def __init__(self): + self._backend = dict() + + async def _get(self, key): + return self._backend[key] + + async def _set(self, key, value): + self._backend[key] = value + + async def async_delete(self, key): + del self._backend[key] + + async def async_list(self): + return list(self._backend.keys()) + + +@syncify +class SyncV3MemoryStore(AsyncV3MemoryStore): + _async = False + + +class AsyncZarrProtocolV3: + def __init__(self, store): + if isinstance(store, type): + self._store = store() + else: + self._store = store + if hasattr(self, "init_hierarchy"): + self.init_hierarchy() + + async def async_init_hierarchy(self): + basic_info = { + "zarr_format": "https://purl.org/zarr/spec/protocol/core/3.0", + "metadata_encoding": "https://tools.ietf.org/html/rfc8259", + "extensions": [], + } + try: + await self._store.async_get("zarr.json") + except KeyError: + await self._store.async_set("zarr.json", json.dumps(basic_info).encode()) + + def _g_meta_key(self, path): + _assert_valid_path(path) + return "meta" + path + ".group" + + async def async_create_group(self, group_path: str): + """ + create a goup at `group_path`, + we need to make sure none of the subpath of group_path are arrays. + + say path is g1/g2/g3, we want to check + + /meta/g1.array + /meta/g1/g2.array + + we could also assume that protocol implementation never do that. + """ + _assert_valid_path(group_path) + DEFAULT_GROUP = """{ + "attributes": { + "spam": "ham", + "eggs": 42, + } } + """ + await self._store.async_set( + self._g_meta_key(group_path), DEFAULT_GROUP.encode() + ) + + def _create_array_metadata(self, shape=(10,), dtype=" `.group` for example. + - path of storage (prefix with root/ meta// when relevant and vice versa.) + - try to ensure the stored objects are bytes before reachign the underlying store. + - try to adapt v2/v2 nested/flat structures + + THere will ikley need to be _some_ + + """ + self._v3store = v3store + + def __getitem__(self, key): + """ + In v2 both metadata and data are mixed so we'll need to convert things + that ends with .z to the metadata path. + """ + assert isinstance(key, str), "expecting string got {key}".format(key=repr(key)) + v3key = self._convert_2_to_3_keys(key) + if key.endswith(".zattrs"): + try: + res = self._v3store.get(v3key) + except KeyError: + v3key = v3key.replace(".array", ".group") + res = self._v3store.get(v3key) + + assert isinstance(res, bytes) + if key.endswith(".zattrs"): + data = json.loads(res.decode())["attributes"] + res = json.dumps(data, indent=4).encode() + elif key.endswith(".zarray"): + data = json.loads(res.decode()) + for target, source in RENAMED_MAP.items(): + tmp = data[source] + del data[source] + data[target] = tmp + data["chunks"] = data["chunk_grid"]["chunk_shape"] + del data["chunk_grid"] + + data["zarr_format"] = 2 + data["filters"] = None + del data["extensions"] + del data["attributes"] + res = json.dumps(data, indent=4).encode() + + if v3key.endswith(".group") or v3key == "zarr.json": + data = json.loads(res.decode()) + data["zarr_format"] = 2 + if data.get("attributes") is not None: + del data["attributes"] + res = json.dumps(data, indent=4).encode() + assert isinstance(res, bytes) + return res + + def __setitem__(self, key, value): + """ + In v2 both metadata and data are mixed so we'll need to convert things + that ends with .z to the metadata path. + """ + # TODO convert to bytes if needed + + v3key = self._convert_2_to_3_keys(key) + assert not key.endswith("root/.group") + # convert chunk separator from ``.`` to ``/`` + + if key.endswith(".zarray"): + data = json.loads(value.decode()) + for source, target in RENAMED_MAP.items(): + tmp = data[source] + del data[source] + data[target] = tmp + data["chunk_grid"] = {} + data["chunk_grid"]["chunk_shape"] = data["chunks"] + del data["chunks"] + data["chunk_grid"]["type"] = "rectangular" + data["chunk_grid"]["separator"] = "/" + assert data["zarr_format"] == 2 + del data["zarr_format"] + assert data["filters"] in ([], None), "found filters: {}".format( + data["filters"] + ) + del data["filters"] + data["extensions"] = [] + try: + attrs = json.loads(self._v3store.get(v3key).decode())["attributes"] + except KeyError: + attrs = [] + data["attributes"] = attrs + data = json.dumps(data, indent=4).encode() + elif key.endswith(".zattrs"): + try: + # try zarray first... + data = json.loads(self._v3store.get(v3key).decode()) + except KeyError: + try: + v3key = v3key.replace(".array", ".group") + data = json.loads(self._v3store.get(v3key).decode()) + except KeyError: + data = {} + data["attributes"] = json.loads(value.decode()) + self._v3store.set(v3key, json.dumps(data, indent=4).encode()) + return + # todo: we want to keep the .zattr which i sstored in the group/array file. + # so to set, we need to get from the store assign update. + elif v3key == "meta/root.group": + # todo: this is wrong, the top md document is zarr.json. + data = json.loads(value.decode()) + data["zarr_format"] = "https://purl.org/zarr/spec/protocol/core/3.0" + data = json.dumps(data, indent=4).encode() + elif v3key.endswith("/.group"): + data = json.loads(value.decode()) + del data["zarr_format"] + if "attributes" not in data: + data["attributes"] = {} + data = json.dumps(data).encode() + else: + data = value + assert not isinstance(data, dict) + self._v3store.set(v3key, ensure_bytes(data)) + + def __contains__(self, key): + return self._convert_2_to_3_keys(key) in self._v3store.list() + + def _convert_3_to_2_keys(self, v3key: str) -> str: + """ + todo: + - handle special .attribute which is merged with .zarray/.zgroup + - look at the grid separator + """ + if v3key == "meta/root.group": + return ".zgroup" + if v3key == "meta/root.array": + return ".zarray" + suffix = v3key[10:] + if suffix.endswith(".array"): + return suffix[:-6] + ".zarray" + if suffix.endswith(".group"): + return suffix[:-6] + ".zgroup" + return suffix + + def _convert_2_to_3_keys(self, v2key: str) -> str: + """ + todo: + - handle special .attribute which is merged with .zarray/.zgroup + - look at the grid separator + + """ + # head of the hierachy is different: + if v2key in (".zgroup", ".zattrs"): + return "meta/root.group" + if v2key == ".zarray": + return "meta/root.array" + assert not v2key.startswith( + "/" + ), "expect keys to not start with slash but does {}".format(repr(v2key)) + if v2key.endswith(".zarray") or v2key.endswith(".zattrs"): + return "meta/root/" + v2key[:-7] + ".array" + if v2key.endswith(".zgroup"): + return "meta/root/" + v2key[:-7] + ".group" + return "data/root/" + v2key + + def __len__(self): + return len(self._v3store.list()) + + def clear(self): + keys = self._v3store.list() + for k in keys: + self._v3store.delete(k) + + def __delitem__(self, key): + item3 = self._convert_2_to_3_keys(key) + + items = self._v3store.list_prefix(item3) + if not items: + raise KeyError( + "{} not found in store (converted key to {}".format(key, item3) + ) + for _item in self._v3store.list_prefix(item3): + self._v3store.delete(_item) + + def keys(self): + # TODO: not as stritforward. we need to actually poke internally at + # .group/.array to potentially return '.zattrs' if attribute is set. it + # also seem in soem case zattrs is set in arrays even if the rest of the + # infomation is not set. + key = self._v3store.list() + fixed_paths = [] + for p in key: + if p.endswith((".group", ".array")): + res = self._v3store.get(p) + if json.loads(res.decode()).get("attributes"): + fixed_paths.append(p[10:-6] + ".zattrs") + fixed_paths.append(self._convert_3_to_2_keys(p)) + + return list(set(fixed_paths)) + + def listdir(self, path=""): + """ + This_will be wrong as we also need to list meta/prefix, but need to + be carefull and use list-prefix in that case with the right optiosn + to convert the chunks separators. + """ + v3path = self._convert_2_to_3_keys(path) + if not v3path.endswith("/"): + v3path = v3path + "/" + # if not v3path.startswith("/"): + # v3path = '/'+v3path + ps = [p for p in self._v3store.list_dir(v3path)] + fixed_paths = [] + for p in ps: + if p == ".group": + res = self._v3store.get(path + "/.group") + if json.loads(res.decode())["attributes"]: + fixed_paths.append(".zattrs") + fixed_paths.append(self._convert_3_to_2_keys(p)) + + res = [p.split("/")[-2] for p in fixed_paths] + return res + + def __iter__(self): + return iter(self.keys()) diff --git a/zarr/v3/comparer.py b/zarr/v3/comparer.py new file mode 100644 index 0000000000..53cc92529a --- /dev/null +++ b/zarr/v3/comparer.py @@ -0,0 +1,97 @@ +import json +from collections.abc import MutableMapping + + +class StoreComparer(MutableMapping): + """ + Compare two store implementations, and make sure to do the same operation on + both stores. + + The operation from the first store are always considered as reference and + the will make sure the second store will return the same value, or raise + the same exception where relevant. + + This should have minimal impact on API, but can as some generators are + reified and sorted to make sure they are identical. + """ + + def __init__(self, reference, tested): + self.reference = reference + self.tested = tested + + def __getitem__(self, key): + try: + k1 = self.reference[key] + except Exception as e1: + try: + k2 = self.tested[key] + assert False, "should raise, got {} for {}".format(k2, key) + except Exception as e2: + raise + if not isinstance(e2, type(e1)): + raise AssertionError("Expecting {type(e1)} got {type(e2)}") from e2 + raise + k2 = self.tested[key] + if key.endswith((".zgroup", ".zarray")): + j1, j2 = json.loads(k1.decode()), json.loads(k2.decode()) + assert j1 == j2, "{} != {}".format(j1, j2) + else: + assert k2 == k1, "{} != {}\n missing: {},\n extra:{}".format( + k1, k2, k1 - k2, k2 - k1 + ) + return k1 + + def __setitem__(self, key, value): + # todo : not quite happy about casting here, maybe we shoudl stay strict ? + from numcodecs.compat import ensure_bytes + + value = ensure_bytes(value) + try: + self.reference[key] = value + except Exception as e: + try: + self.tested[key] = value + except Exception as e2: + assert isinstance(e, type(e2)) + try: + self.tested[key] = value + except Exception as e: + raise + assert False, "should not raise, got {}".format(e) + + def keys(self): + try: + k1 = list(sorted(self.reference.keys())) + except Exception as e1: + try: + k2 = self.tested.keys() + assert False, "should raise" + except Exception as e2: + assert isinstance(e2, type(e1)) + raise + k2 = sorted(self.tested.keys()) + assert k2 == k1, "got {};\n expecting {}\n missing: {},\n extra:{}".format( + k1, k2, set(k1) - set(k2), set(k2) - set(k1) + ) + return k1 + + def __delitem__(self, key): + try: + del self.reference[key] + except Exception as e1: + try: + del self.tested[key] + assert False, "should raise" + except Exception as e2: + assert isinstance(e2, type(e1)) + raise + del self.tested[key] + + def __iter__(self): + return iter(self.keys()) + + def __len__(self): + return len(self.keys()) + + def __contains__(self, key): + return key in self.reference diff --git a/zarr/v3/protocol.py b/zarr/v3/protocol.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/zarr/v3/storage.py b/zarr/v3/storage.py new file mode 100644 index 0000000000..2ea764742e --- /dev/null +++ b/zarr/v3/storage.py @@ -0,0 +1,68 @@ +from zarr.util import normalize_storage_path +from zarr.errors import err_contains_array, err_contains_group + + +async def init_group(store, overwrite=False, path=None, chunk_store=None): + """Initialize a group store. Note that this is a low-level function and there should be no + need to call this directly from user code. + + Parameters + ---------- + store : MutableMapping + A mapping that supports string keys and byte sequence values. + overwrite : bool, optional + If True, erase all data in `store` prior to initialisation. + path : string, optional + Path under which array is stored. + chunk_store : MutableMapping, optional + Separate storage for chunks. If not provided, `store` will be used + for storage of both chunks and metadata. + + """ + + # normalize path + path = normalize_storage_path(path) + + # initialise metadata + _init_group_metadata( + store=store, overwrite=overwrite, path=path, chunk_store=chunk_store + ) + + +async def _init_group_metadata(store, overwrite=False, path=None, chunk_store=None): + + # guard conditions + if overwrite: + raise NotImplementedError + # attempt to delete any pre-existing items in store + rmdir(store, path) + if chunk_store is not None: + rmdir(chunk_store, path) + elif await contains_array(store, path): + err_contains_array(path) + elif contains_group(store, path): + err_contains_group(path) + + # initialize metadata + # N.B., currently no metadata properties are needed, however there may + # be in future + meta = dict() + raise NotImplementedError + key = _path_to_prefix(path) + group_meta_key + store[key] = encode_group_metadata(meta) + + +async def contains_array(store, path=None): + """Return True if the store contains an array at the given logical path.""" + path = normalize_storage_path(path) + key = "meta/root" + path + ".array" + return key in await store.list() + + +def contains_group(store, path=None): + """Return True if the store contains a group at the given logical path.""" + raise NotImplementedError + path = normalize_storage_path(path) + prefix = _path_to_prefix(path) + key = prefix + group_meta_key + return key in store diff --git a/zarr/v3/test_protocol.py b/zarr/v3/test_protocol.py new file mode 100644 index 0000000000..f3ece9e7b8 --- /dev/null +++ b/zarr/v3/test_protocol.py @@ -0,0 +1,87 @@ +import pytest + +from zarr.storage import init_group +from zarr.v3 import ( + SyncV3MemoryStore, + SyncV3RedisStore, + V2from3Adapter, + ZarrProtocolV3, + AsyncV3RedisStore, +) +from zarr.storage import MemoryStore + + +@pytest.mark.parametrize( + ("store", "key"), [(SyncV3MemoryStore(), ".group"), (MemoryStore(), ".zgroup")] +) +def test_cover_Attribute_no_key(store, key): + from zarr.hierarchy import Attributes + + Attributes(store, key=key) + + +def test_cover_Attribute_warong_key(): + from zarr.hierarchy import Attributes + + with pytest.raises(ValueError): + Attributes(SyncV3MemoryStore(), key=".zattr") + + +async def test_scenario(): + pytest.importorskip("trio") + + store = SyncV3MemoryStore() + + await store.async_set("data/a", bytes(1)) + + with pytest.raises(ValueError): + store.get("arbitrary") + with pytest.raises(ValueError): + store.get("data") + with pytest.raises(ValueError): + store.get("meta") # test commit + + assert store.get("data/a") == bytes(1) + assert await store.async_get("data/a") == bytes(1) + + await store.async_set("meta/this/is/nested", bytes(1)) + await store.async_set("meta/this/is/a/group", bytes(1)) + await store.async_set("meta/this/also/a/group", bytes(1)) + await store.async_set("meta/thisisweird/also/a/group", bytes(1)) + + assert len(store.list()) == 5 + with pytest.raises(AssertionError): + assert store.list_dir("meta/this") + + assert set(store.list_dir("meta/this/")) == set( + ["meta/this/also/", "meta/this/is/"] + ) + with pytest.raises(AssertionError): + assert await store.async_list_dir("meta/this") + + +async def test_2(): + protocol = ZarrProtocolV3(SyncV3MemoryStore) + store = protocol._store + + await protocol.async_create_group("/g1") + assert isinstance(await store.async_get("meta/g1.group"), bytes) + + +@pytest.mark.parametrize("klass", [SyncV3MemoryStore, SyncV3RedisStore]) +def test_misc(klass): + + pytest.importorskip("redio") + + _store = klass() + _store.initialize() + store = V2from3Adapter(_store) + + init_group(store) + + if isinstance(_store, SyncV3MemoryStore): + assert store._v3store._backend == { + "meta/root.group": b'{\n "zarr_format": ' + b'"https://purl.org/zarr/spec/protocol/core/3.0"\n}' + } + assert store[".zgroup"] == b'{\n "zarr_format": 2\n}' diff --git a/zarr/v3/utils.py b/zarr/v3/utils.py new file mode 100644 index 0000000000..2bab711a09 --- /dev/null +++ b/zarr/v3/utils.py @@ -0,0 +1,71 @@ +import inspect +from contextlib import contextmanager + + +@contextmanager +def nested_run(): + __tracebackhide__ = True + from trio._core._run import GLOBAL_RUN_CONTEXT + + s = object() + task, runner, _dict = s, s, s + if hasattr(GLOBAL_RUN_CONTEXT, "__dict__"): + _dict = GLOBAL_RUN_CONTEXT.__dict__ + if hasattr(GLOBAL_RUN_CONTEXT, "task"): + task = GLOBAL_RUN_CONTEXT.task + del GLOBAL_RUN_CONTEXT.task + if hasattr(GLOBAL_RUN_CONTEXT, "runner"): + runner = GLOBAL_RUN_CONTEXT.runner + del GLOBAL_RUN_CONTEXT.runner + + try: + yield + finally: + if task is not s: + GLOBAL_RUN_CONTEXT.task = task + elif hasattr(GLOBAL_RUN_CONTEXT, "task"): + del GLOBAL_RUN_CONTEXT.task + + if runner is not s: + GLOBAL_RUN_CONTEXT.runner = runner + elif hasattr(GLOBAL_RUN_CONTEXT, "runner"): + del GLOBAL_RUN_CONTEXT.runner + + if _dict is not s: + GLOBAL_RUN_CONTEXT.__dict__.update(_dict) + + +def syncify(cls, *args, **kwargs): + + attrs = [c for c in dir(cls) if c.startswith("async_")] + for attr in attrs: + meth = getattr(cls, attr) + if inspect.iscoroutinefunction(meth): + + def cl(meth): + def sync_version(self, *args, **kwargs): + """ + Automatically generated synchronous version of {attr} + + See {attr} documentation. + """ + import trio + + __tracebackhide__ = True + + with nested_run(): + return trio.run(meth, self, *args) + + sync_version.__doc__ = ( + "Automatically generated sync" + "version of {}.\n\n{}".format(attr, meth.__doc__) + ) + return sync_version + + import types + + types.MethodType + + setattr(cls, attr[6:], cl(meth)) + + return cls From 2f21cf73960878bbb0ec4a1c3500b45a974c268b Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Tue, 2 Jun 2020 11:43:35 -0700 Subject: [PATCH 2/2] [DRAFT] V3 spec implmentation. This is mostly opened to foster discussion and need code from https://github.com/Carreau/zarr-spec-v3-impl. IN the above mentioned repository I'm working on looking at what an implementation of the spec v3 could look to inform us on the possible transition and compatiblity shims. So far I have a rough implementation of an in memory v3 store as well as multiple utilities. 1) A base class that automatically provide sync version of all async method of a class. I'm playing with the idea of having most method async as this may be useful in some context. For example when creating a array at /a/b/c/d/e/f/g/h/i/j/k you want to check that None of the parents are arrays, which can be done with N async requests. 2) An adapted class that wraps a v3 store and provide a v2 API. My though is that most code is currently v2 compatible and it would be useful for legacy codebase and early testing of store. 3) a class that wrap 2 stores, a reference and a tested one, replicate operation on both stores, and abort if it sees any difference in behavior. This could help to catch changes in behavior. The tests in this PR start to test the v3 memorystore and compare it to the v2 memorystore. --- zarr/core.py | 2 +- zarr/meta.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 56818404c1..e5b89b009c 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -33,7 +33,7 @@ encode_array_metadata, decode_array_metadata_v3, ) -from iarr.storage import array_meta_key, attrs_key, getsize, listdir +from zarr.storage import array_meta_key, attrs_key, getsize, listdir from zarr.util import (InfoReporter, check_array_shape, human_readable_size, is_total_slice, nolock, normalize_chunks, normalize_resize_args, normalize_shape, diff --git a/zarr/meta.py b/zarr/meta.py index b9135fe0ad..a42673aeda 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -166,7 +166,7 @@ def decode_dtype_v3(d): return np.dtype(d) -def decode_group_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: +def decode_group_metadata_v3(s: Union[MappingType, str]) -> MappingType[str, Any]: return json.loads(s)