From 9d6d07d2c16227e955253b6c9b2d35ad77e192cf Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 23 Jun 2021 11:12:44 -0400 Subject: [PATCH 01/30] fix conflicts --- docs/tutorial.rst | 12 +- mypy.ini | 2 +- pytest.ini | 2 + zarr/convenience.py | 71 ++++---- zarr/core.py | 17 +- zarr/creation.py | 11 +- zarr/hierarchy.py | 36 ++-- zarr/storage.py | 295 +++++++++++++++++++++++++++------ zarr/tests/test_convenience.py | 8 +- zarr/tests/test_core.py | 221 ++++++++++-------------- zarr/tests/test_creation.py | 45 ++--- zarr/tests/test_hierarchy.py | 76 +++------ zarr/tests/test_storage.py | 255 +++++++++++++++------------- zarr/tests/util.py | 5 +- 14 files changed, 618 insertions(+), 438 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index a3421608cc..51b81e16ef 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -176,7 +176,7 @@ print some diagnostics, e.g.:: Read-only : False Compressor : Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE, : blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.KVStore No. bytes : 400000000 (381.5M) No. bytes stored : 3379344 (3.2M) Storage ratio : 118.4 @@ -268,7 +268,7 @@ Here is an example using a delta filter with the Blosc compressor:: Read-only : False Filter [0] : Delta(dtype='>> z[:] array([b'H', b'e', b'l', b'l', b'o', b' ', b'f', b'r', b'o', b'm', b' ', @@ -1264,7 +1266,7 @@ ratios, depending on the correlation structure within the data. E.g.:: Order : C Read-only : False Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.KVStore No. bytes : 400000000 (381.5M) No. bytes stored : 6696010 (6.4M) Storage ratio : 59.7 @@ -1278,7 +1280,7 @@ ratios, depending on the correlation structure within the data. E.g.:: Order : F Read-only : False Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.KVStore No. bytes : 400000000 (381.5M) No. bytes stored : 4684636 (4.5M) Storage ratio : 85.4 diff --git a/mypy.ini b/mypy.ini index 998aed51a2..7c1be49cd6 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,4 +1,4 @@ [mypy] -python_version = 3.6 +python_version = 3.8 ignore_missing_imports = True follow_imports = silent diff --git a/pytest.ini b/pytest.ini index 61a0a99ab5..8e3c0adb22 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,4 +3,6 @@ doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS IGNORE_EXCEPTION_DETAIL addopts = --durations=10 filterwarnings = error::DeprecationWarning:zarr.* + error::UserWarning:zarr.* ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning + ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning diff --git a/zarr/convenience.py b/zarr/convenience.py index 1ed0f92ff3..f380536ca3 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -12,17 +12,21 @@ from zarr.hierarchy import group as _create_group from zarr.hierarchy import open_group from zarr.meta import json_dumps, json_loads -from zarr.storage import contains_array, contains_group +from zarr.storage import contains_array, contains_group, Store from zarr.util import TreeViewer, buffer_size, normalize_storage_path +from typing import Union + +StoreLike = Union[Store, str, None] + # noinspection PyShadowingBuiltins -def open(store=None, mode='a', **kwargs): +def open(store: StoreLike = None, mode: str = "a", **kwargs): """Convenience function to open a group or array using file-mode-like semantics. Parameters ---------- - store : MutableMapping or string, optional + store : Store or string, optional Store or path to directory in file system or name of zip file. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional Persistence mode: 'r' means read only (must exist); 'r+' means @@ -75,32 +79,33 @@ def open(store=None, mode='a', **kwargs): clobber = mode == 'w' # we pass storage options explicitly, since normalize_store_arg might construct # a store if the input is a fsspec-compatible URL - store = normalize_store_arg(store, clobber=clobber, - storage_options=kwargs.pop("storage_options", {})) + _store: Store = normalize_store_arg( + store, clobber=clobber, storage_options=kwargs.pop("storage_options", {}) + ) path = normalize_storage_path(path) if mode in {'w', 'w-', 'x'}: if 'shape' in kwargs: - return open_array(store, mode=mode, **kwargs) + return open_array(_store, mode=mode, **kwargs) else: - return open_group(store, mode=mode, **kwargs) + return open_group(_store, mode=mode, **kwargs) elif mode == "a": - if "shape" in kwargs or contains_array(store, path): - return open_array(store, mode=mode, **kwargs) + if "shape" in kwargs or contains_array(_store, path): + return open_array(_store, mode=mode, **kwargs) else: - return open_group(store, mode=mode, **kwargs) + return open_group(_store, mode=mode, **kwargs) else: - if contains_array(store, path): - return open_array(store, mode=mode, **kwargs) - elif contains_group(store, path): - return open_group(store, mode=mode, **kwargs) + if contains_array(_store, path): + return open_array(_store, mode=mode, **kwargs) + elif contains_group(_store, path): + return open_group(_store, mode=mode, **kwargs) else: raise PathNotFoundError(path) -def save_array(store, arr, **kwargs): +def save_array(store: StoreLike, arr, **kwargs): """Convenience function to save a NumPy array to the local file system, following a similar API to the NumPy save() function. @@ -132,16 +137,16 @@ def save_array(store, arr, **kwargs): """ may_need_closing = isinstance(store, str) - store = normalize_store_arg(store, clobber=True) + _store: Store = normalize_store_arg(store, clobber=True) try: - _create_array(arr, store=store, overwrite=True, **kwargs) + _create_array(arr, store=_store, overwrite=True, **kwargs) finally: - if may_need_closing and hasattr(store, 'close'): + if may_need_closing: # needed to ensure zip file records are written - store.close() + _store.close() -def save_group(store, *args, **kwargs): +def save_group(store: StoreLike, *args, **kwargs): """Convenience function to save several NumPy arrays to the local file system, following a similar API to the NumPy savez()/savez_compressed() functions. @@ -203,21 +208,21 @@ def save_group(store, *args, **kwargs): raise ValueError('at least one array must be provided') # handle polymorphic store arg may_need_closing = isinstance(store, str) - store = normalize_store_arg(store, clobber=True) + _store: Store = normalize_store_arg(store, clobber=True) try: - grp = _create_group(store, overwrite=True) + grp = _create_group(_store, overwrite=True) for i, arr in enumerate(args): k = 'arr_{}'.format(i) grp.create_dataset(k, data=arr, overwrite=True) for k, arr in kwargs.items(): grp.create_dataset(k, data=arr, overwrite=True) finally: - if may_need_closing and hasattr(store, 'close'): + if may_need_closing: # needed to ensure zip file records are written - store.close() + _store.close() -def save(store, *args, **kwargs): +def save(store: StoreLike, *args, **kwargs): """Convenience function to save an array or group of arrays to the local file system. Parameters @@ -327,7 +332,7 @@ def __repr__(self): return r -def load(store): +def load(store: StoreLike): """Load data from an array or group into memory. Parameters @@ -353,11 +358,11 @@ def load(store): """ # handle polymorphic store arg - store = normalize_store_arg(store) - if contains_array(store, path=None): - return Array(store=store, path=None)[...] - elif contains_group(store, path=None): - grp = Group(store=store, path=None) + _store = normalize_store_arg(store) + if contains_array(_store, path=None): + return Array(store=_store, path=None)[...] + elif contains_group(_store, path=None): + grp = Group(store=_store, path=None) return LazyLoader(grp) @@ -1073,7 +1078,7 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, return n_copied, n_skipped, n_bytes_copied -def consolidate_metadata(store, metadata_key='.zmetadata'): +def consolidate_metadata(store: Store, metadata_key=".zmetadata"): """ Consolidate all metadata for groups and arrays within the given store into a single resource and put it under the given key. @@ -1124,7 +1129,7 @@ def is_zarr_key(key): return open_consolidated(store, metadata_key=metadata_key) -def open_consolidated(store, metadata_key='.zmetadata', mode='r+', **kwargs): +def open_consolidated(store: Store, metadata_key=".zmetadata", mode="r+", **kwargs): """Open group using metadata previously consolidated into a single key. This is an optimised method for opening a Zarr group, where instead of diff --git a/zarr/core.py b/zarr/core.py index 3df8043000..60d92a2c52 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -9,6 +9,8 @@ import numpy as np from numcodecs.compat import ensure_bytes, ensure_ndarray +from collections.abc import MutableMapping + from zarr.attrs import Attributes from zarr.codecs import AsType, get_codec from zarr.errors import ArrayNotFoundError, ReadOnlyError, ArrayIndexError @@ -29,7 +31,7 @@ pop_fields, ) from zarr.meta import decode_array_metadata, encode_array_metadata -from zarr.storage import array_meta_key, attrs_key, getsize, listdir +from zarr.storage import array_meta_key, attrs_key, getsize, listdir, Store from zarr.util import ( InfoReporter, check_array_shape, @@ -130,7 +132,7 @@ class Array: def __init__( self, - store, + store: Store, path=None, read_only=False, chunk_store=None, @@ -142,6 +144,9 @@ def __init__( # N.B., expect at this point store is fully initialized with all # configuration metadata fully specified and normalized + store = Store._ensure_store(store) + chunk_store = Store._ensure_store(chunk_store) + self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) @@ -2011,7 +2016,7 @@ def _encode_chunk(self, chunk): cdata = chunk # ensure in-memory data is immutable and easy to compare - if isinstance(self.chunk_store, dict): + if isinstance(self.chunk_store, MutableMapping): cdata = ensure_bytes(cdata) return cdata @@ -2044,10 +2049,10 @@ def info(self): Order : C Read-only : False Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.KVStore No. bytes : 4000000 (3.8M) - No. bytes stored : ... - Storage ratio : ... + No. bytes stored : 320 + Storage ratio : 12500.0 Chunks initialized : 0/10 """ diff --git a/zarr/creation.py b/zarr/creation.py index 73e10adff1..600252b5ba 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -2,6 +2,7 @@ import numpy as np from numcodecs.registry import codec_registry +from collections.abc import MutableMapping from zarr.core import Array from zarr.errors import ( @@ -10,9 +11,9 @@ ContainsGroupError, ) from zarr.n5 import N5Store -from zarr.storage import (DirectoryStore, ZipStore, contains_array, +from zarr.storage import (DirectoryStore, ZipStore, KVStore, contains_array, contains_group, default_compressor, init_array, - normalize_storage_path, FSStore) + normalize_storage_path, FSStore, Store) from zarr.util import normalize_dimension_separator @@ -145,9 +146,9 @@ def create(shape, chunks=True, dtype=None, compressor='default', return z -def normalize_store_arg(store, clobber=False, storage_options=None, mode='w'): +def normalize_store_arg(store, clobber=False, storage_options=None, mode="w") -> Store: if store is None: - return dict() + return Store._ensure_store(dict()) elif isinstance(store, str): mode = mode if clobber else "r" if "://" in store or "::" in store: @@ -161,6 +162,8 @@ def normalize_store_arg(store, clobber=False, storage_options=None, mode='w'): else: return DirectoryStore(store) else: + if not isinstance(store, Store) and isinstance(store, MutableMapping): + store = KVStore(store) return store diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 89804d445b..11717e572b 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -15,11 +15,26 @@ ReadOnlyError, ) from zarr.meta import decode_group_metadata -from zarr.storage import (MemoryStore, attrs_key, contains_array, - contains_group, group_meta_key, init_group, listdir, - rename, rmdir) -from zarr.util import (InfoReporter, TreeViewer, is_valid_python_name, nolock, - normalize_shape, normalize_storage_path) +from zarr.storage import ( + MemoryStore, + attrs_key, + contains_array, + contains_group, + group_meta_key, + init_group, + listdir, + rename, + rmdir, + Store, +) +from zarr.util import ( + InfoReporter, + TreeViewer, + is_valid_python_name, + nolock, + normalize_shape, + normalize_storage_path, +) class Group(MutableMapping): @@ -96,6 +111,8 @@ class Group(MutableMapping): def __init__(self, store, path=None, read_only=False, chunk_store=None, cache_attrs=True, synchronizer=None): + store = Store._ensure_store(store) + chunk_store = Store._ensure_store(chunk_store) self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) @@ -237,11 +254,8 @@ def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): - """If the underlying Store has a ``close`` method, call it.""" - try: - self.store.close() - except AttributeError: - pass + """If the underlying Store should always heave a ``close`` method, call it.""" + self.store.close() def info_items(self): @@ -804,11 +818,13 @@ def create_dataset(self, name, **kwargs): """ + assert "mode" not in kwargs return self._write_op(self._create_dataset_nosync, name, **kwargs) def _create_dataset_nosync(self, name, data=None, **kwargs): + assert "mode" not in kwargs path = self._item_path(name) # determine synchronizer diff --git a/zarr/storage.py b/zarr/storage.py index d2de2cda4c..06e213985f 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -31,7 +31,7 @@ from os import scandir from pickle import PicklingError from threading import Lock, RLock -from typing import Optional, Union, List, Tuple, Dict +from typing import Optional, Union, List, Tuple, Dict, Any import uuid import time @@ -79,6 +79,124 @@ Path = Union[str, bytes, None] +class Store(MutableMapping): + """Base class for stores implementation. + + + Provide a number of default method as well as other typing guaranties for mypy. + + Stores cannot be mutable mapping as they do have a couple of other + requirements that would break Liskov substitution principle (stores only + allow strings as keys, mutable mapping are more generic). + + And Stores do requires a few other method. + + Having no-op base method also helps simplifying store usage and do not need + to check the presence of attributes and methods, like `close()`. + + Stores can be used as context manager to make sure they close on exit. + + .. added: 2.5.0 + + """ + + _readable = True + _writeable = True + _erasable = True + _listable = True + + def is_readable(self): + return self._readable + + def is_writeable(self): + return self._writeable + + def is_listable(self): + return self._listable + + def is_erasable(self): + return self._erasable + + def __enter__(self): + if not hasattr(self, "_open_count"): + self._open_count = 0 + self._open_count += 1 + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._open_count -= 1 + if self._open_count == 0: + self.close() + + def listdir(self, path: str = "") -> List[str]: + path = normalize_storage_path(path) + return _listdir_from_keys(self, path) + + def rename(self, src_path: str, dst_path: str) -> None: + if not self.is_erasable(): + raise NotImplementedError( + f'{type(self)} is not erasable, cannot call "rmdir"' + ) + _rename_from_keys(self, src_path, dst_path) + + def rmdir(self, path: str = "") -> None: + if not self.is_erasable(): + raise NotImplementedError( + f'{type(self)} is not erasable, cannot call "rmdir"' + ) + path = normalize_storage_path(path) + _rmdir_from_keys(self, path) + + # def getsize(self, path: Path = None): + # pass + + # def clear(self): + # pass + + def close(self) -> None: + """Do nothing by default""" + pass + + # def pop(self, key): + # raise NotImplementedError + + @staticmethod + def _ensure_store(store): + """ + We want to make sure internally that zarr storage are internally always + a class with a specific interface derived from Store, which is slightly + different than mutable mapping. + + We'll do this conversion in a few places automatically + """ + if store is None: + return None + elif isinstance(store, Store): + return store + elif isinstance(store, MutableMapping): + return KVStore(store) + else: + for attr in [ + "keys", + "values", + "get", + "__setitem__", + "__getitem__", + "__delitem__", + "__contains__", + ]: + if not hasattr(store, attr): + break + else: + return KVStore(store) + + raise ValueError( + "Starting with Zarr X.y.z, stores must be subclasses of Store, if " + "you store expose the MutableMapping interface wrap them in " + f"Zarr.storage.KVStore. Got {store}" + ) + + def _path_to_prefix(path: Optional[str]) -> str: # assume path already normalized if path: @@ -88,7 +206,7 @@ def _path_to_prefix(path: Optional[str]) -> str: return prefix -def contains_array(store: MutableMapping, path: Path = None) -> bool: +def contains_array(store: Store, path: Path = None) -> bool: """Return True if the store contains an array at the given logical path.""" path = normalize_storage_path(path) prefix = _path_to_prefix(path) @@ -96,7 +214,7 @@ def contains_array(store: MutableMapping, path: Path = None) -> bool: return key in store -def contains_group(store: MutableMapping, path: Path = None) -> bool: +def contains_group(store: Store, path: Path = None) -> bool: """Return True if the store contains a group at the given logical path.""" path = normalize_storage_path(path) prefix = _path_to_prefix(path) @@ -104,7 +222,7 @@ def contains_group(store: MutableMapping, path: Path = None) -> bool: return key in store -def _rmdir_from_keys(store: MutableMapping, path: Optional[str] = None) -> None: +def _rmdir_from_keys(store: Store, path: Optional[str] = None) -> None: # assume path already normalized prefix = _path_to_prefix(path) for key in list(store.keys()): @@ -112,20 +230,20 @@ def _rmdir_from_keys(store: MutableMapping, path: Optional[str] = None) -> None: del store[key] -def rmdir(store, path: Path = None): +def rmdir(store: Store, path: Path = None): """Remove all items under the given path. If `store` provides a `rmdir` method, this will be called, otherwise will fall back to implementation via the - `MutableMapping` interface.""" + `Store` interface.""" path = normalize_storage_path(path) if hasattr(store, 'rmdir'): # pass through - store.rmdir(path) + store.rmdir(path) # type: ignore else: # slow version, delete one key at a time _rmdir_from_keys(store, path) -def _rename_from_keys(store: MutableMapping, src_path: str, dst_path: str) -> None: +def _rename_from_keys(store: Store, src_path: str, dst_path: str) -> None: # assume path already normalized src_prefix = _path_to_prefix(src_path) dst_prefix = _path_to_prefix(dst_path) @@ -135,10 +253,10 @@ def _rename_from_keys(store: MutableMapping, src_path: str, dst_path: str) -> No store[new_key] = store.pop(key) -def rename(store, src_path: Path, dst_path: Path): +def rename(store: Store, src_path: Path, dst_path: Path): """Rename all items under the given path. If `store` provides a `rename` method, this will be called, otherwise will fall back to implementation via the - `MutableMapping` interface.""" + `Store` interface.""" src_path = normalize_storage_path(src_path) dst_path = normalize_storage_path(dst_path) if hasattr(store, 'rename'): @@ -149,7 +267,7 @@ def rename(store, src_path: Path, dst_path: Path): _rename_from_keys(store, src_path, dst_path) -def _listdir_from_keys(store: MutableMapping, path: Optional[str] = None) -> List[str]: +def _listdir_from_keys(store: Store, path: Optional[str] = None) -> List[str]: # assume path already normalized prefix = _path_to_prefix(path) children = set() @@ -161,27 +279,32 @@ def _listdir_from_keys(store: MutableMapping, path: Optional[str] = None) -> Lis return sorted(children) -def listdir(store, path: Path = None): +def listdir(store: Store, path: Path = None): """Obtain a directory listing for the given path. If `store` provides a `listdir` method, this will be called, otherwise will fall back to implementation via the `MutableMapping` interface.""" path = normalize_storage_path(path) if hasattr(store, 'listdir'): # pass through - return store.listdir(path) + return store.listdir(path) # type: ignore else: # slow version, iterate through all keys + warnings.warn( + "Store {store} has not `listdir` method. From zarr 2.5 you may want" + "to inherit from `Store`.".format(store=store), + stacklevel=2, + ) return _listdir_from_keys(store, path) -def getsize(store, path: Path = None) -> int: +def getsize(store: Store, path: Path = None) -> int: """Compute size of stored items for a given path. If `store` provides a `getsize` method, this will be called, otherwise will return -1.""" path = normalize_storage_path(path) if hasattr(store, 'getsize'): # pass through - return store.getsize(path) - elif isinstance(store, dict): + return store.getsize(path) # type: ignore + elif isinstance(store, MutableMapping): # compute from size of values if path in store: v = store[path] @@ -207,8 +330,8 @@ def getsize(store, path: Path = None) -> int: def _require_parent_group( path: Optional[str], - store: MutableMapping, - chunk_store: Optional[MutableMapping], + store: Store, + chunk_store: Optional[Store], overwrite: bool, ): # assume path is normalized @@ -224,7 +347,7 @@ def _require_parent_group( def init_array( - store: MutableMapping, + store: Store, shape: Tuple[int, ...], chunks: Union[bool, int, Tuple[int, ...]] = True, dtype=None, @@ -233,7 +356,7 @@ def init_array( order: str = "C", overwrite: bool = False, path: Path = None, - chunk_store: MutableMapping = None, + chunk_store: Store = None, filters=None, object_codec=None, dimension_separator=None, @@ -243,7 +366,7 @@ def init_array( Parameters ---------- - store : MutableMapping + store : Store A mapping that supports string keys and bytes-like values. shape : int or tuple of ints Array shape. @@ -262,7 +385,7 @@ def init_array( If True, erase all data in `store` prior to initialisation. path : string, bytes, optional Path under which array is stored. - chunk_store : MutableMapping, optional + chunk_store : Store, optional Separate storage for chunks. If not provided, `store` will be used for storage of both chunks and metadata. filters : sequence, optional @@ -455,23 +578,23 @@ def _init_array_metadata( def init_group( - store: MutableMapping, + store: Store, overwrite: bool = False, path: Path = None, - chunk_store: MutableMapping = None, + chunk_store: Store = None, ): """Initialize a group store. Note that this is a low-level function and there should be no need to call this directly from user code. Parameters ---------- - store : MutableMapping + store : Store A mapping that supports string keys and byte sequence values. overwrite : bool, optional If True, erase all data in `store` prior to initialisation. path : string, optional Path under which array is stored. - chunk_store : MutableMapping, optional + chunk_store : Store, optional Separate storage for chunks. If not provided, `store` will be used for storage of both chunks and metadata. @@ -490,10 +613,10 @@ def init_group( def _init_group_metadata( - store: MutableMapping, + store: Store, overwrite: Optional[bool] = False, path: Optional[str] = None, - chunk_store: MutableMapping = None, + chunk_store: Store = None, ): # guard conditions @@ -525,7 +648,69 @@ def _dict_store_keys(d: Dict, prefix="", cls=dict): yield prefix + k -class MemoryStore(MutableMapping): +class KVStore(Store): + """ + This provide an default implementation of a store interface around + a mutable mapping, to avoid having to test stores for presence of methods + + This, for most method should just be a pass-through to the underlying KV + store which is likely to expose a MuttableMapping interface, + """ + + def __init__(self, mutablemapping): + self.mm = mutablemapping + + def __getitem__(self, key): + return self.mm[key] + + def __setitem__(self, key, value): + # assert isinstance(value, bytes) + self.mm[key] = value + + def __delitem__(self, key): + del self.mm[key] + + def get(self, key, default=None): + return self.mm.get(key, default) + + def values(self): + return self.mm.values() + + def __iter__(self): + return iter(self.mm) + + def __len__(self): + return len(self.mm) + + def __repr__(self): + return f"<{self.__class__.__name__}: \n{repr(self.mm)}\n at {hex(id(self))}>" + + def __eq__(self, other): + if isinstance(other, KVStore): + return self.mm == other.mm + else: + return NotImplemented + + # def __contains__(self, key): + # return key in self.mm + + # def pop(self): + # return self.mm.pop() + + # def popitem + # def clear + # def update + # def setdefault + # def __eq__ + # def __ne__ + # def __contains__ + # def keys() + # def items() + # def values() + # def get + + +class MemoryStore(Store): """Store class that uses a hierarchy of :class:`dict` objects, thus all data will be held in main memory. @@ -543,7 +728,7 @@ class MemoryStore(MutableMapping): >>> z = zarr.zeros(100) >>> type(z.store) - + Notes ----- @@ -729,7 +914,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) -class DirectoryStore(MutableMapping): +class DirectoryStore(Store): """Storage class using directories and files on a standard file system. Parameters @@ -1015,7 +1200,7 @@ def atexit_rmglob(path, rmtree(p) -class FSStore(MutableMapping): +class FSStore(Store): """Wraps an fsspec.FSMap to give access to arbitrary filesystems Requires that ``fsspec`` is installed, as well as any additional @@ -1359,7 +1544,7 @@ def listdir(self, path=None): # noinspection PyPep8Naming -class ZipStore(MutableMapping): +class ZipStore(Store): """Storage class using a Zip file. Parameters @@ -1451,6 +1636,8 @@ class also supports the context manager protocol, which ensures the ``close()`` """ + _erasable = False + def __init__(self, path, compression=zipfile.ZIP_STORED, allowZip64=True, mode='a', dimension_separator=None): @@ -1613,7 +1800,7 @@ def migrate_1to2(store): Parameters ---------- - store : MutableMapping + store : Store Store to be migrated. Notes @@ -1657,7 +1844,7 @@ def migrate_1to2(store): # noinspection PyShadowingBuiltins -class DBMStore(MutableMapping): +class DBMStore(Store): """Storage class using a DBM-style database. Parameters @@ -1849,7 +2036,7 @@ def __contains__(self, key): return key in self.db -class LMDBStore(MutableMapping): +class LMDBStore(Store): """Storage class using LMDB. Requires the `lmdb `_ package to be installed. @@ -2026,7 +2213,7 @@ def __len__(self): return self.db.stat()['entries'] -class LRUStoreCache(MutableMapping): +class LRUStoreCache(Store): """Storage class that implements a least-recently-used (LRU) cache layer over some other store. Intended primarily for use with stores that can be slow to access, e.g., remote stores that require network communication to store and @@ -2034,7 +2221,7 @@ class LRUStoreCache(MutableMapping): Parameters ---------- - store : MutableMapping + store : Store The store containing the actual data to be cached. max_size : int The maximum size that the cache may grow to, in number of bytes. Provide `None` @@ -2063,14 +2250,14 @@ class LRUStoreCache(MutableMapping): """ - def __init__(self, store, max_size): - self._store = store + def __init__(self, store: Store, max_size: int): + self._store = Store._ensure_store(store) self._max_size = max_size self._current_size = 0 self._keys_cache = None self._contains_cache = None - self._listdir_cache = dict() - self._values_cache = OrderedDict() + self._listdir_cache: Dict[Path, Any] = dict() + self._values_cache: Dict[Path, Any] = OrderedDict() self._mutex = Lock() self.hits = self.misses = 0 @@ -2110,7 +2297,7 @@ def _keys(self): self._keys_cache = list(self._store.keys()) return self._keys_cache - def listdir(self, path=None): + def listdir(self, path: Path = None): with self._mutex: try: return self._listdir_cache[path] @@ -2119,7 +2306,7 @@ def listdir(self, path=None): self._listdir_cache[path] = listing return listing - def getsize(self, path=None): + def getsize(self, path=None) -> int: return getsize(self._store, path=path) def _pop_value(self): @@ -2136,7 +2323,7 @@ def _accommodate_value(self, value_size): v = self._pop_value() self._current_size -= buffer_size(v) - def _cache_value(self, key, value): + def _cache_value(self, key: Path, value): # cache a value value_size = buffer_size(value) # check size of the value against max size, as if the value itself exceeds max @@ -2208,7 +2395,7 @@ def __delitem__(self, key): self._invalidate_value(key) -class ABSStore(MutableMapping): +class ABSStore(Store): """Storage class using Azure Blob Storage (ABS). Parameters @@ -2355,7 +2542,7 @@ def __contains__(self, key): blob_name = self._append_path_to_prefix(key) return self.client.get_blob_client(blob_name).exists() - def listdir(self, path=None): + def listdir(self, path: Path = None): dir_path = normalize_storage_path(self._append_path_to_prefix(path)) if dir_path: dir_path += '/' @@ -2398,7 +2585,7 @@ def clear(self): self.rmdir() -class SQLiteStore(MutableMapping): +class SQLiteStore(Store): """Storage class using SQLite. Parameters @@ -2601,7 +2788,7 @@ def clear(self): ) -class MongoDBStore(MutableMapping): +class MongoDBStore(Store): """Storage class using MongoDB. .. note:: This is an experimental feature. @@ -2684,7 +2871,7 @@ def clear(self): self.collection.delete_many({}) -class RedisStore(MutableMapping): +class RedisStore(Store): """Storage class using Redis. .. note:: This is an experimental feature. @@ -2753,7 +2940,7 @@ def clear(self): del self[key] -class ConsolidatedMetadataStore(MutableMapping): +class ConsolidatedMetadataStore(Store): """A layer over other storage, where the metadata has been consolidated into a single key. @@ -2777,7 +2964,7 @@ class ConsolidatedMetadataStore(MutableMapping): Parameters ---------- - store: MutableMapping + store: Store Containing the zarr array. metadata_key: str The target in the store where all of the metadata are stored. We @@ -2789,7 +2976,7 @@ class ConsolidatedMetadataStore(MutableMapping): """ - def __init__(self, store, metadata_key='.zmetadata'): + def __init__(self, store: Store, metadata_key=".zmetadata"): self.store = store # retrieve consolidated metadata @@ -2802,7 +2989,7 @@ def __init__(self, store, metadata_key='.zmetadata'): consolidated_format) # decode metadata - self.meta_store = meta['metadata'] + self.meta_store: Store = KVStore(meta["metadata"]) def __getitem__(self, key): return self.meta_store[key] diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 20cd25027c..fab97d2492 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -23,8 +23,12 @@ from zarr.core import Array from zarr.errors import CopyError from zarr.hierarchy import Group, group -from zarr.storage import (ConsolidatedMetadataStore, MemoryStore, - atexit_rmtree, getsize) +from zarr.storage import ( + ConsolidatedMetadataStore, + MemoryStore, + atexit_rmtree, + getsize, +) def test_open_array(): diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index cd3efcd82f..3be1f8d3e6 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -23,11 +23,12 @@ ABSStore, DBMStore, DirectoryStore, + FSStore, + KVStore, LMDBStore, LRUStoreCache, NestedDirectoryStore, SQLiteStore, - FSStore, atexit_rmglob, atexit_rmtree, init_array, @@ -44,8 +45,8 @@ class TestArray(unittest.TestCase): def test_array_init(self): # normal initialization - store = dict() - init_array(store, shape=100, chunks=10, dtype=' end assert [] == list(z.islice(6, 5)) - if hasattr(z.store, 'close'): - z.store.close() + z.store.close() def test_iter(self): params = ( @@ -1419,8 +1363,7 @@ def test_iter(self): z[:] = a for expect, actual in zip_longest(a, z): assert_array_equal(expect, actual) - if hasattr(z.store, 'close'): - z.store.close() + z.store.close() def test_islice(self): params = ( @@ -1458,8 +1401,7 @@ def test_compressors(self): assert np.all(a[0:100] == 1) a[:] = 1 assert np.all(a[:] == 1) - if hasattr(a.store, 'close'): - a.store.close() + a.store.close() def test_endian(self): dtype = np.dtype('float32') @@ -1470,10 +1412,8 @@ def test_endian(self): a2[:] = 1 x2 = a2[:] assert_array_equal(x1, x2) - if hasattr(a1.store, 'close'): - a1.store.close() - if hasattr(a2.store, 'close'): - a2.store.close() + a1.store.close() + a2.store.close() def test_attributes(self): a = self.create_array(shape=10, chunks=10, dtype='i8') @@ -1487,8 +1427,7 @@ def test_attributes(self): attrs = json_loads(a.store[a.attrs.key]) assert 'foo' in attrs and attrs['foo'] == 'bar' assert 'bar' in attrs and attrs['bar'] == 'foo' - if hasattr(a.store, 'close'): - a.store.close() + a.store.close() class TestArrayWithPath(TestArray): @@ -1502,6 +1441,9 @@ def create_array(read_only=False, **kwargs): return Array(store, path='foo/bar', read_only=read_only, cache_metadata=cache_metadata, cache_attrs=cache_attrs) + def test_nchunks_initialized(self): + pass + def test_hexdigest(self): # Check basic 1-D array z = self.create_array(shape=(1050,), chunks=100, dtype=' Date: Sat, 27 Feb 2021 10:44:47 -0800 Subject: [PATCH 02/30] cleanup naming --- zarr/storage.py | 48 ++++++++++-------------------------------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 06e213985f..ed6436bc89 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -147,19 +147,10 @@ def rmdir(self, path: str = "") -> None: path = normalize_storage_path(path) _rmdir_from_keys(self, path) - # def getsize(self, path: Path = None): - # pass - - # def clear(self): - # pass - def close(self) -> None: """Do nothing by default""" pass - # def pop(self, key): - # raise NotImplementedError - @staticmethod def _ensure_store(store): """ @@ -658,57 +649,38 @@ class KVStore(Store): """ def __init__(self, mutablemapping): - self.mm = mutablemapping + self._mutable_mapping = mutablemapping def __getitem__(self, key): - return self.mm[key] + return self._mutable_mapping[key] def __setitem__(self, key, value): - # assert isinstance(value, bytes) - self.mm[key] = value + self._mutable_mapping[key] = value def __delitem__(self, key): - del self.mm[key] + del self._mutable_mapping[key] def get(self, key, default=None): - return self.mm.get(key, default) + return self._mutable_mapping.get(key, default) def values(self): - return self.mm.values() + return self._mutable_mapping.values() def __iter__(self): - return iter(self.mm) + return iter(self._mutable_mapping) def __len__(self): - return len(self.mm) + return len(self._mutable_mapping) def __repr__(self): - return f"<{self.__class__.__name__}: \n{repr(self.mm)}\n at {hex(id(self))}>" + return f"<{self.__class__.__name__}: \n{repr(self._mutable_mapping)}\n at {hex(id(self))}>" def __eq__(self, other): if isinstance(other, KVStore): - return self.mm == other.mm + return self._mutable_mapping == other._mutable_mapping else: return NotImplemented - # def __contains__(self, key): - # return key in self.mm - - # def pop(self): - # return self.mm.pop() - - # def popitem - # def clear - # def update - # def setdefault - # def __eq__ - # def __ne__ - # def __contains__ - # def keys() - # def items() - # def values() - # def get - class MemoryStore(Store): """Store class that uses a hierarchy of :class:`dict` objects, thus all data From 6ca96315792b52c7ef42491b5035ab4250a4d71e Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Sat, 27 Feb 2021 10:49:21 -0800 Subject: [PATCH 03/30] zip move --- zarr/tests/test_hierarchy.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index a6bdc72891..27f9aec6ff 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -1017,6 +1017,11 @@ def test_context_manager(self): with pytest.raises(ValueError): store.zf.extractall() + def test_move(self): + # zip store is not erasable (can so far only append to a zip + # so we can't test for move. + pass + class TestGroupWithDBMStore(TestGroup): From b3db0f1266195b59fa0f92e1fe309c7e180cdc47 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Sat, 27 Feb 2021 10:55:31 -0800 Subject: [PATCH 04/30] fix erasability test --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index ed6436bc89..f5445aa312 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -226,7 +226,7 @@ def rmdir(store: Store, path: Path = None): this will be called, otherwise will fall back to implementation via the `Store` interface.""" path = normalize_storage_path(path) - if hasattr(store, 'rmdir'): + if hasattr(store, "rmdir") and store.is_erasable(): # pass through store.rmdir(path) # type: ignore else: From 1db3be27d05fa4823a2f3f25a54f7face07656a5 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Sat, 27 Feb 2021 11:06:48 -0800 Subject: [PATCH 05/30] test for warning --- zarr/tests/test_storage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 36a01365c2..6520c6f3db 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -91,8 +91,7 @@ def test_coverage_rename(): def test_deprecated_listdir_nosotre(): store = dict() - with warnings.catch_warnings(): - warnings.simplefilter("default") + with pytest.warns(UserWarning, match="has not `listdir`"): listdir(store) From 8d286780bfb452a815cbf92419f57765e59f0867 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 10 Mar 2021 11:58:21 -0800 Subject: [PATCH 06/30] please flake --- zarr/tests/test_storage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 6520c6f3db..55c293430f 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -9,7 +9,6 @@ from contextlib import contextmanager from pickle import PicklingError from zipfile import ZipFile -import warnings import numpy as np import pytest From b5240e4dc25e73fefde5dd11dad21adca9481986 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 10 Mar 2021 12:14:11 -0800 Subject: [PATCH 07/30] remove uncovered lines --- zarr/tests/test_storage.py | 94 ++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 49 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 55c293430f..84305f9234 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -346,55 +346,51 @@ def test_hierarchy(self): # test rename (optional) if store.is_erasable(): - try: - store.rename("c/e", "c/e2") - assert "c/d" in store - assert "c/e" not in store - assert "c/e/f" not in store - assert "c/e/g" not in store - assert "c/e2" not in store - assert "c/e2/f" in store - assert "c/e2/g" in store - store.rename("c/e2", "c/e") - assert "c/d" in store - assert "c/e2" not in store - assert "c/e2/f" not in store - assert "c/e2/g" not in store - assert "c/e" not in store - assert "c/e/f" in store - assert "c/e/g" in store - store.rename("c", "c1/c2/c3") - assert "a" in store - assert "c" not in store - assert "c/d" not in store - assert "c/e" not in store - assert "c/e/f" not in store - assert "c/e/g" not in store - assert "c1" not in store - assert "c1/c2" not in store - assert "c1/c2/c3" not in store - assert "c1/c2/c3/d" in store - assert "c1/c2/c3/e" not in store - assert "c1/c2/c3/e/f" in store - assert "c1/c2/c3/e/g" in store - store.rename("c1/c2/c3", "c") - assert "c" not in store - assert "c/d" in store - assert "c/e" not in store - assert "c/e/f" in store - assert "c/e/g" in store - assert "c1" not in store - assert "c1/c2" not in store - assert "c1/c2/c3" not in store - assert "c1/c2/c3/d" not in store - assert "c1/c2/c3/e" not in store - assert "c1/c2/c3/e/f" not in store - assert "c1/c2/c3/e/g" not in store - except NotImplementedError: - pass - - # test rmdir (optional) - if store.is_erasable(): + store.rename("c/e", "c/e2") + assert "c/d" in store + assert "c/e" not in store + assert "c/e/f" not in store + assert "c/e/g" not in store + assert "c/e2" not in store + assert "c/e2/f" in store + assert "c/e2/g" in store + store.rename("c/e2", "c/e") + assert "c/d" in store + assert "c/e2" not in store + assert "c/e2/f" not in store + assert "c/e2/g" not in store + assert "c/e" not in store + assert "c/e/f" in store + assert "c/e/g" in store + store.rename("c", "c1/c2/c3") + assert "a" in store + assert "c" not in store + assert "c/d" not in store + assert "c/e" not in store + assert "c/e/f" not in store + assert "c/e/g" not in store + assert "c1" not in store + assert "c1/c2" not in store + assert "c1/c2/c3" not in store + assert "c1/c2/c3/d" in store + assert "c1/c2/c3/e" not in store + assert "c1/c2/c3/e/f" in store + assert "c1/c2/c3/e/g" in store + store.rename("c1/c2/c3", "c") + assert "c" not in store + assert "c/d" in store + assert "c/e" not in store + assert "c/e/f" in store + assert "c/e/g" in store + assert "c1" not in store + assert "c1/c2" not in store + assert "c1/c2/c3" not in store + assert "c1/c2/c3/d" not in store + assert "c1/c2/c3/e" not in store + assert "c1/c2/c3/e/f" not in store + assert "c1/c2/c3/e/g" not in store + + # test rmdir (optional) store.rmdir("c/e") assert "c/d" in store assert "c/e/f" not in store From b7aa2d876291fe89be7ee18dbde283926df6eae4 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 10 Mar 2021 12:52:31 -0800 Subject: [PATCH 08/30] remove uncovered lines in tests --- zarr/tests/test_hierarchy.py | 67 +++++++++++++++++------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 27f9aec6ff..e9a8c6cf79 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -738,42 +738,39 @@ def test_move(self): data = np.arange(100) g['foo'] = data - try: - g.move('foo', 'bar') - assert 'foo' not in g - assert 'bar' in g - assert_array_equal(data, g['bar']) + g.move("foo", "bar") + assert "foo" not in g + assert "bar" in g + assert_array_equal(data, g["bar"]) + + g.move("bar", "foo/bar") + assert "bar" not in g + assert "foo" in g + assert "foo/bar" in g + assert isinstance(g["foo"], Group) + assert_array_equal(data, g["foo/bar"]) + + g.move("foo", "foo2") + assert "foo" not in g + assert "foo/bar" not in g + assert "foo2" in g + assert "foo2/bar" in g + assert isinstance(g["foo2"], Group) + assert_array_equal(data, g["foo2/bar"]) + + g2 = g["foo2"] + g2.move("bar", "/bar") + assert "foo2" in g + assert "foo2/bar" not in g + assert "bar" in g + assert isinstance(g["foo2"], Group) + assert_array_equal(data, g["bar"]) - g.move('bar', 'foo/bar') - assert 'bar' not in g - assert 'foo' in g - assert 'foo/bar' in g - assert isinstance(g['foo'], Group) - assert_array_equal(data, g['foo/bar']) - - g.move('foo', 'foo2') - assert 'foo' not in g - assert 'foo/bar' not in g - assert 'foo2' in g - assert 'foo2/bar' in g - assert isinstance(g['foo2'], Group) - assert_array_equal(data, g['foo2/bar']) - - g2 = g['foo2'] - g2.move('bar', '/bar') - assert 'foo2' in g - assert 'foo2/bar' not in g - assert 'bar' in g - assert isinstance(g['foo2'], Group) - assert_array_equal(data, g['bar']) - - with pytest.raises(ValueError): - g2.move('bar', 'bar2') - - with pytest.raises(ValueError): - g.move('bar', 'boo') - except NotImplementedError: - pass + with pytest.raises(ValueError): + g2.move("bar", "bar2") + + with pytest.raises(ValueError): + g.move("bar", "boo") g.store.close() From 30afcbbc3bbf5aef5bb249cb69472be672567ecd Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 10 Mar 2021 12:54:53 -0800 Subject: [PATCH 09/30] pragma no cover for exceptional case --- zarr/storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index f5445aa312..2364d2bc2e 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -136,14 +136,14 @@ def rename(self, src_path: str, dst_path: str) -> None: if not self.is_erasable(): raise NotImplementedError( f'{type(self)} is not erasable, cannot call "rmdir"' - ) + ) # pragma: no cover _rename_from_keys(self, src_path, dst_path) def rmdir(self, path: str = "") -> None: if not self.is_erasable(): raise NotImplementedError( f'{type(self)} is not erasable, cannot call "rmdir"' - ) + ) # pragma: no cover path = normalize_storage_path(path) _rmdir_from_keys(self, path) From 813671318edbca75b6bd0de3acde4ce555d026a2 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 23 Jun 2021 12:42:43 -0400 Subject: [PATCH 10/30] minor docstring fixes add assert statements to test_capabilities --- zarr/hierarchy.py | 2 +- zarr/storage.py | 28 ++++++++++++++-------------- zarr/tests/test_storage.py | 11 +++++------ 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 11717e572b..fc64c1cf96 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -254,7 +254,7 @@ def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): - """If the underlying Store should always heave a ``close`` method, call it.""" + """Call the close method of the underlying Store.""" self.store.close() def info_items(self): diff --git a/zarr/storage.py b/zarr/storage.py index 2364d2bc2e..01dd9a0bde 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -82,8 +82,8 @@ class Store(MutableMapping): """Base class for stores implementation. - - Provide a number of default method as well as other typing guaranties for mypy. + Provide a number of default method as well as other typing guaranties for + mypy. Stores cannot be mutable mapping as they do have a couple of other requirements that would break Liskov substitution principle (stores only @@ -96,7 +96,7 @@ class Store(MutableMapping): Stores can be used as context manager to make sure they close on exit. - .. added: 2.5.0 + .. added: 2.9.0 """ @@ -135,7 +135,7 @@ def listdir(self, path: str = "") -> List[str]: def rename(self, src_path: str, dst_path: str) -> None: if not self.is_erasable(): raise NotImplementedError( - f'{type(self)} is not erasable, cannot call "rmdir"' + f'{type(self)} is not erasable, cannot call "rename"' ) # pragma: no cover _rename_from_keys(self, src_path, dst_path) @@ -154,9 +154,9 @@ def close(self) -> None: @staticmethod def _ensure_store(store): """ - We want to make sure internally that zarr storage are internally always - a class with a specific interface derived from Store, which is slightly - different than mutable mapping. + We want to make sure internally that zarr stores are always a class + with a specific interface derived from ``Store``, which is slightly + different than ``MutableMapping``. We'll do this conversion in a few places automatically """ @@ -182,8 +182,8 @@ def _ensure_store(store): return KVStore(store) raise ValueError( - "Starting with Zarr X.y.z, stores must be subclasses of Store, if " - "you store expose the MutableMapping interface wrap them in " + "Starting with Zarr 2.9.0, stores must be subclasses of Store, if " + "your store exposes the MutableMapping interface wrap it in " f"Zarr.storage.KVStore. Got {store}" ) @@ -281,8 +281,8 @@ def listdir(store: Store, path: Path = None): else: # slow version, iterate through all keys warnings.warn( - "Store {store} has not `listdir` method. From zarr 2.5 you may want" - "to inherit from `Store`.".format(store=store), + f"Store {store} has no `listdir` method. From zarr 2.9 onwards " + "may want to inherit from `Store`.", stacklevel=2, ) return _listdir_from_keys(store, path) @@ -641,10 +641,10 @@ def _dict_store_keys(d: Dict, prefix="", cls=dict): class KVStore(Store): """ - This provide an default implementation of a store interface around - a mutable mapping, to avoid having to test stores for presence of methods + This provides a default implementation of a store interface around + a mutable mapping, to avoid having to test stores for presence of methods. - This, for most method should just be a pass-through to the underlying KV + This, for most methods should just be a pass-through to the underlying KV store which is likely to expose a MuttableMapping interface, """ diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 84305f9234..0d920c1293 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -68,11 +68,10 @@ class InvalidStore: def test_capabilities(): s = KVStore(dict()) - s.is_readable() - s.is_listable() - s.is_erasable() - s.is_writeable() - + assert s.is_readable() + assert s.is_listable() + assert s.is_erasable() + assert s.is_writeable() def test_getsize_non_implemented(): assert getsize(object()) == -1 @@ -90,7 +89,7 @@ def test_coverage_rename(): def test_deprecated_listdir_nosotre(): store = dict() - with pytest.warns(UserWarning, match="has not `listdir`"): + with pytest.warns(UserWarning, match="has no `listdir`"): listdir(store) From e38a9684043d101f82db09d832d6f0df672f2433 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 23 Jun 2021 16:04:57 -0400 Subject: [PATCH 11/30] pep8 fix --- zarr/tests/test_storage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 0d920c1293..4e8c79918c 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -73,6 +73,7 @@ def test_capabilities(): assert s.is_erasable() assert s.is_writeable() + def test_getsize_non_implemented(): assert getsize(object()) == -1 From eb2a6da51bf8761b4994c201bd861734fabba810 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Thu, 24 Jun 2021 08:51:38 -0400 Subject: [PATCH 12/30] avoid NumPy 1.21.0 due to https://github.com/numpy/numpy/issues/19325 --- .github/workflows/python-package.yml | 2 +- requirements_rtfd.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index e143d4e2c5..d9bc362d12 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: python-version: [3.7, 3.8] - numpy_version: ['', '==1.17.*'] + numpy_version: ['!=1.21.0', '==1.17.*'] services: redis: image: redis diff --git a/requirements_rtfd.txt b/requirements_rtfd.txt index fbf38a025d..2cdb12377d 100644 --- a/requirements_rtfd.txt +++ b/requirements_rtfd.txt @@ -5,4 +5,4 @@ sphinx sphinx-issues sphinx-rtd-theme numpydoc -numpy +numpy!=1.21.0 From 6796f794467c5331306e458a67b530554afa51f3 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 10 Sep 2021 14:48:45 -0400 Subject: [PATCH 13/30] move Store class and some helper functions to zarr._storage.store update version in Store docstring --- zarr/_storage/store.py | 159 +++++++++++++++++++++++++++++++++++++++++ zarr/storage.py | 159 +++-------------------------------------- 2 files changed, 167 insertions(+), 151 deletions(-) create mode 100644 zarr/_storage/store.py diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py new file mode 100644 index 0000000000..302e234c40 --- /dev/null +++ b/zarr/_storage/store.py @@ -0,0 +1,159 @@ +from collections.abc import MutableMapping +from typing import Optional, List + +from zarr.util import normalize_storage_path + +# v2 store keys +array_meta_key = '.zarray' +group_meta_key = '.zgroup' +attrs_key = '.zattrs' + + +class Store(MutableMapping): + """Base class for stores implementation. + + Provide a number of default method as well as other typing guaranties for + mypy. + + Stores cannot be mutable mapping as they do have a couple of other + requirements that would break Liskov substitution principle (stores only + allow strings as keys, mutable mapping are more generic). + + And Stores do requires a few other method. + + Having no-op base method also helps simplifying store usage and do not need + to check the presence of attributes and methods, like `close()`. + + Stores can be used as context manager to make sure they close on exit. + + .. added: 2.10.0 + + """ + + _readable = True + _writeable = True + _erasable = True + _listable = True + + def is_readable(self): + return self._readable + + def is_writeable(self): + return self._writeable + + def is_listable(self): + return self._listable + + def is_erasable(self): + return self._erasable + + def __enter__(self): + if not hasattr(self, "_open_count"): + self._open_count = 0 + self._open_count += 1 + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._open_count -= 1 + if self._open_count == 0: + self.close() + + def listdir(self, path: str = "") -> List[str]: + path = normalize_storage_path(path) + return _listdir_from_keys(self, path) + + def rename(self, src_path: str, dst_path: str) -> None: + if not self.is_erasable(): + raise NotImplementedError( + f'{type(self)} is not erasable, cannot call "rename"' + ) # pragma: no cover + _rename_from_keys(self, src_path, dst_path) + + def rmdir(self, path: str = "") -> None: + if not self.is_erasable(): + raise NotImplementedError( + f'{type(self)} is not erasable, cannot call "rmdir"' + ) # pragma: no cover + path = normalize_storage_path(path) + _rmdir_from_keys(self, path) + + def close(self) -> None: + """Do nothing by default""" + pass + + @staticmethod + def _ensure_store(store): + """ + We want to make sure internally that zarr stores are always a class + with a specific interface derived from ``Store``, which is slightly + different than ``MutableMapping``. + + We'll do this conversion in a few places automatically + """ + from zarr.storage import KVStore # avoid circular import + + if store is None: + return None + elif isinstance(store, Store): + return store + elif isinstance(store, MutableMapping): + return KVStore(store) + else: + for attr in [ + "keys", + "values", + "get", + "__setitem__", + "__getitem__", + "__delitem__", + "__contains__", + ]: + if not hasattr(store, attr): + break + else: + return KVStore(store) + + raise ValueError( + "Starting with Zarr 2.9.0, stores must be subclasses of Store, if " + "your store exposes the MutableMapping interface wrap it in " + f"Zarr.storage.KVStore. Got {store}" + ) + + +def _path_to_prefix(path: Optional[str]) -> str: + # assume path already normalized + if path: + prefix = path + '/' + else: + prefix = '' + return prefix + + +def _rename_from_keys(store: Store, src_path: str, dst_path: str) -> None: + # assume path already normalized + src_prefix = _path_to_prefix(src_path) + dst_prefix = _path_to_prefix(dst_path) + for key in list(store.keys()): + if key.startswith(src_prefix): + new_key = dst_prefix + key.lstrip(src_prefix) + store[new_key] = store.pop(key) + + +def _rmdir_from_keys(store: Store, path: Optional[str] = None) -> None: + # assume path already normalized + prefix = _path_to_prefix(path) + for key in list(store.keys()): + if key.startswith(prefix): + del store[key] + + +def _listdir_from_keys(store: Store, path: Optional[str] = None) -> List[str]: + # assume path already normalized + prefix = _path_to_prefix(path) + children = set() + for key in list(store.keys()): + if key.startswith(prefix) and len(key) > len(prefix): + suffix = key[len(prefix):] + child = suffix.split('/')[0] + children.add(child) + return sorted(children) diff --git a/zarr/storage.py b/zarr/storage.py index b6c4f840e7..3793179fb5 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -57,6 +57,14 @@ normalize_shape, normalize_storage_path, retry_call) from zarr._storage.absstore import ABSStore # noqa: F401 +from zarr._storage.store import (_listdir_from_keys, + _path_to_prefix, + _rename_from_keys, + _rmdir_from_keys, + array_meta_key, + group_meta_key, + attrs_key, + Store) __doctest_requires__ = { ('RedisStore', 'RedisStore.*'): ['redis'], @@ -65,9 +73,6 @@ } -array_meta_key = '.zarray' -group_meta_key = '.zgroup' -attrs_key = '.zattrs' try: # noinspection PyUnresolvedReferences from zarr.codecs import Blosc @@ -80,124 +85,6 @@ Path = Union[str, bytes, None] -class Store(MutableMapping): - """Base class for stores implementation. - - Provide a number of default method as well as other typing guaranties for - mypy. - - Stores cannot be mutable mapping as they do have a couple of other - requirements that would break Liskov substitution principle (stores only - allow strings as keys, mutable mapping are more generic). - - And Stores do requires a few other method. - - Having no-op base method also helps simplifying store usage and do not need - to check the presence of attributes and methods, like `close()`. - - Stores can be used as context manager to make sure they close on exit. - - .. added: 2.9.0 - - """ - - _readable = True - _writeable = True - _erasable = True - _listable = True - - def is_readable(self): - return self._readable - - def is_writeable(self): - return self._writeable - - def is_listable(self): - return self._listable - - def is_erasable(self): - return self._erasable - - def __enter__(self): - if not hasattr(self, "_open_count"): - self._open_count = 0 - self._open_count += 1 - return self - - def __exit__(self, exc_type, exc_value, traceback): - self._open_count -= 1 - if self._open_count == 0: - self.close() - - def listdir(self, path: str = "") -> List[str]: - path = normalize_storage_path(path) - return _listdir_from_keys(self, path) - - def rename(self, src_path: str, dst_path: str) -> None: - if not self.is_erasable(): - raise NotImplementedError( - f'{type(self)} is not erasable, cannot call "rename"' - ) # pragma: no cover - _rename_from_keys(self, src_path, dst_path) - - def rmdir(self, path: str = "") -> None: - if not self.is_erasable(): - raise NotImplementedError( - f'{type(self)} is not erasable, cannot call "rmdir"' - ) # pragma: no cover - path = normalize_storage_path(path) - _rmdir_from_keys(self, path) - - def close(self) -> None: - """Do nothing by default""" - pass - - @staticmethod - def _ensure_store(store): - """ - We want to make sure internally that zarr stores are always a class - with a specific interface derived from ``Store``, which is slightly - different than ``MutableMapping``. - - We'll do this conversion in a few places automatically - """ - if store is None: - return None - elif isinstance(store, Store): - return store - elif isinstance(store, MutableMapping): - return KVStore(store) - else: - for attr in [ - "keys", - "values", - "get", - "__setitem__", - "__getitem__", - "__delitem__", - "__contains__", - ]: - if not hasattr(store, attr): - break - else: - return KVStore(store) - - raise ValueError( - "Starting with Zarr 2.9.0, stores must be subclasses of Store, if " - "your store exposes the MutableMapping interface wrap it in " - f"Zarr.storage.KVStore. Got {store}" - ) - - -def _path_to_prefix(path: Optional[str]) -> str: - # assume path already normalized - if path: - prefix = path + '/' - else: - prefix = '' - return prefix - - def contains_array(store: Store, path: Path = None) -> bool: """Return True if the store contains an array at the given logical path.""" path = normalize_storage_path(path) @@ -214,14 +101,6 @@ def contains_group(store: Store, path: Path = None) -> bool: return key in store -def _rmdir_from_keys(store: Store, path: Optional[str] = None) -> None: - # assume path already normalized - prefix = _path_to_prefix(path) - for key in list(store.keys()): - if key.startswith(prefix): - del store[key] - - def rmdir(store: Store, path: Path = None): """Remove all items under the given path. If `store` provides a `rmdir` method, this will be called, otherwise will fall back to implementation via the @@ -235,16 +114,6 @@ def rmdir(store: Store, path: Path = None): _rmdir_from_keys(store, path) -def _rename_from_keys(store: Store, src_path: str, dst_path: str) -> None: - # assume path already normalized - src_prefix = _path_to_prefix(src_path) - dst_prefix = _path_to_prefix(dst_path) - for key in list(store.keys()): - if key.startswith(src_prefix): - new_key = dst_prefix + key.lstrip(src_prefix) - store[new_key] = store.pop(key) - - def rename(store: Store, src_path: Path, dst_path: Path): """Rename all items under the given path. If `store` provides a `rename` method, this will be called, otherwise will fall back to implementation via the @@ -259,18 +128,6 @@ def rename(store: Store, src_path: Path, dst_path: Path): _rename_from_keys(store, src_path, dst_path) -def _listdir_from_keys(store: Store, path: Optional[str] = None) -> List[str]: - # assume path already normalized - prefix = _path_to_prefix(path) - children = set() - for key in list(store.keys()): - if key.startswith(prefix) and len(key) > len(prefix): - suffix = key[len(prefix):] - child = suffix.split('/')[0] - children.add(child) - return sorted(children) - - def listdir(store: Store, path: Path = None): """Obtain a directory listing for the given path. If `store` provides a `listdir` method, this will be called, otherwise will fall back to implementation via the From d0a1b00629fc09663dac7663ec4df0e0cdb1d13f Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 10 Sep 2021 15:20:57 -0400 Subject: [PATCH 14/30] BUG: ABSStore should inherit from Store --- zarr/_storage/absstore.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index 0dc5bf1892..ad03c7a023 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -4,13 +4,14 @@ from collections.abc import MutableMapping from numcodecs.compat import ensure_bytes from zarr.util import normalize_storage_path +from zarr._storage.store import Store __doctest_requires__ = { ('ABSStore', 'ABSStore.*'): ['azure.storage.blob'], } -class ABSStore(MutableMapping): +class ABSStore(Store): """Storage class using Azure Blob Storage (ABS). Parameters From 3d39d8c47478832ffabd5ad20f89b4931fffa26b Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 10 Sep 2021 17:26:01 -0400 Subject: [PATCH 15/30] pep8 fix --- zarr/_storage/absstore.py | 1 - zarr/convenience.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index ad03c7a023..01bfbd5039 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -1,7 +1,6 @@ """This module contains storage classes related to Azure Blob Storage (ABS)""" import warnings -from collections.abc import MutableMapping from numcodecs.compat import ensure_bytes from zarr.util import normalize_storage_path from zarr._storage.store import Store diff --git a/zarr/convenience.py b/zarr/convenience.py index 24251fa95e..1de7d5fd8a 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -213,7 +213,7 @@ def save_group(store: StoreLike, *args, **kwargs): raise ValueError('at least one array must be provided') # handle polymorphic store arg may_need_closing = _might_close(store) - _store: Store = normalize_store_arg(store, clobber=True) + _store: Store = normalize_store_arg(store, clobber=True) try: grp = _create_group(_store, overwrite=True) for i, arr in enumerate(args): From 5f7c7f58b8a6f80f5178c1fa0d1da3f8b574960f Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 15:32:01 -0400 Subject: [PATCH 16/30] TST: make CustomMapping a subclass of Store TST: initialize stores with KVStore(dict()) instead of bare dict() --- zarr/tests/test_core.py | 34 +++++++++++++++++++--------------- zarr/tests/test_storage.py | 6 +++--- zarr/tests/test_sync.py | 5 +++-- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index f44a98ae0a..97ad3b68e2 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -30,6 +30,7 @@ LRUStoreCache, NestedDirectoryStore, SQLiteStore, + Store, atexit_rmglob, atexit_rmtree, init_array, @@ -73,18 +74,18 @@ def test_array_init(self): assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest() # store not initialized - store = dict() + store = KVStore(dict()) with pytest.raises(ValueError): Array(store) # group is in the way - store = dict() + store = KVStore(dict()) init_group(store, path='baz') with pytest.raises(ValueError): Array(store, path='baz') def create_array(self, read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) kwargs.setdefault('compressor', Zlib(level=1)) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) @@ -1481,7 +1482,7 @@ class TestArrayWithPath(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) init_array(store, path='foo/bar', **kwargs) @@ -1537,9 +1538,9 @@ class TestArrayWithChunkStore(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) # separate chunk store - chunk_store = dict() + chunk_store = KVStore(dict()) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) init_array(store, chunk_store=chunk_store, **kwargs) @@ -2060,7 +2061,7 @@ def test_nbytes_stored(self): class TestArrayWithNoCompressor(TestArray): def create_array(self, read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) kwargs.setdefault('compressor', None) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) @@ -2095,7 +2096,7 @@ def test_hexdigest(self): class TestArrayWithBZ2Compressor(TestArray): def create_array(self, read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) compressor = BZ2(level=1) kwargs.setdefault('compressor', compressor) cache_metadata = kwargs.pop('cache_metadata', True) @@ -2131,7 +2132,7 @@ def test_hexdigest(self): class TestArrayWithBloscCompressor(TestArray): def create_array(self, read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) compressor = Blosc(cname='zstd', clevel=1, shuffle=1) kwargs.setdefault('compressor', compressor) cache_metadata = kwargs.pop('cache_metadata', True) @@ -2174,7 +2175,7 @@ def test_hexdigest(self): class TestArrayWithLZMACompressor(TestArray): def create_array(self, read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) compressor = LZMA(preset=1) kwargs.setdefault('compressor', compressor) cache_metadata = kwargs.pop('cache_metadata', True) @@ -2211,7 +2212,7 @@ class TestArrayWithFilters(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) dtype = kwargs.get('dtype', None) filters = [ Delta(dtype=dtype), @@ -2254,7 +2255,7 @@ def test_astype_no_filters(self): dtype = np.dtype(np.int8) astype = np.dtype(np.float32) - store = dict() + store = KVStore(dict()) init_array(store, shape=shape, chunks=10, dtype=dtype) data = np.arange(np.prod(shape), dtype=dtype).reshape(shape) @@ -2329,14 +2330,17 @@ def test_structured_array_contain_object(self): # custom store, does not support getsize() -class CustomMapping(object): +class CustomMapping(Store): def __init__(self): - self.inner = dict() + self.inner = KVStore(dict()) def __iter__(self): return iter(self.keys()) + def __len__(self): + return len(self.inner) + def keys(self): return self.inner.keys() @@ -2385,7 +2389,7 @@ class TestArrayNoCache(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) kwargs.setdefault('compressor', Zlib(level=1)) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 62e2525990..c76fa38f22 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1804,7 +1804,7 @@ def test_migrate_1to2(): # concerned about migrating a single array at the root of the store # setup - store = dict() + store = KVStore(dict()) meta = dict( shape=(100,), chunks=(10,), @@ -1842,7 +1842,7 @@ def test_migrate_1to2(): assert meta_migrated['compressor'] == Zlib(1).get_config() # check dict compression_opts - store = dict() + store = KVStore(dict()) meta['compression'] = 'blosc' meta['compression_opts'] = dict(cname='lz4', clevel=5, shuffle=1) meta_json = meta_v1.encode_metadata(meta) @@ -1856,7 +1856,7 @@ def test_migrate_1to2(): Blosc(cname='lz4', clevel=5, shuffle=1).get_config()) # check 'none' compression is migrated to None (null in JSON) - store = dict() + store = KVStore(dict()) meta['compression'] = 'none' meta_json = meta_v1.encode_metadata(meta) store['meta'] = meta_json diff --git a/zarr/tests/test_sync.py b/zarr/tests/test_sync.py index 51b7fe0e10..d420df7c68 100644 --- a/zarr/tests/test_sync.py +++ b/zarr/tests/test_sync.py @@ -12,7 +12,8 @@ from zarr.attrs import Attributes from zarr.core import Array from zarr.hierarchy import Group -from zarr.storage import DirectoryStore, atexit_rmtree, init_array, init_group +from zarr.storage import (DirectoryStore, KVStore, atexit_rmtree, init_array, + init_group) from zarr.sync import ProcessSynchronizer, ThreadSynchronizer from zarr.tests.test_attrs import TestAttributes from zarr.tests.test_core import TestArray @@ -96,7 +97,7 @@ def test_parallel_append(self): class TestArrayWithThreadSynchronizer(TestArray, MixinArraySyncTests): def create_array(self, read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) init_array(store, **kwargs) From 52deac068410d37b1240124aa24782393006406b Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 15:35:17 -0400 Subject: [PATCH 17/30] update version mentioned in Store docstring --- zarr/_storage/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 302e234c40..fb151d8547 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -26,7 +26,7 @@ class Store(MutableMapping): Stores can be used as context manager to make sure they close on exit. - .. added: 2.10.0 + .. added: 2.11.0 """ From a7cc4dbb193bb8533db5fd7e2ab475dc5b334720 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 15:36:09 -0400 Subject: [PATCH 18/30] update version mentioned in warning message --- zarr/_storage/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index fb151d8547..7037e2822c 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -114,7 +114,7 @@ def _ensure_store(store): return KVStore(store) raise ValueError( - "Starting with Zarr 2.9.0, stores must be subclasses of Store, if " + "Starting with Zarr 2.11.0, stores must be subclasses of Store, if " "your store exposes the MutableMapping interface wrap it in " f"Zarr.storage.KVStore. Got {store}" ) From c59f3be239af3709889fc33f1dcff197177dcbfb Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 15:50:11 -0400 Subject: [PATCH 19/30] use Store._ensure_store in Attributes class ensures Attributes.store is a Store --- zarr/attrs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zarr/attrs.py b/zarr/attrs.py index ea6b831608..ec01dbe04f 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -1,6 +1,7 @@ from collections.abc import MutableMapping from zarr.meta import parse_metadata +from zarr._storage.store import Store from zarr.util import json_dumps @@ -26,7 +27,7 @@ class Attributes(MutableMapping): def __init__(self, store, key='.zattrs', read_only=False, cache=True, synchronizer=None): - self.store = store + self.store = Store._ensure_store(store) self.key = key self.read_only = read_only self.cache = cache From a9a98a97e3d0547aba0fc94fb3b7fd1a0709f473 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 15:57:18 -0400 Subject: [PATCH 20/30] TST: add Attributes test case ensuring store gets coerced to a Store --- zarr/tests/test_attrs.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index 2aced3abaa..01d9640f64 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -5,17 +5,23 @@ from zarr.attrs import Attributes from zarr.tests.util import CountingDict +from zarr.storage import KVStore -class TestAttributes(unittest.TestCase): +class TestAttributes(): def init_attributes(self, store, read_only=False, cache=True): return Attributes(store, key='attrs', read_only=read_only, cache=cache) - def test_storage(self): + @pytest.mark.parametrize('store_from_dict', [False, True]) + def test_storage(self, store_from_dict): - store = dict() + if store_from_dict: + store = dict() + else: + store = KVStore(dict()) a = Attributes(store=store, key='attrs') + assert isinstance(a.store, KVStore) assert 'foo' not in a assert 'bar' not in a assert dict() == a.asdict() From 0a6d92341cb0aceffff093d5a06aac4c4745a536 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 15:51:20 -0400 Subject: [PATCH 21/30] use Store._ensure_store in normalize_store_arg ensures open_array, etc can work when the user supplies a dict --- zarr/creation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/creation.py b/zarr/creation.py index 854cd91890..968ce8f20f 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -167,7 +167,7 @@ def normalize_store_arg(store, clobber=False, storage_options=None, mode="w") -> return DirectoryStore(store) else: if not isinstance(store, Store) and isinstance(store, MutableMapping): - store = KVStore(store) + store = Store._ensure_store(store) return store From a40ed54a360e5e6bb7fb356d6da0c53485f4950c Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 16:25:45 -0400 Subject: [PATCH 22/30] TST: make sure high level creation functions also work when passed a dict for store --- zarr/tests/test_creation.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index bf25bc618c..117bc338b6 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -15,7 +15,7 @@ zeros_like) from zarr.hierarchy import open_group from zarr.n5 import N5Store -from zarr.storage import DirectoryStore +from zarr.storage import DirectoryStore, KVStore from zarr.sync import ThreadSynchronizer @@ -272,6 +272,30 @@ def test_open_array(): assert_array_equal(np.full(100, fill_value=42), a[:]) +def test_open_array_dict_store(): + + # dict will become a KVStore + store = dict() + + # mode == 'w' + z = open_array(store, mode='w', shape=100, chunks=10) + z[:] = 42 + assert isinstance(z, Array) + assert isinstance(z.store, KVStore) + assert (100,) == z.shape + assert (10,) == z.chunks + assert_array_equal(np.full(100, fill_value=42), z[:]) + + +def test_create_in_dict(): + for func in [empty, zeros, ones]: + a = func(100, store=dict()) + assert isinstance(a.store, KVStore) + + a = full(100, 5, store=dict()) + assert isinstance(a.store, KVStore) + + def test_empty_like(): # zarr array From 3fa6f51fbc1ea2137e14e555ca3d63ab10f7b9ad Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 16:34:08 -0400 Subject: [PATCH 23/30] TST: add test case with group initialized from dict --- zarr/tests/test_hierarchy.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index e9a8c6cf79..000b8ebf7a 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -905,6 +905,18 @@ def test_context_manager(self): d[:] = np.arange(100) +def test_group_init_from_dict(): + store, chunk_store = dict(), None + init_group(store, path=None, chunk_store=chunk_store) + g = Group(store, path=None, read_only=False, chunk_store=chunk_store) + assert store is not g.store + assert isinstance(g.store, KVStore) + if chunk_store is None: + assert g.store is g.chunk_store + else: + assert chunk_store is g.chunk_store + + class TestGroupWithMemoryStore(TestGroup): @staticmethod From 0f34ad24ba50d5cab8cb477d574c0549b2741a21 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 16:36:48 -0400 Subject: [PATCH 24/30] TST: add test case with Array initialized from dict --- zarr/tests/test_core.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 5e3d83be59..1e7d7754c6 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1613,6 +1613,16 @@ def test_nbytes_stored(self): assert expect_nbytes_stored == z.nbytes_stored +def test_array_init_from_dict(): + # initialization via non-Store MutableMapping + store = dict() + init_array(store, shape=100, chunks=10, dtype=" Date: Wed, 22 Sep 2021 16:42:49 -0400 Subject: [PATCH 25/30] change CustomMapping back to type object, not Store want to test the non-Store code path in _ensure_store --- zarr/tests/test_core.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 1e7d7754c6..eb021ea017 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -2356,7 +2356,7 @@ def test_structured_array_contain_object(self): # custom store, does not support getsize() -class CustomMapping(Store): +class CustomMapping(object): def __init__(self): self.inner = KVStore(dict()) @@ -2364,9 +2364,6 @@ def __init__(self): def __iter__(self): return iter(self.keys()) - def __len__(self): - return len(self.inner) - def keys(self): return self.inner.keys() From cb1c5f05871668975a13815da5d643f1f6f00b14 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 24 Sep 2021 14:04:59 -0400 Subject: [PATCH 26/30] pep8 fixes --- zarr/creation.py | 2 +- zarr/tests/test_attrs.py | 1 - zarr/tests/test_core.py | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/zarr/creation.py b/zarr/creation.py index 968ce8f20f..9836b062db 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -12,7 +12,7 @@ ContainsGroupError, ) from zarr.n5 import N5Store -from zarr.storage import (DirectoryStore, ZipStore, KVStore, contains_array, +from zarr.storage import (DirectoryStore, ZipStore, contains_array, contains_group, default_compressor, init_array, normalize_storage_path, FSStore, Store) from zarr.util import normalize_dimension_separator diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index 01d9640f64..b2de736d4a 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -1,5 +1,4 @@ import json -import unittest import pytest diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index eb021ea017..35294c9d43 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -30,7 +30,6 @@ LRUStoreCache, NestedDirectoryStore, SQLiteStore, - Store, atexit_rmglob, atexit_rmtree, init_array, From 6ce098145df323ee8c93280d1ea88f167c99fcb6 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 24 Sep 2021 16:27:40 -0400 Subject: [PATCH 27/30] update/fix new hierarchy test case to complete code coverage --- zarr/tests/test_hierarchy.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 000b8ebf7a..1b4f96a73f 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -905,8 +905,12 @@ def test_context_manager(self): d[:] = np.arange(100) -def test_group_init_from_dict(): - store, chunk_store = dict(), None +@pytest.mark.parametrize('chunk_dict', [False, True]) +def test_group_init_from_dict(chunk_dict): + if chunk_dict: + store, chunk_store = dict(), dict() + else: + store, chunk_store = dict(), None init_group(store, path=None, chunk_store=chunk_store) g = Group(store, path=None, read_only=False, chunk_store=chunk_store) assert store is not g.store @@ -914,7 +918,7 @@ def test_group_init_from_dict(): if chunk_store is None: assert g.store is g.chunk_store else: - assert chunk_store is g.chunk_store + assert chunk_store is not g.chunk_store class TestGroupWithMemoryStore(TestGroup): From 82f7376703a586c8bf149ea0e60da5ee21e96e5a Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 20 Oct 2021 18:36:23 -0400 Subject: [PATCH 28/30] create a BaseStore parent for Store BaseStore does not have the listdir or rmdir methods cleaned up some type declerations, making sure mypy passes --- zarr/_storage/store.py | 72 ++++++++++++++++++++++++------------------ zarr/convenience.py | 16 +++++----- zarr/core.py | 8 ++--- zarr/creation.py | 30 +++--------------- zarr/hierarchy.py | 6 ++-- zarr/storage.py | 67 +++++++++++++++++++++++++++------------ 6 files changed, 107 insertions(+), 92 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 7037e2822c..d68da68304 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -1,5 +1,6 @@ +from abc import abstractmethod from collections.abc import MutableMapping -from typing import Optional, List +from typing import Any, List, Optional, Union from zarr.util import normalize_storage_path @@ -9,18 +10,16 @@ attrs_key = '.zattrs' -class Store(MutableMapping): - """Base class for stores implementation. +class BaseStore(MutableMapping): + """Abstract base class for store implementations. - Provide a number of default method as well as other typing guaranties for - mypy. + This is a thin wrapper over MutableMapping that provides methods to check + whether a store is readable, writeable, eraseable and or listable. Stores cannot be mutable mapping as they do have a couple of other requirements that would break Liskov substitution principle (stores only allow strings as keys, mutable mapping are more generic). - And Stores do requires a few other method. - Having no-op base method also helps simplifying store usage and do not need to check the presence of attributes and methods, like `close()`. @@ -58,9 +57,9 @@ def __exit__(self, exc_type, exc_value, traceback): if self._open_count == 0: self.close() - def listdir(self, path: str = "") -> List[str]: - path = normalize_storage_path(path) - return _listdir_from_keys(self, path) + def close(self) -> None: + """Do nothing by default""" + pass def rename(self, src_path: str, dst_path: str) -> None: if not self.is_erasable(): @@ -69,23 +68,11 @@ def rename(self, src_path: str, dst_path: str) -> None: ) # pragma: no cover _rename_from_keys(self, src_path, dst_path) - def rmdir(self, path: str = "") -> None: - if not self.is_erasable(): - raise NotImplementedError( - f'{type(self)} is not erasable, cannot call "rmdir"' - ) # pragma: no cover - path = normalize_storage_path(path) - _rmdir_from_keys(self, path) - - def close(self) -> None: - """Do nothing by default""" - pass - @staticmethod - def _ensure_store(store): + def _ensure_store(store: Any): """ We want to make sure internally that zarr stores are always a class - with a specific interface derived from ``Store``, which is slightly + with a specific interface derived from ``BaseStore``, which is slightly different than ``MutableMapping``. We'll do this conversion in a few places automatically @@ -94,7 +81,7 @@ def _ensure_store(store): if store is None: return None - elif isinstance(store, Store): + elif isinstance(store, BaseStore): return store elif isinstance(store, MutableMapping): return KVStore(store) @@ -114,12 +101,35 @@ def _ensure_store(store): return KVStore(store) raise ValueError( - "Starting with Zarr 2.11.0, stores must be subclasses of Store, if " - "your store exposes the MutableMapping interface wrap it in " - f"Zarr.storage.KVStore. Got {store}" + "Starting with Zarr 2.11.0, stores must be subclasses of " + "BaseStore, if your store exposes the MutableMapping interface " + f"wrap it in Zarr.storage.KVStore. Got {store}" ) +class Store(BaseStore): + """Abstract store class used by implementations following the Zarr v2 spec. + + Adds public `listdir`, `rename`, and `rmdir` methods on top of BaseStore. + + .. added: 2.11.0 + + """ + def listdir(self, path: str = "") -> List[str]: + path = normalize_storage_path(path) + return _listdir_from_keys(self, path) + + + def rmdir(self, path: str = "") -> None: + if not self.is_erasable(): + raise NotImplementedError( + f'{type(self)} is not erasable, cannot call "rmdir"' + ) # pragma: no cover + path = normalize_storage_path(path) + _rmdir_from_keys(self, path) + + + def _path_to_prefix(path: Optional[str]) -> str: # assume path already normalized if path: @@ -129,7 +139,7 @@ def _path_to_prefix(path: Optional[str]) -> str: return prefix -def _rename_from_keys(store: Store, src_path: str, dst_path: str) -> None: +def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: # assume path already normalized src_prefix = _path_to_prefix(src_path) dst_prefix = _path_to_prefix(dst_path) @@ -139,7 +149,7 @@ def _rename_from_keys(store: Store, src_path: str, dst_path: str) -> None: store[new_key] = store.pop(key) -def _rmdir_from_keys(store: Store, path: Optional[str] = None) -> None: +def _rmdir_from_keys(store: Union[BaseStore, MutableMapping], path: Optional[str] = None) -> None: # assume path already normalized prefix = _path_to_prefix(path) for key in list(store.keys()): @@ -147,7 +157,7 @@ def _rmdir_from_keys(store: Store, path: Optional[str] = None) -> None: del store[key] -def _listdir_from_keys(store: Store, path: Optional[str] = None) -> List[str]: +def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str]: # assume path already normalized prefix = _path_to_prefix(path) children = set() diff --git a/zarr/convenience.py b/zarr/convenience.py index 1de7d5fd8a..26c58d6018 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -3,7 +3,7 @@ import itertools import os import re -from collections.abc import Mapping +from collections.abc import Mapping, MutableMapping from zarr.core import Array from zarr.creation import array as _create_array @@ -13,12 +13,12 @@ from zarr.hierarchy import group as _create_group from zarr.hierarchy import open_group from zarr.meta import json_dumps, json_loads -from zarr.storage import contains_array, contains_group, Store +from zarr.storage import contains_array, contains_group, BaseStore, Store from zarr.util import TreeViewer, buffer_size, normalize_storage_path from typing import Union -StoreLike = Union[Store, str, None] +StoreLike = Union[BaseStore, MutableMapping, str, None] # noinspection PyShadowingBuiltins @@ -80,7 +80,7 @@ def open(store: StoreLike = None, mode: str = "a", **kwargs): clobber = mode == 'w' # we pass storage options explicitly, since normalize_store_arg might construct # a store if the input is a fsspec-compatible URL - _store: Store = normalize_store_arg( + _store: BaseStore = normalize_store_arg( store, clobber=clobber, storage_options=kwargs.pop("storage_options", {}) ) path = normalize_storage_path(path) @@ -142,7 +142,7 @@ def save_array(store: StoreLike, arr, **kwargs): """ may_need_closing = _might_close(store) - _store: Store = normalize_store_arg(store, clobber=True) + _store: BaseStore = normalize_store_arg(store, clobber=True) try: _create_array(arr, store=_store, overwrite=True, **kwargs) finally: @@ -213,7 +213,7 @@ def save_group(store: StoreLike, *args, **kwargs): raise ValueError('at least one array must be provided') # handle polymorphic store arg may_need_closing = _might_close(store) - _store: Store = normalize_store_arg(store, clobber=True) + _store: BaseStore = normalize_store_arg(store, clobber=True) try: grp = _create_group(_store, overwrite=True) for i, arr in enumerate(args): @@ -1083,7 +1083,7 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, return n_copied, n_skipped, n_bytes_copied -def consolidate_metadata(store: Store, metadata_key=".zmetadata"): +def consolidate_metadata(store: StoreLike, metadata_key=".zmetadata"): """ Consolidate all metadata for groups and arrays within the given store into a single resource and put it under the given key. @@ -1134,7 +1134,7 @@ def is_zarr_key(key): return open_consolidated(store, metadata_key=metadata_key) -def open_consolidated(store: Store, metadata_key=".zmetadata", mode="r+", **kwargs): +def open_consolidated(store: StoreLike, metadata_key=".zmetadata", mode="r+", **kwargs): """Open group using metadata previously consolidated into a single key. This is an optimised method for opening a Zarr group, where instead of diff --git a/zarr/core.py b/zarr/core.py index 8f21a6ac02..becf4e790a 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -32,7 +32,7 @@ pop_fields, ) from zarr.meta import decode_array_metadata, encode_array_metadata -from zarr.storage import array_meta_key, attrs_key, getsize, listdir, Store +from zarr.storage import array_meta_key, attrs_key, getsize, listdir, BaseStore from zarr.util import ( all_equal, InfoReporter, @@ -143,7 +143,7 @@ class Array: def __init__( self, - store: Store, + store: BaseStore, path=None, read_only=False, chunk_store=None, @@ -156,8 +156,8 @@ def __init__( # N.B., expect at this point store is fully initialized with all # configuration metadata fully specified and normalized - store = Store._ensure_store(store) - chunk_store = Store._ensure_store(chunk_store) + store = BaseStore._ensure_store(store) + chunk_store = BaseStore._ensure_store(chunk_store) self._store = store self._chunk_store = chunk_store diff --git a/zarr/creation.py b/zarr/creation.py index f0f7ea17d7..d50f67c31a 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -12,9 +12,10 @@ ContainsGroupError, ) from zarr.n5 import N5Store -from zarr.storage import (DirectoryStore, ZipStore, contains_array, - contains_group, default_compressor, init_array, - normalize_storage_path, FSStore, Store) +from zarr.storage import (BaseStore, DirectoryStore, FSStore, ZipStore, + contains_array, contains_group, default_compressor, + init_array, normalize_storage_path, + normalize_store_arg) from zarr.util import normalize_dimension_separator @@ -158,29 +159,6 @@ def create(shape, chunks=True, dtype=None, compressor='default', return z -def normalize_store_arg(store, clobber=False, storage_options=None, mode="w") -> Store: - if store is None: - return Store._ensure_store(dict()) - elif isinstance(store, os.PathLike): - store = os.fspath(store) - if isinstance(store, str): - mode = mode if clobber else "r" - if "://" in store or "::" in store: - return FSStore(store, mode=mode, **(storage_options or {})) - elif storage_options: - raise ValueError("storage_options passed with non-fsspec path") - if store.endswith('.zip'): - return ZipStore(store, mode=mode) - elif store.endswith('.n5'): - return N5Store(store) - else: - return DirectoryStore(store) - else: - if not isinstance(store, Store) and isinstance(store, MutableMapping): - store = Store._ensure_store(store) - return store - - def _kwargs_compat(compressor, fill_value, kwargs): # to be compatible with h5py, as well as backwards-compatible with Zarr diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index c94079117c..402b8dd976 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -16,6 +16,7 @@ ) from zarr.meta import decode_group_metadata from zarr.storage import ( + BaseStore, MemoryStore, attrs_key, contains_array, @@ -25,7 +26,6 @@ listdir, rename, rmdir, - Store, ) from zarr.util import ( InfoReporter, @@ -111,8 +111,8 @@ class Group(MutableMapping): def __init__(self, store, path=None, read_only=False, chunk_store=None, cache_attrs=True, synchronizer=None): - store = Store._ensure_store(store) - chunk_store = Store._ensure_store(chunk_store) + store: BaseStore = BaseStore._ensure_store(store) + chunk_store: BaseStore = BaseStore._ensure_store(chunk_store) self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) diff --git a/zarr/storage.py b/zarr/storage.py index 4427577668..72b20cd797 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -64,6 +64,7 @@ array_meta_key, group_meta_key, attrs_key, + BaseStore, Store) __doctest_requires__ = { @@ -83,9 +84,11 @@ Path = Union[str, bytes, None] +# allow MutableMapping for backwards compatibility +StoreLike = Union[BaseStore, MutableMapping] -def contains_array(store: Store, path: Path = None) -> bool: +def contains_array(store: StoreLike, path: Path = None) -> bool: """Return True if the store contains an array at the given logical path.""" path = normalize_storage_path(path) prefix = _path_to_prefix(path) @@ -93,7 +96,7 @@ def contains_array(store: Store, path: Path = None) -> bool: return key in store -def contains_group(store: Store, path: Path = None) -> bool: +def contains_group(store: StoreLike, path: Path = None) -> bool: """Return True if the store contains a group at the given logical path.""" path = normalize_storage_path(path) prefix = _path_to_prefix(path) @@ -101,12 +104,36 @@ def contains_group(store: Store, path: Path = None) -> bool: return key in store -def rmdir(store: Store, path: Path = None): +def normalize_store_arg(store: Any, clobber=False, storage_options=None, mode="w") -> BaseStore: + if store is None: + return BaseStore._ensure_store(dict()) + elif isinstance(store, os.PathLike): + store = os.fspath(store) + if isinstance(store, str): + mode = mode if clobber else "r" + if "://" in store or "::" in store: + return FSStore(store, mode=mode, **(storage_options or {})) + elif storage_options: + raise ValueError("storage_options passed with non-fsspec path") + if store.endswith('.zip'): + return ZipStore(store, mode=mode) + elif store.endswith('.n5'): + from zarr.n5 import N5Store + return N5Store(store) + else: + return DirectoryStore(store) + else: + if not isinstance(store, BaseStore) and isinstance(store, MutableMapping): + store = BaseStore._ensure_store(store) + return store + + +def rmdir(store: StoreLike, path: Path = None): """Remove all items under the given path. If `store` provides a `rmdir` method, this will be called, otherwise will fall back to implementation via the `Store` interface.""" path = normalize_storage_path(path) - if hasattr(store, "rmdir") and store.is_erasable(): + if hasattr(store, "rmdir"): # pass through store.rmdir(path) # type: ignore else: @@ -114,7 +141,7 @@ def rmdir(store: Store, path: Path = None): _rmdir_from_keys(store, path) -def rename(store: Store, src_path: Path, dst_path: Path): +def rename(store: BaseStore, src_path: Path, dst_path: Path): """Rename all items under the given path. If `store` provides a `rename` method, this will be called, otherwise will fall back to implementation via the `Store` interface.""" @@ -128,7 +155,7 @@ def rename(store: Store, src_path: Path, dst_path: Path): _rename_from_keys(store, src_path, dst_path) -def listdir(store: Store, path: Path = None): +def listdir(store: BaseStore, path: Path = None): """Obtain a directory listing for the given path. If `store` provides a `listdir` method, this will be called, otherwise will fall back to implementation via the `MutableMapping` interface.""" @@ -146,7 +173,7 @@ def listdir(store: Store, path: Path = None): return _listdir_from_keys(store, path) -def getsize(store: Store, path: Path = None) -> int: +def getsize(store: BaseStore, path: Path = None) -> int: """Compute size of stored items for a given path. If `store` provides a `getsize` method, this will be called, otherwise will return -1.""" path = normalize_storage_path(path) @@ -179,8 +206,8 @@ def getsize(store: Store, path: Path = None) -> int: def _require_parent_group( path: Optional[str], - store: Store, - chunk_store: Optional[Store], + store: StoreLike, + chunk_store: Optional[StoreLike], overwrite: bool, ): # assume path is normalized @@ -196,7 +223,7 @@ def _require_parent_group( def init_array( - store: Store, + store: StoreLike, shape: Tuple[int, ...], chunks: Union[bool, int, Tuple[int, ...]] = True, dtype=None, @@ -205,7 +232,7 @@ def init_array( order: str = "C", overwrite: bool = False, path: Path = None, - chunk_store: Store = None, + chunk_store: StoreLike = None, filters=None, object_codec=None, dimension_separator=None, @@ -248,8 +275,8 @@ def init_array( -------- Initialize an array store:: - >>> from zarr.storage import init_array - >>> store = dict() + >>> from zarr.storage import init_array, KVStore + >>> store = KVStore(dict()) >>> init_array(store, shape=(10000, 10000), chunks=(1000, 1000)) >>> sorted(store.keys()) ['.zarray'] @@ -282,7 +309,7 @@ def init_array( Initialize an array using a storage path:: - >>> store = dict() + >>> store = KVStore(dict()) >>> init_array(store, shape=100000000, chunks=1000000, dtype='i1', path='foo') >>> sorted(store.keys()) ['.zgroup', 'foo/.zarray'] @@ -427,10 +454,10 @@ def _init_array_metadata( def init_group( - store: Store, + store: StoreLike, overwrite: bool = False, path: Path = None, - chunk_store: Store = None, + chunk_store: StoreLike = None, ): """Initialize a group store. Note that this is a low-level function and there should be no need to call this directly from user code. @@ -462,10 +489,10 @@ def init_group( def _init_group_metadata( - store: Store, + store: StoreLike, overwrite: Optional[bool] = False, path: Optional[str] = None, - chunk_store: Store = None, + chunk_store: StoreLike = None, ): # guard conditions @@ -2608,8 +2635,8 @@ class ConsolidatedMetadataStore(Store): """ - def __init__(self, store: Store, metadata_key=".zmetadata"): - self.store = store + def __init__(self, store: StoreLike, metadata_key=".zmetadata"): + self.store = Store._ensure_store(store) # retrieve consolidated metadata meta = json_loads(store[metadata_key]) From 278037eade733bc21299aa797b87d59e873b6685 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 20 Oct 2021 20:42:22 -0400 Subject: [PATCH 29/30] flake8 --- zarr/_storage/store.py | 3 --- zarr/convenience.py | 2 +- zarr/creation.py | 6 +----- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index d68da68304..a779a4e26a 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -1,4 +1,3 @@ -from abc import abstractmethod from collections.abc import MutableMapping from typing import Any, List, Optional, Union @@ -119,7 +118,6 @@ def listdir(self, path: str = "") -> List[str]: path = normalize_storage_path(path) return _listdir_from_keys(self, path) - def rmdir(self, path: str = "") -> None: if not self.is_erasable(): raise NotImplementedError( @@ -129,7 +127,6 @@ def rmdir(self, path: str = "") -> None: _rmdir_from_keys(self, path) - def _path_to_prefix(path: Optional[str]) -> str: # assume path already normalized if path: diff --git a/zarr/convenience.py b/zarr/convenience.py index 26c58d6018..18b59a77b2 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -13,7 +13,7 @@ from zarr.hierarchy import group as _create_group from zarr.hierarchy import open_group from zarr.meta import json_dumps, json_loads -from zarr.storage import contains_array, contains_group, BaseStore, Store +from zarr.storage import contains_array, contains_group, BaseStore from zarr.util import TreeViewer, buffer_size, normalize_storage_path from typing import Union diff --git a/zarr/creation.py b/zarr/creation.py index d50f67c31a..244a9b080c 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -1,9 +1,7 @@ -import os from warnings import warn import numpy as np from numcodecs.registry import codec_registry -from collections.abc import MutableMapping from zarr.core import Array from zarr.errors import ( @@ -11,9 +9,7 @@ ContainsArrayError, ContainsGroupError, ) -from zarr.n5 import N5Store -from zarr.storage import (BaseStore, DirectoryStore, FSStore, ZipStore, - contains_array, contains_group, default_compressor, +from zarr.storage import (contains_array, contains_group, default_compressor, init_array, normalize_storage_path, normalize_store_arg) from zarr.util import normalize_dimension_separator From 06086dca25d2dd4813d9685b6472a59694e9b523 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Thu, 21 Oct 2021 00:50:23 -0400 Subject: [PATCH 30/30] restore is_erasable check to rmdir function Otherwise the save_array doc example fails to write to a ZipStore --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 72b20cd797..901011c9d2 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -133,7 +133,7 @@ def rmdir(store: StoreLike, path: Path = None): this will be called, otherwise will fall back to implementation via the `Store` interface.""" path = normalize_storage_path(path) - if hasattr(store, "rmdir"): + if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore # pass through store.rmdir(path) # type: ignore else: