From d48468fe2682dcdc50f092bdb13c8bfde0779b1b Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 17 Mar 2020 15:16:17 -0400 Subject: [PATCH 01/20] Add FSStore --- zarr/storage.py | 81 ++++++++++++++++++++++++++++++++++++++ zarr/tests/test_storage.py | 4 +- 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 81b6d08d9c..8829c239c3 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -946,6 +946,87 @@ def atexit_rmglob(path, rmtree(p) +class FSStore(MutableMapping): + + def __init__(self, url, normalize_keys=True, key_separator='.', + **storage_options): + import fsspec + self.path = url + self.normalize_keys = normalize_keys + self.key_separator = key_separator + self.map = fsspec.get_mapper(url, **storage_options) + self.fs = self.map.fs # for direct operations + + def _normalize_key(self, key): + key = normalize_storage_path(key) + if key: + *bits, end = key.split('/') + key = '/'.join(bits + [end.replace('.', self.key_separator)]) + return key.lower() if self.normalize_keys else key + + def __getitem__(self, key): + key = self._normalize_key(key) + return self.map[key] + + def __setitem__(self, key, value): + key = self._normalize_key(key) + path = self.dir_path(key) + value = ensure_contiguous_ndarray(value) + try: + if self.fs.isdir(path): + self.fs.rm(path, recursive=True) + self.map[key] = value + except IOError: + raise KeyError(key) + + def __delitem__(self, key): + key = self._normalize_key(key) + path = self.dir_path(key) + if self.fs.isdir(path): + self.fs.rm(path, recursive=True) + else: + del self.map[key] + + def __contains__(self, key): + key = self._normalize_key(key) + return key in self.map + + def __eq__(self, other): + return type(self) == type(other) and self.map == other.map + + def keys(self): + return iter(self.map) + + def __iter__(self): + return self.keys() + + def __len__(self): + return len(list(self.keys())) + + def dir_path(self, path=None): + store_path = normalize_storage_path(path) + return self.map._key_to_str(store_path) + + def listdir(self, path=None): + dir_path = self.dir_path(path) + try: + return sorted(p.rsplit('/', 1)[-1] for p in self.fs.ls(dir_path)) + except IOError: + return [] + + def rmdir(self, path=None): + store_path = self.dir_path(path) + if self.fs.isdir(store_path): + self.fs.rm(store_path, recursive=True) + + def getsize(self, path=None): + store_path = self.dir_path(path) + return self.fs.du(store_path, True, True) + + def clear(self): + self.map.clear() + + class TempStore(DirectoryStore): """Directory store using a temporary directory for storage. diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 38acbd16d7..7e112d34b3 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -29,6 +29,7 @@ array_meta_key, atexit_rmglob, atexit_rmtree, attrs_key, default_compressor, getsize, group_meta_key, init_array, init_group, migrate_1to2) +from zarr.storage import FSStore as NestedDirectoryStore from zarr.tests.util import CountingDict, skip_test_env_var @@ -762,7 +763,8 @@ class TestNestedDirectoryStore(TestDirectoryStore, unittest.TestCase): def create_store(self, normalize_keys=False): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = NestedDirectoryStore(path, normalize_keys=normalize_keys) + store = NestedDirectoryStore(path, normalize_keys=normalize_keys, + key_separator='/', auto_mkdir=True) return store def test_chunk_nesting(self): From babbfd559ccdbd1e4dadf78465fa0e0ff15b505c Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 16 Apr 2020 09:46:55 -0400 Subject: [PATCH 02/20] fix ls --- zarr/storage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 8829c239c3..f2165eb360 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1010,7 +1010,8 @@ def dir_path(self, path=None): def listdir(self, path=None): dir_path = self.dir_path(path) try: - return sorted(p.rsplit('/', 1)[-1] for p in self.fs.ls(dir_path)) + return sorted(p.rsplit('/', 1)[-1] + for p in self.fs.ls(dir_path, detail=False)) except IOError: return [] From c7fbaa0f402c5b123f5533d4a231272f278ab1bb Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 16 Apr 2020 09:58:32 -0400 Subject: [PATCH 03/20] Allow `zarr.open` This demonstrates the purpose of this PR! Now allows, for exmple `zarr.open('http://localhost:8000')`, although there is no way to pass on further args with this method yet. --- zarr/creation.py | 6 ++++-- zarr/storage.py | 7 ++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/zarr/creation.py b/zarr/creation.py index 3958e2eb78..31ff18f5d5 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -10,7 +10,7 @@ from zarr.n5 import N5Store from zarr.storage import (DirectoryStore, ZipStore, contains_array, contains_group, default_compressor, init_array, - normalize_storage_path) + normalize_storage_path, FSStore) def create(shape, chunks=True, dtype=None, compressor='default', @@ -131,8 +131,10 @@ def normalize_store_arg(store, clobber=False, default=dict): if store is None: return default() elif isinstance(store, str): + mode = 'w' if clobber else 'r' + if "://" in store: + return FSStore(store) if store.endswith('.zip'): - mode = 'w' if clobber else 'r' return ZipStore(store, mode=mode) elif store.endswith('.n5'): return N5Store(store) diff --git a/zarr/storage.py b/zarr/storage.py index f2165eb360..c3ef68f18b 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -949,13 +949,14 @@ def atexit_rmglob(path, class FSStore(MutableMapping): def __init__(self, url, normalize_keys=True, key_separator='.', - **storage_options): + mode='r', **storage_options): import fsspec self.path = url self.normalize_keys = normalize_keys self.key_separator = key_separator self.map = fsspec.get_mapper(url, **storage_options) self.fs = self.map.fs # for direct operations + self.mode = mode def _normalize_key(self, key): key = normalize_storage_path(key) @@ -969,6 +970,8 @@ def __getitem__(self, key): return self.map[key] def __setitem__(self, key, value): + if self.mode == 'r': + raise PermissionError key = self._normalize_key(key) path = self.dir_path(key) value = ensure_contiguous_ndarray(value) @@ -980,6 +983,8 @@ def __setitem__(self, key, value): raise KeyError(key) def __delitem__(self, key): + if self.mode == 'r': + raise PermissionError key = self._normalize_key(key) path = self.dir_path(key) if self.fs.isdir(path): From 3cc93861d0ac1d25963c603ed865d72226a8267f Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 16 Apr 2020 10:40:54 -0400 Subject: [PATCH 04/20] fix for http --- zarr/storage.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index c3ef68f18b..f997cccda7 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -959,7 +959,7 @@ def __init__(self, url, normalize_keys=True, key_separator='.', self.mode = mode def _normalize_key(self, key): - key = normalize_storage_path(key) + key = normalize_storage_path(key).lstrip('/') if key: *bits, end = key.split('/') key = '/'.join(bits + [end.replace('.', self.key_separator)]) @@ -1015,8 +1015,9 @@ def dir_path(self, path=None): def listdir(self, path=None): dir_path = self.dir_path(path) try: - return sorted(p.rsplit('/', 1)[-1] + out = sorted(p.rstrip('/').rsplit('/', 1)[-1] for p in self.fs.ls(dir_path, detail=False)) + return out except IOError: return [] From 18ae5adc71bf433c62fec277f8e70b82148b19b1 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 28 Apr 2020 14:24:01 -0400 Subject: [PATCH 05/20] default mode --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index f997cccda7..6233f3afe6 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -949,7 +949,7 @@ def atexit_rmglob(path, class FSStore(MutableMapping): def __init__(self, url, normalize_keys=True, key_separator='.', - mode='r', **storage_options): + mode='w', **storage_options): import fsspec self.path = url self.normalize_keys = normalize_keys From cbd05dbe6eee1f1d1ac5a50f261df962bb9eab71 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 28 Apr 2020 15:23:32 -0400 Subject: [PATCH 06/20] Fix all --- zarr/storage.py | 41 +++++++++++++++++++++++++++++++++++--- zarr/tests/test_storage.py | 1 + 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 6233f3afe6..4140638a2c 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -21,6 +21,7 @@ import multiprocessing import operator import os +import json import re import shutil import sys @@ -949,7 +950,7 @@ def atexit_rmglob(path, class FSStore(MutableMapping): def __init__(self, url, normalize_keys=True, key_separator='.', - mode='w', **storage_options): + mode='w', consolidated=False, metadata_key='.zmetadata', **storage_options): import fsspec self.path = url self.normalize_keys = normalize_keys @@ -957,6 +958,24 @@ def __init__(self, url, normalize_keys=True, key_separator='.', self.map = fsspec.get_mapper(url, **storage_options) self.fs = self.map.fs # for direct operations self.mode = mode + # TODO: should warn if consolidated and write mode? + if self.fs.exists(url) and not self.fs.isdir(url): + err_fspath_exists_notdir(url) + self.consolidated = consolidated + self.metadata_key = metadata_key + if consolidated: + self.meta = json.loads(self.map.get(metadata_key, b"{}").decode()) + if mode == 'r' or 'zarr_consolidated_format' in self.meta: + consolidated_format = self.meta.get('zarr_consolidated_format', None) + if consolidated_format != 1: + raise MetadataError('unsupported zarr consolidated metadata format: %s' % + consolidated_format) + else: + self.meta['zarr_consolidated_format'] = 1 + + @staticmethod + def _is_meta(key): + return key.split('/')[-1] in [attrs_key, group_meta_key, array_meta_key] def _normalize_key(self, key): key = normalize_storage_path(key).lstrip('/') @@ -966,12 +985,17 @@ def _normalize_key(self, key): return key.lower() if self.normalize_keys else key def __getitem__(self, key): + if self.consolidated and self._is_meta(key): + return self.meta[key] key = self._normalize_key(key) return self.map[key] def __setitem__(self, key, value): if self.mode == 'r': - raise PermissionError + err_read_only() + if self.consolidated and self._is_meta(key): + self.meta[key] = value.decode() + self.map[self.metadata_key] = json.dumps(self.meta).encode() key = self._normalize_key(key) path = self.dir_path(key) value = ensure_contiguous_ndarray(value) @@ -984,7 +1008,10 @@ def __setitem__(self, key, value): def __delitem__(self, key): if self.mode == 'r': - raise PermissionError + err_read_only() + if self.consolidated and self._is_meta(key): + del self.meta[key] + self.map[self.metadata_key] = json.dumps(self.meta).encode() key = self._normalize_key(key) path = self.dir_path(key) if self.fs.isdir(path): @@ -993,6 +1020,8 @@ def __delitem__(self, key): del self.map[key] def __contains__(self, key): + if self.consolidated and self._is_meta(key): + return key in self.meta key = self._normalize_key(key) return key in self.map @@ -1022,6 +1051,8 @@ def listdir(self, path=None): return [] def rmdir(self, path=None): + if self.mode == 'r': + err_read_only() store_path = self.dir_path(path) if self.fs.isdir(store_path): self.fs.rm(store_path, recursive=True) @@ -1031,6 +1062,10 @@ def getsize(self, path=None): return self.fs.du(store_path, True, True) def clear(self): + if self.mode == 'r': + err_read_only() + if self.consolidated: + self.meta = {} self.map.clear() diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 0bedc19960..b207496448 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -30,6 +30,7 @@ attrs_key, default_compressor, getsize, group_meta_key, init_array, init_group, migrate_1to2) from zarr.storage import FSStore as NestedDirectoryStore +from zarr.storage import FSStore as DirectoryStore from zarr.tests.util import CountingDict, skip_test_env_var From 942bcfb6501ca652f8ec91a3b7313d82ef01b86f Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 13 Aug 2020 13:52:26 -0400 Subject: [PATCH 07/20] Separate tests --- zarr/creation.py | 14 +++++++++----- zarr/hierarchy.py | 5 +++-- zarr/tests/test_storage.py | 37 +++++++++++++++++++++++++++++++++++-- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/zarr/creation.py b/zarr/creation.py index 31ff18f5d5..4e13b69fc8 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -127,13 +127,15 @@ def create(shape, chunks=True, dtype=None, compressor='default', return z -def normalize_store_arg(store, clobber=False, default=dict): +def normalize_store_arg(store, clobber=False, default=dict, storage_options=None): if store is None: return default() elif isinstance(store, str): mode = 'w' if clobber else 'r' - if "://" in store: - return FSStore(store) + if "://" in store or "::" in store: + return FSStore(store, **(storage_options or {})) + elif storage_options: + raise ValueError("storage_options passed with non-fsspec path") if store.endswith('.zip'): return ZipStore(store, mode=mode) elif store.endswith('.n5'): @@ -437,9 +439,11 @@ def open_array(store=None, mode='a', shape=None, chunks=True, dtype=None, # handle polymorphic store arg clobber = mode == 'w' - store = normalize_store_arg(store, clobber=clobber) + storage_options = kwargs.pop("storage_options", None) + store = normalize_store_arg(store, clobber=clobber, storage_options=storage_options) if chunk_store is not None: - chunk_store = normalize_store_arg(chunk_store, clobber=clobber) + chunk_store = normalize_store_arg(chunk_store, clobber=clobber, + storage_options=storage_options) path = normalize_storage_path(path) # API compatibility with h5py diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 4fab679318..97fe1992b4 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -1032,8 +1032,9 @@ def move(self, source, dest): self._write_op(self._move_nosync, source, dest) -def _normalize_store_arg(store, clobber=False): - return normalize_store_arg(store, clobber=clobber, default=MemoryStore) +def _normalize_store_arg(store, clobber=False, storage_options=None): + return normalize_store_arg(store, clobber=clobber, default=MemoryStore, + storage_options=storage_options) def group(store=None, overwrite=False, chunk_store=None, diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index b207496448..acf7c296f3 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -29,8 +29,7 @@ array_meta_key, atexit_rmglob, atexit_rmtree, attrs_key, default_compressor, getsize, group_meta_key, init_array, init_group, migrate_1to2) -from zarr.storage import FSStore as NestedDirectoryStore -from zarr.storage import FSStore as DirectoryStore +from zarr.storage import FSStore from zarr.tests.util import CountingDict, skip_test_env_var @@ -829,6 +828,30 @@ def test_normalize_keys(self): assert 'foo' in store +class TestFSStore(StoreTests, unittest.TestCase): + + def create_store(self, normalize_keys=False): + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + store = FSStore(path, normalize_keys=normalize_keys) + return store + + def test_complex(self): + path1 = tempfile.mkdtemp() + path2 = tempfile.mkdtemp() + store = FSStore("simplecache::file://" + path1, + simplecache={"same_names": True, "cache_storage": path2}) + assert not store + assert not os.listdir(path1) + assert not os.listdir(path2) + store['foo'] = b"hello" + assert 'foo' in os.listdir(path1) + assert 'foo' in store + assert not os.listdir(path2) + assert store["foo"] == b"hello" + assert 'foo' in os.listdir(path2) + + class TestNestedDirectoryStore(TestDirectoryStore, unittest.TestCase): def create_store(self, normalize_keys=False): @@ -964,6 +987,16 @@ def test_filters(self): init_array(store, shape=1000, chunks=100, filters=filters) +class TestNestedFSStore(TestNestedDirectoryStore): + + def create_store(self, normalize_keys=False): + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + store = FSStore(path, normalize_keys=normalize_keys, + key_separator='/', auto_mkdir=True) + return store + + class TestTempStore(StoreTests, unittest.TestCase): def create_store(self): From 58aecc75d11697f0364dc21a1ff32e5b0a9a0993 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 13 Aug 2020 14:20:21 -0400 Subject: [PATCH 08/20] Skip if no fsspec --- zarr/hierarchy.py | 6 +++--- zarr/storage.py | 2 +- zarr/tests/test_storage.py | 7 ++++--- zarr/tests/util.py | 7 +++++++ 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 97fe1992b4..08f0cebab1 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -1096,7 +1096,7 @@ def group(store=None, overwrite=False, chunk_store=None, def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=None, - chunk_store=None): + chunk_store=None, storage_options=None): """Open a group using file-mode-like semantics. Parameters @@ -1140,9 +1140,9 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N """ # handle polymorphic store arg - store = _normalize_store_arg(store) + store = _normalize_store_arg(store, storage_options=storage_options) if chunk_store is not None: - chunk_store = _normalize_store_arg(chunk_store) + chunk_store = _normalize_store_arg(chunk_store, storage_options=storage_options) path = normalize_storage_path(path) # ensure store is initialized diff --git a/zarr/storage.py b/zarr/storage.py index e877d659a5..e9bc4c0037 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1045,7 +1045,7 @@ def listdir(self, path=None): dir_path = self.dir_path(path) try: out = sorted(p.rstrip('/').rsplit('/', 1)[-1] - for p in self.fs.ls(dir_path, detail=False)) + for p in self.fs.ls(dir_path, detail=False)) return out except IOError: return [] diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index acf7c296f3..19886233b1 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -30,7 +30,7 @@ attrs_key, default_compressor, getsize, group_meta_key, init_array, init_group, migrate_1to2) from zarr.storage import FSStore -from zarr.tests.util import CountingDict, skip_test_env_var +from zarr.tests.util import CountingDict, have_fsspec, skip_test_env_var @contextmanager @@ -828,6 +828,7 @@ def test_normalize_keys(self): assert 'foo' in store +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestFSStore(StoreTests, unittest.TestCase): def create_store(self, normalize_keys=False): @@ -857,8 +858,7 @@ class TestNestedDirectoryStore(TestDirectoryStore, unittest.TestCase): def create_store(self, normalize_keys=False): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = NestedDirectoryStore(path, normalize_keys=normalize_keys, - key_separator='/', auto_mkdir=True) + store = NestedDirectoryStore(path, normalize_keys=normalize_keys) return store def test_chunk_nesting(self): @@ -987,6 +987,7 @@ def test_filters(self): init_array(store, shape=1000, chunks=100, filters=filters) +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestNestedFSStore(TestNestedDirectoryStore): def create_store(self, normalize_keys=False): diff --git a/zarr/tests/util.py b/zarr/tests/util.py index 7e8e719157..f2fd515ad5 100644 --- a/zarr/tests/util.py +++ b/zarr/tests/util.py @@ -46,3 +46,10 @@ def skip_test_env_var(name): """ value = os.environ.get(name, '0') return pytest.mark.skipif(value == '0', reason='Tests not enabled via environment variable') + + +try: + import fsspec # noqa: F401 + have_fsspec = True +except ImportError: + have_fsspec = False From be78b2b583d501ba7188104314d001ee26f52773 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 13 Aug 2020 14:40:18 -0400 Subject: [PATCH 09/20] Skip on py35 (should not be able to install fsspec there) --- zarr/tests/util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/zarr/tests/util.py b/zarr/tests/util.py index f2fd515ad5..1cd53b7980 100644 --- a/zarr/tests/util.py +++ b/zarr/tests/util.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- import collections +from distutils.version import LooseVersion from collections.abc import MutableMapping import os +import sys import pytest @@ -50,6 +52,6 @@ def skip_test_env_var(name): try: import fsspec # noqa: F401 - have_fsspec = True + have_fsspec = LooseVersion(sys.version) >= LooseVersion("3.6") except ImportError: have_fsspec = False From 203cf0c7793ff335eaa31829568fb0bbaa5cbcff Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 13 Aug 2020 14:48:47 -0400 Subject: [PATCH 10/20] Don't even import --- zarr/tests/util.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/zarr/tests/util.py b/zarr/tests/util.py index 1cd53b7980..9396592bab 100644 --- a/zarr/tests/util.py +++ b/zarr/tests/util.py @@ -51,7 +51,10 @@ def skip_test_env_var(name): try: - import fsspec # noqa: F401 - have_fsspec = LooseVersion(sys.version) >= LooseVersion("3.6") + if LooseVersion(sys.version) >= LooseVersion("3.6"): + import fsspec # noqa: F401 + have_fsspec = True + else: + have_fsspec = False except ImportError: have_fsspec = False From a987dbecce25a30dfe717cebcdd498ae1c0d1787 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 13 Aug 2020 16:20:49 -0400 Subject: [PATCH 11/20] Add a couple of little tests --- zarr/storage.py | 14 ++++++++++---- zarr/tests/test_storage.py | 12 ++++++++++++ zarr/tests/util.py | 4 ++-- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index e9bc4c0037..49a9064924 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -950,7 +950,9 @@ def atexit_rmglob(path, class FSStore(MutableMapping): def __init__(self, url, normalize_keys=True, key_separator='.', - mode='w', consolidated=False, metadata_key='.zmetadata', **storage_options): + mode='w', consolidated=False, metadata_key='.zmetadata', + exceptions=(KeyError, PermissionError, IOError), + **storage_options): import fsspec self.path = url self.normalize_keys = normalize_keys @@ -958,6 +960,7 @@ def __init__(self, url, normalize_keys=True, key_separator='.', self.map = fsspec.get_mapper(url, **storage_options) self.fs = self.map.fs # for direct operations self.mode = mode + self.exceptions = exceptions # TODO: should warn if consolidated and write mode? if self.fs.exists(url) and not self.fs.isdir(url): err_fspath_exists_notdir(url) @@ -988,7 +991,10 @@ def __getitem__(self, key): if self.consolidated and self._is_meta(key): return self.meta[key] key = self._normalize_key(key) - return self.map[key] + try: + return self.map[key] + except self.exceptions as e: + raise KeyError(key) from e def __setitem__(self, key, value): if self.mode == 'r': @@ -1003,8 +1009,8 @@ def __setitem__(self, key, value): if self.fs.isdir(path): self.fs.rm(path, recursive=True) self.map[key] = value - except IOError: - raise KeyError(key) + except self.exceptions as e: + raise KeyError(key) from e def __delitem__(self, key): if self.mode == 'r': diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 19886233b1..dc260d811e 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -852,6 +852,18 @@ def test_complex(self): assert store["foo"] == b"hello" assert 'foo' in os.listdir(path2) + def test_consolidated(self): + path = tempfile.mkdtemp() + store = FSStore(path, consolidated=True) + store[".zarray"] = b"{}" + assert ".zmetadata" in os.listdir(path) + + def test_not_fsspec(self): + import zarr + path = tempfile.mkdtemp() + with pytest.raises(ValueError, match="storage_options"): + zarr.open_array(path, mode='w', storage_options={"some": "kwargs"}) + class TestNestedDirectoryStore(TestDirectoryStore, unittest.TestCase): diff --git a/zarr/tests/util.py b/zarr/tests/util.py index 9396592bab..a61461d4a9 100644 --- a/zarr/tests/util.py +++ b/zarr/tests/util.py @@ -54,7 +54,7 @@ def skip_test_env_var(name): if LooseVersion(sys.version) >= LooseVersion("3.6"): import fsspec # noqa: F401 have_fsspec = True - else: + else: # pragma: no cover have_fsspec = False -except ImportError: +except ImportError: # pragma: no cover have_fsspec = False From f5809a519a7b7f828880f6126f0ffbdb4074dbb0 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 13 Aug 2020 16:57:46 -0400 Subject: [PATCH 12/20] more coverage --- zarr/storage.py | 9 +++++---- zarr/tests/test_storage.py | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 49a9064924..ac0046bb3b 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -970,7 +970,7 @@ def __init__(self, url, normalize_keys=True, key_separator='.', self.meta = json.loads(self.map.get(metadata_key, b"{}").decode()) if mode == 'r' or 'zarr_consolidated_format' in self.meta: consolidated_format = self.meta.get('zarr_consolidated_format', None) - if consolidated_format != 1: + if consolidated_format != 1: # pragma: no cover raise MetadataError('unsupported zarr consolidated metadata format: %s' % consolidated_format) else: @@ -989,11 +989,11 @@ def _normalize_key(self, key): def __getitem__(self, key): if self.consolidated and self._is_meta(key): - return self.meta[key] + return self.meta[key].encode() # expect bytes out key = self._normalize_key(key) try: return self.map[key] - except self.exceptions as e: + except self.exceptions as e: raise KeyError(key) from e def __setitem__(self, key, value): @@ -1032,7 +1032,8 @@ def __contains__(self, key): return key in self.map def __eq__(self, other): - return type(self) == type(other) and self.map == other.map + return (type(self) == type(other) and self.map == other.map + and self.mode == other.mode) def keys(self): return iter(self.map) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index dc260d811e..a9207f1a78 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -857,12 +857,31 @@ def test_consolidated(self): store = FSStore(path, consolidated=True) store[".zarray"] = b"{}" assert ".zmetadata" in os.listdir(path) + del store[".zarray"] + with pytest.raises(KeyError): + store[".zarray"] + store[".zarray"] = b"{}" + + os.remove(os.path.join(path, ".zarray")) + store2 = FSStore(path, mode='r', consolidated=True) + assert store != store2 + assert ".zarray" in store2 + assert store2[".zarray"] == b"{}" + with pytest.raises(PermissionError): + store2[".zarray"] = b"{{}}" + with pytest.raises(PermissionError): + del store2[".zarray"] + + store.clear() + assert ".zmetadata" not in store def test_not_fsspec(self): import zarr path = tempfile.mkdtemp() with pytest.raises(ValueError, match="storage_options"): zarr.open_array(path, mode='w', storage_options={"some": "kwargs"}) + with pytest.raises(ValueError, match="storage_options"): + zarr.open_group(path, mode='w', storage_options={"some": "kwargs"}) class TestNestedDirectoryStore(TestDirectoryStore, unittest.TestCase): From b5d8c9f962b3e77bb282a5ac8d191fb5e13055ab Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 13 Aug 2020 17:23:20 -0400 Subject: [PATCH 13/20] more coverage --- zarr/creation.py | 2 +- zarr/tests/test_storage.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/zarr/creation.py b/zarr/creation.py index 4e13b69fc8..6eb5dfa6d4 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -133,7 +133,7 @@ def normalize_store_arg(store, clobber=False, default=dict, storage_options=None elif isinstance(store, str): mode = 'w' if clobber else 'r' if "://" in store or "::" in store: - return FSStore(store, **(storage_options or {})) + return FSStore(store, mode=mode, **(storage_options or {})) elif storage_options: raise ValueError("storage_options passed with non-fsspec path") if store.endswith('.zip'): diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index a9207f1a78..1e6713a9cd 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -871,6 +871,12 @@ def test_consolidated(self): store2[".zarray"] = b"{{}}" with pytest.raises(PermissionError): del store2[".zarray"] + with pytest.raises(PermissionError): + store2.clear() + with pytest.raises(PermissionError): + store2.rmdir("any") + with pytest.raises(ValueError): + FSStore(os.path.join(path, ".zmetadata")) store.clear() assert ".zmetadata" not in store @@ -882,6 +888,7 @@ def test_not_fsspec(self): zarr.open_array(path, mode='w', storage_options={"some": "kwargs"}) with pytest.raises(ValueError, match="storage_options"): zarr.open_group(path, mode='w', storage_options={"some": "kwargs"}) + zarr.open_array("file://" + path, mode='w', shape=(1,), dtype="f8") class TestNestedDirectoryStore(TestDirectoryStore, unittest.TestCase): From c3f7327baa671d4d9bab5faff6db16332e975c16 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 10 Sep 2020 10:08:31 -0400 Subject: [PATCH 14/20] Remove consolidated functionality, add tests --- zarr/creation.py | 7 ++++-- zarr/hierarchy.py | 8 ++++-- zarr/storage.py | 51 ++++++++++++++++---------------------- zarr/tests/test_storage.py | 47 ++++++++++++++--------------------- 4 files changed, 51 insertions(+), 62 deletions(-) diff --git a/zarr/creation.py b/zarr/creation.py index 6eb5dfa6d4..591feeed29 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -357,7 +357,8 @@ def array(data, **kwargs): def open_array(store=None, mode='a', shape=None, chunks=True, dtype=None, compressor='default', fill_value=0, order='C', synchronizer=None, filters=None, cache_metadata=True, cache_attrs=True, path=None, - object_codec=None, chunk_store=None, **kwargs): + object_codec=None, chunk_store=None, storage_options=None, + **kwargs): """Open an array using file-mode-like semantics. Parameters @@ -403,6 +404,9 @@ def open_array(store=None, mode='a', shape=None, chunks=True, dtype=None, A codec to encode object arrays, only needed if dtype=object. chunk_store : MutableMapping or string, optional Store or path to directory in file system or name of zip file. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. Returns ------- @@ -439,7 +443,6 @@ def open_array(store=None, mode='a', shape=None, chunks=True, dtype=None, # handle polymorphic store arg clobber = mode == 'w' - storage_options = kwargs.pop("storage_options", None) store = normalize_store_arg(store, clobber=clobber, storage_options=storage_options) if chunk_store is not None: chunk_store = normalize_store_arg(chunk_store, clobber=clobber, diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 08f0cebab1..6cbab193d1 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -1118,6 +1118,9 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N Group path within store. chunk_store : MutableMapping or string, optional Store or path to directory in file system or name of zip file. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. Returns ------- @@ -1140,9 +1143,10 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N """ # handle polymorphic store arg - store = _normalize_store_arg(store, storage_options=storage_options) + clobber = mode != 'r' + store = _normalize_store_arg(store, clobber=clobber, storage_options=storage_options) if chunk_store is not None: - chunk_store = _normalize_store_arg(chunk_store, storage_options=storage_options) + chunk_store = _normalize_store_arg(chunk_store, clobber=clobber, storage_options=storage_options) path = normalize_storage_path(path) # ensure store is initialized diff --git a/zarr/storage.py b/zarr/storage.py index ac0046bb3b..4381e9f41d 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -948,9 +948,30 @@ def atexit_rmglob(path, class FSStore(MutableMapping): + """Wraps an fsspec.FSMap to give access to arbitrary filesystems + + Requires that ``fsspec`` is installed, as well as any additional + requirements for the protocol chosen. + + Parameters + ---------- + url : str + The destination to map. Should include protocol and path, + like "s3://bucket/root" + normalize_keys : bool + key_separator : str + Character to use when constructing the target path strings + for data keys + mode : str + "w" for writable, "r" for read-only + exceptions : list of Exception subclasses + When accessing data, any of these exceptions will be treated + as a missing key + storage_options : passed to the fsspec implementation + """ def __init__(self, url, normalize_keys=True, key_separator='.', - mode='w', consolidated=False, metadata_key='.zmetadata', + mode='w', exceptions=(KeyError, PermissionError, IOError), **storage_options): import fsspec @@ -961,24 +982,8 @@ def __init__(self, url, normalize_keys=True, key_separator='.', self.fs = self.map.fs # for direct operations self.mode = mode self.exceptions = exceptions - # TODO: should warn if consolidated and write mode? if self.fs.exists(url) and not self.fs.isdir(url): err_fspath_exists_notdir(url) - self.consolidated = consolidated - self.metadata_key = metadata_key - if consolidated: - self.meta = json.loads(self.map.get(metadata_key, b"{}").decode()) - if mode == 'r' or 'zarr_consolidated_format' in self.meta: - consolidated_format = self.meta.get('zarr_consolidated_format', None) - if consolidated_format != 1: # pragma: no cover - raise MetadataError('unsupported zarr consolidated metadata format: %s' % - consolidated_format) - else: - self.meta['zarr_consolidated_format'] = 1 - - @staticmethod - def _is_meta(key): - return key.split('/')[-1] in [attrs_key, group_meta_key, array_meta_key] def _normalize_key(self, key): key = normalize_storage_path(key).lstrip('/') @@ -988,8 +993,6 @@ def _normalize_key(self, key): return key.lower() if self.normalize_keys else key def __getitem__(self, key): - if self.consolidated and self._is_meta(key): - return self.meta[key].encode() # expect bytes out key = self._normalize_key(key) try: return self.map[key] @@ -999,9 +1002,6 @@ def __getitem__(self, key): def __setitem__(self, key, value): if self.mode == 'r': err_read_only() - if self.consolidated and self._is_meta(key): - self.meta[key] = value.decode() - self.map[self.metadata_key] = json.dumps(self.meta).encode() key = self._normalize_key(key) path = self.dir_path(key) value = ensure_contiguous_ndarray(value) @@ -1015,9 +1015,6 @@ def __setitem__(self, key, value): def __delitem__(self, key): if self.mode == 'r': err_read_only() - if self.consolidated and self._is_meta(key): - del self.meta[key] - self.map[self.metadata_key] = json.dumps(self.meta).encode() key = self._normalize_key(key) path = self.dir_path(key) if self.fs.isdir(path): @@ -1026,8 +1023,6 @@ def __delitem__(self, key): del self.map[key] def __contains__(self, key): - if self.consolidated and self._is_meta(key): - return key in self.meta key = self._normalize_key(key) return key in self.map @@ -1071,8 +1066,6 @@ def getsize(self, path=None): def clear(self): if self.mode == 'r': err_read_only() - if self.consolidated: - self.meta = {} self.map.clear() diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 1e6713a9cd..28d0294075 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -852,35 +852,6 @@ def test_complex(self): assert store["foo"] == b"hello" assert 'foo' in os.listdir(path2) - def test_consolidated(self): - path = tempfile.mkdtemp() - store = FSStore(path, consolidated=True) - store[".zarray"] = b"{}" - assert ".zmetadata" in os.listdir(path) - del store[".zarray"] - with pytest.raises(KeyError): - store[".zarray"] - store[".zarray"] = b"{}" - - os.remove(os.path.join(path, ".zarray")) - store2 = FSStore(path, mode='r', consolidated=True) - assert store != store2 - assert ".zarray" in store2 - assert store2[".zarray"] == b"{}" - with pytest.raises(PermissionError): - store2[".zarray"] = b"{{}}" - with pytest.raises(PermissionError): - del store2[".zarray"] - with pytest.raises(PermissionError): - store2.clear() - with pytest.raises(PermissionError): - store2.rmdir("any") - with pytest.raises(ValueError): - FSStore(os.path.join(path, ".zmetadata")) - - store.clear() - assert ".zmetadata" not in store - def test_not_fsspec(self): import zarr path = tempfile.mkdtemp() @@ -890,6 +861,24 @@ def test_not_fsspec(self): zarr.open_group(path, mode='w', storage_options={"some": "kwargs"}) zarr.open_array("file://" + path, mode='w', shape=(1,), dtype="f8") + def test_create(self): + import zarr + path1 = tempfile.mkdtemp() + path2 = tempfile.mkdtemp() + g = zarr.open_group("file://" + path1, mode='w', + storage_options={"auto_mkdir": True}) + a = g.create_dataset("data", shape=(8,)) + a[:4] = [0, 1, 2, 3] + assert "data" in os.listdir(path1) + assert ".zgroup" in os.listdir(path1) + + g = zarr.open_group("simplecache::file://" + path1, mode='r', + storage_options={"cache_storage": path2, + "same_names": True}) + assert g.data[:].tolist() == [0, 1, 2, 3, 0, 0, 0, 0] + with pytest.raises(PermissionError): + g.data[:] = 1 + class TestNestedDirectoryStore(TestDirectoryStore, unittest.TestCase): From d9734c3f495577df9960e17c572b1114a0d4beee Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 10 Sep 2020 10:10:23 -0400 Subject: [PATCH 15/20] flake clean --- zarr/hierarchy.py | 3 ++- zarr/storage.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 6cbab193d1..2c5d25be82 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -1146,7 +1146,8 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N clobber = mode != 'r' store = _normalize_store_arg(store, clobber=clobber, storage_options=storage_options) if chunk_store is not None: - chunk_store = _normalize_store_arg(chunk_store, clobber=clobber, storage_options=storage_options) + chunk_store = _normalize_store_arg(chunk_store, clobber=clobber, + storage_options=storage_options) path = normalize_storage_path(path) # ensure store is initialized diff --git a/zarr/storage.py b/zarr/storage.py index 4381e9f41d..cd4b37eee2 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -21,7 +21,6 @@ import multiprocessing import operator import os -import json import re import shutil import sys From 23bce6716adc6845a2ea9394dd6188e475523400 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 10 Sep 2020 12:07:25 -0400 Subject: [PATCH 16/20] Add FSStore to API doc --- docs/api/storage.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/api/storage.rst b/docs/api/storage.rst index a2aff3b6ef..4321837449 100644 --- a/docs/api/storage.rst +++ b/docs/api/storage.rst @@ -35,6 +35,8 @@ Storage (``zarr.storage``) .. autoclass:: ABSStore +.. autoclass:: FSStore + .. autoclass:: ConsolidatedMetadataStore .. autofunction:: init_array From 1dadee64e7e921ee1286a5eb649f6cb8b1b3ee08 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 10 Sep 2020 12:33:24 -0400 Subject: [PATCH 17/20] Coverage --- zarr/tests/test_storage.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 28d0294075..f4c45862d5 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -879,6 +879,37 @@ def test_create(self): with pytest.raises(PermissionError): g.data[:] = 1 + def test_read_only(self): + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + store = FSStore(path) + store['foo'] = b"bar" + + store = FSStore(path, mode='r') + + with pytest.raises(PermissionError): + store['foo'] = b"hex" + + with pytest.raises(PermissionError): + del store['foo'] + + with pytest.raises(PermissionError): + store.clear() + + with pytest.raises(PermissionError): + store.rmdir("anydir") + + assert store['foo'] == b"bar" + + filepath = os.path.join(path, "foo") + with pytest.raises(ValueError): + FSStore(filepath, mode='r') + + def test_eq(self): + store1 = FSStore("anypath") + store2 = FSStore("anypath") + assert store1 == store2 + class TestNestedDirectoryStore(TestDirectoryStore, unittest.TestCase): From 51ec63413644b54fc6e6a103f3037adfd804bb90 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 10 Sep 2020 16:16:32 -0400 Subject: [PATCH 18/20] kick From 2da30775c0367d982023db06ee06aeb9b4824a0f Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 11 Sep 2020 09:36:14 -0400 Subject: [PATCH 19/20] Add s3 test --- requirements_dev_optional.txt | 3 +- zarr/tests/test_storage.py | 52 +++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 5777ad56a0..e37d27b6cf 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -18,4 +18,5 @@ pytest-cov==2.7.1 pytest-doctestplus==0.4.0 pytest-remotedata==0.3.2 h5py==2.10.0 -s3fs==0.3.4; python_version > '3.0' +s3fs==0.5.0; python_version > '3.6' +moto>=1.3.14; python_version > '3.6' diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index f4c45862d5..d8730b2e7d 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -910,6 +910,58 @@ def test_eq(self): store2 = FSStore("anypath") assert store1 == store2 + @pytest.mark.usefixtures("s3") + def test_s3(self): + import zarr + g = zarr.open_group("s3://test/out.zarr", mode='w', + storage_options=self.s3so) + a = g.create_dataset("data", shape=(8,)) + a[:4] = [0, 1, 2, 3] + + g = zarr.open_group("s3://test/out.zarr", mode='r', + storage_options=self.s3so) + + assert g.data[:].tolist() == [0, 1, 2, 3, 0, 0, 0, 0] + + +@pytest.fixture() +def s3(request): + # writable local S3 system + import shlex + import subprocess + import time + if "BOTO_CONFIG" not in os.environ: # pragma: no cover + os.environ["BOTO_CONFIG"] = "/dev/null" + if "AWS_ACCESS_KEY_ID" not in os.environ: # pragma: no cover + os.environ["AWS_ACCESS_KEY_ID"] = "foo" + if "AWS_SECRET_ACCESS_KEY" not in os.environ: # pragma: no cover + os.environ["AWS_SECRET_ACCESS_KEY"] = "bar" + requests = pytest.importorskip("requests") + s3fs = pytest.importorskip("s3fs") + pytest.importorskip("moto") + + port = 5555 + endpoint_uri = 'http://127.0.0.1:%s/' % port + proc = subprocess.Popen(shlex.split("moto_server s3 -p %s" % port)) + + timeout = 5 + while timeout > 0: + try: + r = requests.get(endpoint_uri) + if r.ok: + break + except Exception: # pragma: no cover + pass + timeout -= 0.1 # pragma: no cover + time.sleep(0.1) # pragma: no cover + s3so = dict(client_kwargs={'endpoint_url': endpoint_uri}) + s3 = s3fs.S3FileSystem(anon=False, **s3so) + s3.mkdir("test") + request.cls.s3so = s3so + yield + proc.terminate() + proc.wait() + class TestNestedDirectoryStore(TestDirectoryStore, unittest.TestCase): From 7ad67e16e7113827da381ac52507a36fbb76ce7d Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 11 Sep 2020 10:08:11 -0400 Subject: [PATCH 20/20] Add flask to test reqs (needed for moto server) --- requirements_dev_optional.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index e37d27b6cf..4fb3c21b5b 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -20,3 +20,4 @@ pytest-remotedata==0.3.2 h5py==2.10.0 s3fs==0.5.0; python_version > '3.6' moto>=1.3.14; python_version > '3.6' +flask