From d26923a220bfc02f045ff5373f09a1285872c375 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 13:33:53 +0200 Subject: [PATCH 01/15] Drop skip_if_nested_chunks from test_storage.py --- zarr/tests/test_storage.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index e9b997b335..b5c738bc29 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -803,12 +803,16 @@ def test_pickle(self): class TestDirectoryStore(StoreTests): - def create_store(self, normalize_keys=False, **kwargs): - skip_if_nested_chunks(**kwargs) - + def create_store(self, + normalize_keys=False, + dimension_separator=".", + **kwargs): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = DirectoryStore(path, normalize_keys=normalize_keys, **kwargs) + store = DirectoryStore(path, + normalize_keys=normalize_keys, + dimension_separator=dimension_separator, + **kwargs) return store def test_filesystem_path(self): From c06476df551a339eb4528e07cc1d8adc9ffa6372 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 13:54:41 +0200 Subject: [PATCH 02/15] Add failing nested test --- zarr/tests/test_storage.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index b5c738bc29..cf0172bdd7 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -890,6 +890,23 @@ def mock_walker_no_slash(_path): ) assert res == {'.zgroup', 'g1/.zgroup', 'd1/.zarray'} + def test_read_nested(self): + import zarr + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + + store1 = NestedDirectoryStore(path) + g1 = zarr.open(store=store1, mode="w") + data = g1.create_dataset("data", data=[[1, 2], [3, 4]]) + + store2 = NestedDirectoryStore(path) + g2 = zarr.open(store=store2) + assert g2.data[0][0] == 1 + + store3 = DirectoryStore(path) + g3 = zarr.open(store=store3) + assert g3.data[0][0] 
== 1 + @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestFSStore(StoreTests): From ce8b2f052c8b493acbb083ef27fd099cff6b76f9 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 13:34:08 +0200 Subject: [PATCH 03/15] Make DirectoryStore dimension_separator aware --- zarr/storage.py | 70 +++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index d2de2cda4c..4ce8ebc120 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -803,6 +803,10 @@ def __init__(self, path, normalize_keys=False, dimension_separator=None): def _normalize_key(self, key): return key.lower() if self.normalize_keys else key + def _optionally_nested(self, key): + return self._dimension_separator == "/" and \ + _nested_map_ckey(key) or key + def _fromfile(self, fn): """ Read data from a file @@ -838,6 +842,7 @@ def _tofile(self, a, fn): f.write(a) def __getitem__(self, key): + key = self._optionally_nested(key) key = self._normalize_key(key) filepath = os.path.join(self.path, key) if os.path.isfile(filepath): @@ -846,6 +851,7 @@ def __getitem__(self, key): raise KeyError(key) def __setitem__(self, key, value): + key = self._optionally_nested(key) key = self._normalize_key(key) # coerce to flat, contiguous array (ideally without copying) @@ -887,6 +893,7 @@ def __setitem__(self, key, value): os.remove(temp_path) def __delitem__(self, key): + key = self._optionally_nested(key) key = self._normalize_key(key) path = os.path.join(self.path, key) if os.path.isfile(path): @@ -899,6 +906,7 @@ def __delitem__(self, key): raise KeyError(key) def __contains__(self, key): + key = self._optionally_nested(key) key = self._normalize_key(key) file_path = os.path.join(self.path, key) return os.path.isfile(file_path) @@ -947,12 +955,37 @@ def dir_path(self, path=None): return dir_path def listdir(self, path=None): + return self._dimension_separator == "/" and \ + self._nested_listdir(path) or 
self._flat_listdir(path) + + def _flat_listdir(self, path=None): dir_path = self.dir_path(path) if os.path.isdir(dir_path): return sorted(os.listdir(dir_path)) else: return [] + def _nested_listdir(self, path=None): + children = self._flat_listdir(path=path) + if array_meta_key in children: + # special handling of directories containing an array to map nested chunk + # keys back to standard chunk keys + new_children = [] + root_path = self.dir_path(path) + for entry in children: + entry_path = os.path.join(root_path, entry) + if _prog_number.match(entry) and os.path.isdir(entry_path): + for dir_path, _, file_names in os.walk(entry_path): + for file_name in file_names: + file_path = os.path.join(dir_path, file_name) + rel_path = file_path.split(root_path + os.path.sep)[1] + new_children.append(rel_path.replace(os.path.sep, '.')) + else: + new_children.append(entry) + return sorted(new_children) + else: + return children + def rename(self, src_path, dst_path): store_src_path = normalize_storage_path(src_path) store_dst_path = normalize_storage_path(dst_path) @@ -1314,49 +1347,12 @@ def __init__(self, path, normalize_keys=False, dimension_separator="/"): "NestedDirectoryStore only supports '/' as dimension_separator") self._dimension_separator = dimension_separator - def __getitem__(self, key): - key = _nested_map_ckey(key) - return super().__getitem__(key) - - def __setitem__(self, key, value): - key = _nested_map_ckey(key) - super().__setitem__(key, value) - - def __delitem__(self, key): - key = _nested_map_ckey(key) - super().__delitem__(key) - - def __contains__(self, key): - key = _nested_map_ckey(key) - return super().__contains__(key) - def __eq__(self, other): return ( isinstance(other, NestedDirectoryStore) and self.path == other.path ) - def listdir(self, path=None): - children = super().listdir(path=path) - if array_meta_key in children: - # special handling of directories containing an array to map nested chunk - # keys back to standard chunk keys - 
new_children = [] - root_path = self.dir_path(path) - for entry in children: - entry_path = os.path.join(root_path, entry) - if _prog_number.match(entry) and os.path.isdir(entry_path): - for dir_path, _, file_names in os.walk(entry_path): - for file_name in file_names: - file_path = os.path.join(dir_path, file_name) - rel_path = file_path.split(root_path + os.path.sep)[1] - new_children.append(rel_path.replace(os.path.sep, '.')) - else: - new_children.append(entry) - return sorted(new_children) - else: - return children - # noinspection PyPep8Naming class ZipStore(MutableMapping): From e1835667116b3f86fe002183a6b527565743795c Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 20:21:13 +0200 Subject: [PATCH 04/15] Migrate key logic to core rather than storage Previous tests (now commented out) used logic in the store classes to convert "0/0" keys into "0.0" keys, forcing the store to be aware of array details. This tries to swap the logic so that stores are responsible for passing dimension separator values down to the arrays only. Since arrays can also get the dimension_separator value from a .zarray file they are now in charge. 
--- zarr/core.py | 2 +- zarr/storage.py | 8 -------- zarr/tests/test_storage.py | 8 ++++---- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 3df8043000..ba3f2c1e2d 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1952,7 +1952,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): return self._encode_chunk(chunk) def _chunk_key(self, chunk_coords): - return self._key_prefix + '.'.join(map(str, chunk_coords)) + return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): # decompress diff --git a/zarr/storage.py b/zarr/storage.py index 4ce8ebc120..42c60d50a1 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -803,10 +803,6 @@ def __init__(self, path, normalize_keys=False, dimension_separator=None): def _normalize_key(self, key): return key.lower() if self.normalize_keys else key - def _optionally_nested(self, key): - return self._dimension_separator == "/" and \ - _nested_map_ckey(key) or key - def _fromfile(self, fn): """ Read data from a file @@ -842,7 +838,6 @@ def _tofile(self, a, fn): f.write(a) def __getitem__(self, key): - key = self._optionally_nested(key) key = self._normalize_key(key) filepath = os.path.join(self.path, key) if os.path.isfile(filepath): @@ -851,7 +846,6 @@ def __getitem__(self, key): raise KeyError(key) def __setitem__(self, key, value): - key = self._optionally_nested(key) key = self._normalize_key(key) # coerce to flat, contiguous array (ideally without copying) @@ -893,7 +887,6 @@ def __setitem__(self, key, value): os.remove(temp_path) def __delitem__(self, key): - key = self._optionally_nested(key) key = self._normalize_key(key) path = os.path.join(self.path, key) if os.path.isfile(path): @@ -906,7 +899,6 @@ def __delitem__(self, key): raise KeyError(key) def __contains__(self, key): - key = self._optionally_nested(key) key = self._normalize_key(key) file_path = 
os.path.join(self.path, key) return os.path.isfile(file_path) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index cf0172bdd7..938746ca40 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1165,10 +1165,10 @@ def test_chunk_nesting(self): # any path where last segment looks like a chunk key gets special handling store['0.0'] = b'xxx' assert b'xxx' == store['0.0'] - assert b'xxx' == store['0/0'] + # assert b'xxx' == store['0/0'] store['foo/10.20.30'] = b'yyy' assert b'yyy' == store['foo/10.20.30'] - assert b'yyy' == store['foo/10/20/30'] + # assert b'yyy' == store['foo/10/20/30'] store['42'] = b'zzz' assert b'zzz' == store['42'] @@ -1213,12 +1213,12 @@ def test_chunk_nesting(self): store['0.0'] = b'xxx' assert '0.0' in store assert b'xxx' == store['0.0'] - assert b'xxx' == store['0/0'] + # assert b'xxx' == store['0/0'] store['foo/10.20.30'] = b'yyy' assert 'foo/10.20.30' in store assert b'yyy' == store['foo/10.20.30'] # N5 reverses axis order - assert b'yyy' == store['foo/30/20/10'] + # assert b'yyy' == store['foo/30/20/10'] store['42'] = b'zzz' assert '42' in store assert b'zzz' == store['42'] From 449a67fc943d3555739f5d61a54293911ea39c1a Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 21:45:08 +0200 Subject: [PATCH 05/15] Fix linting in new test --- zarr/tests/test_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 938746ca40..a690a36d21 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -897,7 +897,7 @@ def test_read_nested(self): store1 = NestedDirectoryStore(path) g1 = zarr.open(store=store1, mode="w") - data = g1.create_dataset("data", data=[[1, 2], [3, 4]]) + g1.create_dataset("data", data=[[1, 2], [3, 4]]) store2 = NestedDirectoryStore(path) g2 = zarr.open(store=store2) From 2e4f4d7990de3fdfcb3d200ca29536b622db7728 Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 17 Jun 2021 13:41:10 
+0200 Subject: [PATCH 06/15] Extend the test suite for dim_sep --- fixture/flat/.zarray | 22 +++++++++ fixture/flat/0.0 | Bin 0 -> 48 bytes fixture/nested/.zarray | 23 ++++++++++ fixture/nested/0/0 | Bin 0 -> 48 bytes zarr/tests/test_dim_separator.py | 75 +++++++++++++++++++++++++++++++ zarr/tests/test_storage.py | 17 ------- 6 files changed, 120 insertions(+), 17 deletions(-) create mode 100644 fixture/flat/.zarray create mode 100644 fixture/flat/0.0 create mode 100644 fixture/nested/.zarray create mode 100644 fixture/nested/0/0 create mode 100644 zarr/tests/test_dim_separator.py diff --git a/fixture/flat/.zarray b/fixture/flat/.zarray new file mode 100644 index 0000000000..8ec79419da --- /dev/null +++ b/fixture/flat/.zarray @@ -0,0 +1,22 @@ +{ + "chunks": [ + 2, + 2 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": " Date: Mon, 5 Jul 2021 19:01:59 -0400 Subject: [PATCH 07/15] add n5fsstore and tests --- zarr/n5.py | 246 ++++++++++++++++++++++++++++++++++++- zarr/storage.py | 10 +- zarr/tests/test_storage.py | 82 ++++++++++++- 3 files changed, 332 insertions(+), 6 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index fa01005302..4d53902ac8 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -11,7 +11,7 @@ from numcodecs.registry import get_codec, register_codec from .meta import ZARR_FORMAT, json_dumps, json_loads -from .storage import NestedDirectoryStore, _prog_ckey, _prog_number +from .storage import NestedDirectoryStore, _prog_ckey, _prog_number, FSStore, normalize_storage_path from .storage import array_meta_key as zarr_array_meta_key from .storage import attrs_key as zarr_attrs_key from .storage import group_meta_key as zarr_group_meta_key @@ -281,6 +281,250 @@ def _contains_attrs(self, path): return len(attrs) > 0 +class N5FSStore(FSStore): + """Implentation of the N5 format (https://github.com/saalfeldlab/n5) using `fsspec`, + which allows storage on a variety of filesystems. 
Based on `zarr.N5Store`. + Parameters + ---------- + path : string + Location of directory to use as the root of the storage hierarchy. + normalize_keys : bool, optional + If True, all store keys will be normalized to use lower case characters + (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be + useful to avoid potential discrepancies between case-senstive and + case-insensitive file system. Default value is False. + Examples + -------- + Store a single array:: + >>> import zarr + >>> store = zarr.N5FSStore('data/array.n5') + >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> z[...] = 42 + Store a group:: + >>> store = zarr.N5FSStore('data/group.n5') + >>> root = zarr.group(store=store, overwrite=True) + >>> foo = root.create_group('foo') + >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) + >>> bar[...] = 42 + Notes + ----- + This is an experimental feature. + Safe to write in multiple threads or processes. + """ + + def __init__(self, *args, **kwargs): + kwargs["key_separator"] = "/" + kwargs["meta_keys"] = ("attributes.json",) + super().__init__(*args, **kwargs) + + def _normalize_key(self, key): + if is_chunk_key(key): + key = invert_chunk_coords(key) + + key = normalize_storage_path(key).lstrip("/") + if key: + *bits, end = key.split("/") + + if end not in self._META_KEYS: + end = end.replace(".", self.key_separator) + key = "/".join(bits + [end]) + return key.lower() if self.normalize_keys else key + + def __getitem__(self, key): + if key.endswith(zarr_group_meta_key): + + key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) + value = group_metadata_to_zarr(self._load_n5_attrs(key)) + + return json_dumps(value) + + elif key.endswith(zarr_array_meta_key): + + key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) + value = array_metadata_to_zarr(self._load_n5_attrs(key)) + + return json_dumps(value) + + elif key.endswith(zarr_attrs_key): + + key = key.replace(zarr_attrs_key, self._META_KEYS[0]) + 
value = attrs_to_zarr(self._load_n5_attrs(key)) + + if len(value) == 0: + raise KeyError(key) + else: + return json_dumps(value) + return super().__getitem__(key) + + def __setitem__(self, key, value): + if key.endswith(zarr_group_meta_key): + + key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) + + n5_attrs = self._load_n5_attrs(key) + n5_attrs.update(**group_metadata_to_n5(json_loads(value))) + + value = json_dumps(n5_attrs) + + elif key.endswith(zarr_array_meta_key): + + key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) + + n5_attrs = self._load_n5_attrs(key) + n5_attrs.update(**array_metadata_to_n5(json_loads(value))) + + value = json_dumps(n5_attrs) + + elif key.endswith(zarr_attrs_key): + + key = key.replace(zarr_attrs_key, self._META_KEYS[0]) + + n5_attrs = self._load_n5_attrs(key) + zarr_attrs = json_loads(value) + + for k in n5_keywords: + if k in zarr_attrs.keys(): + raise ValueError( + "Can not set attribute %s, this is a reserved N5 keyword" % k + ) + + # replace previous user attributes + for k in list(n5_attrs.keys()): + if k not in n5_keywords: + del n5_attrs[k] + + # add new user attributes + n5_attrs.update(**zarr_attrs) + + value = json_dumps(n5_attrs) + + super().__setitem__(key, value) + + def __delitem__(self, key): + + if key.endswith(zarr_group_meta_key): # pragma: no cover + key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) + elif key.endswith(zarr_array_meta_key): # pragma: no cover + key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) + elif key.endswith(zarr_attrs_key): # pragma: no cover + key = key.replace(zarr_attrs_key, self._META_KEYS[0]) + + super().__delitem__(key) + + def __contains__(self, key): + if key.endswith(zarr_group_meta_key): + + key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) + if key not in self: + return False + # group if not a dataset (attributes do not contain 'dimensions') + return "dimensions" not in self._load_n5_attrs(key) + + elif key.endswith(zarr_array_meta_key): + 
+ key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) + # array if attributes contain 'dimensions' + return "dimensions" in self._load_n5_attrs(key) + + elif key.endswith(zarr_attrs_key): + + key = key.replace(zarr_attrs_key, self._META_KEYS[0]) + return self._contains_attrs(key) + + return super().__contains__(key) + + def __eq__(self, other): + return isinstance(other, N5FSStore) and self.path == other.path + + def listdir(self, path=None): + + if path is not None: + path = invert_chunk_coords(path) + + # We can't use NestedDirectoryStore's listdir, as it requires + # array_meta_key to be present in array directories, which this store + # doesn't provide. + children = super().listdir(path=path) + if self._is_array(path): + + # replace n5 attribute file with respective zarr attribute files + children.remove(self._META_KEYS[0]) + children.append(zarr_array_meta_key) + if self._contains_attrs(path): + children.append(zarr_attrs_key) + + # special handling of directories containing an array to map + # inverted nested chunk keys back to standard chunk keys + new_children = [] + root_path = self.dir_path(path) + for entry in children: + entry_path = os.path.join(root_path, entry) + if _prog_number.match(entry) and self.fs.isdir(entry_path): + for dir_path, _, file_names in self.fs.walk(entry_path): + for file_name in file_names: + file_path = os.path.join(dir_path, file_name) + rel_path = file_path.split(root_path + os.path.sep)[1] + new_child = rel_path.replace(os.path.sep, ".") + new_children.append(invert_chunk_coords(new_child)) + else: + new_children.append(entry) + + return sorted(new_children) + + elif self._is_group(path): + + # replace n5 attribute file with respective zarr attribute files + children.remove(self._META_KEYS[0]) + children.append(zarr_group_meta_key) + if self._contains_attrs(path): # pragma: no cover + children.append(zarr_attrs_key) + + return sorted(children) + + else: + + return children + + def _load_n5_attrs(self, path): + try: + s 
= super().__getitem__(path) + return json_loads(s) + except KeyError: + return {} + + def _is_group(self, path): + + if path is None: + attrs_key = self._META_KEYS[0] + else: + attrs_key = os.path.join(path, self._META_KEYS[0]) + + n5_attrs = self._load_n5_attrs(attrs_key) + return len(n5_attrs) > 0 and "dimensions" not in n5_attrs + + def _is_array(self, path): + + if path is None: + attrs_key = self._META_KEYS[0] + else: + attrs_key = os.path.join(path, self._META_KEYS[0]) + + return "dimensions" in self._load_n5_attrs(attrs_key) + + def _contains_attrs(self, path): + + if path is None: + attrs_key = self._META_KEYS[0] + else: + if not path.endswith(self._META_KEYS[0]): + attrs_key = os.path.join(path, self._META_KEYS[0]) + else: # pragma: no cover + attrs_key = path + + attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) + return len(attrs) > 0 + + def is_chunk_key(key): segments = list(key.split('/')) if segments: diff --git a/zarr/storage.py b/zarr/storage.py index c332ee02f5..c6a09c2475 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1019,14 +1019,15 @@ class FSStore(MutableMapping): exceptions : list of Exception subclasses When accessing data, any of these exceptions will be treated as a missing key + meta_keys : list or tuple of str + Defaults to the zarr meta keys, i.e. (".zarray", ".zgroup", ".zattrs"). 
storage_options : passed to the fsspec implementation """ - _META_KEYS = (attrs_key, group_meta_key, array_meta_key) - def __init__(self, url, normalize_keys=True, key_separator='.', mode='w', exceptions=(KeyError, PermissionError, IOError), + meta_keys=(array_meta_key, group_meta_key, attrs_key), **storage_options): import fsspec self.normalize_keys = normalize_keys @@ -1036,6 +1037,7 @@ def __init__(self, url, normalize_keys=True, key_separator='.', self.path = self.fs._strip_protocol(url) self.mode = mode self.exceptions = exceptions + self._META_KEYS = meta_keys if self.fs.exists(self.path) and not self.fs.isdir(self.path): raise FSPathExistNotDir(url) @@ -1044,7 +1046,7 @@ def _normalize_key(self, key): if key: *bits, end = key.split('/') - if end not in FSStore._META_KEYS: + if end not in self._META_KEYS: end = end.replace('.', self.key_separator) key = '/'.join(bits + [end]) @@ -1052,7 +1054,7 @@ def _normalize_key(self, key): def getitems(self, keys, **kwargs): keys = [self._normalize_key(key) for key in keys] - return self.map.getitems(keys, on_error="omit") + return self.map.getitems(keys, **kwargs) def __getitem__(self, key): key = self._normalize_key(key) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index a6598f2781..f0b58e6352 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -22,7 +22,7 @@ from zarr.meta import (ZARR_FORMAT, decode_array_metadata, decode_group_metadata, encode_array_metadata, encode_group_metadata) -from zarr.n5 import N5Store +from zarr.n5 import N5Store, N5FSStore from zarr.storage import (ABSStore, ConsolidatedMetadataStore, DBMStore, DictStore, DirectoryStore, LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore, NestedDirectoryStore, @@ -1197,6 +1197,86 @@ def test_filters(self): init_array(store, shape=1000, chunks=100, filters=filters) +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +class TestN5FSStore(TestFSStore, unittest.TestCase): + def create_store(self, 
normalize_keys=False): + path = tempfile.mkdtemp(suffix='.n5') + atexit.register(atexit_rmtree, path) + store = N5FSStore(path, normalize_keys=normalize_keys) + return store + + def test_equal(self): + store_a = self.create_store() + store_b = N5FSStore(store_a.path) + assert store_a == store_b + + def test_init_array(self): + store = self.create_store() + init_array(store, shape=1000, chunks=100) + + # check metadata + assert array_meta_key in store + meta = decode_array_metadata(store[array_meta_key]) + assert ZARR_FORMAT == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunks'] + assert np.dtype(None) == meta['dtype'] + # N5Store wraps the actual compressor + compressor_config = meta['compressor']['compressor_config'] + assert default_compressor.get_config() == compressor_config + # N5Store always has a fill value of 0 + assert meta['fill_value'] == 0 + + def test_init_array_path(self): + path = 'foo/bar' + store = self.create_store() + init_array(store, shape=1000, chunks=100, path=path) + + # check metadata + key = path + '/' + array_meta_key + assert key in store + meta = decode_array_metadata(store[key]) + assert ZARR_FORMAT == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunks'] + assert np.dtype(None) == meta['dtype'] + # N5Store wraps the actual compressor + compressor_config = meta['compressor']['compressor_config'] + assert default_compressor.get_config() == compressor_config + # N5Store always has a fill value of 0 + assert meta['fill_value'] == 0 + + def test_init_array_compat(self): + store = self.create_store() + init_array(store, shape=1000, chunks=100, compressor='none') + meta = decode_array_metadata(store[array_meta_key]) + # N5Store wraps the actual compressor + compressor_config = meta['compressor']['compressor_config'] + assert compressor_config is None + + def test_init_array_overwrite(self): + self._test_init_array_overwrite('C') + + def 
test_init_array_overwrite_path(self): + self._test_init_array_overwrite_path('C') + + def test_init_array_overwrite_chunk_store(self): + self._test_init_array_overwrite_chunk_store('C') + + def test_init_group_overwrite(self): + self._test_init_group_overwrite('C') + + def test_init_group_overwrite_path(self): + self._test_init_group_overwrite_path('C') + + def test_init_group_overwrite_chunk_store(self): + self._test_init_group_overwrite_chunk_store('C') + + def test_key_separator(self): + with pytest.raises(TypeError): + self.create_store(key_separator='.') + + @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestNestedFSStore(TestNestedDirectoryStore): From bb1121c721aad0a13afabc773c81defd341a9530 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 5 Jul 2021 20:22:53 -0400 Subject: [PATCH 08/15] slightly smarter kwarg interception --- zarr/n5.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 3f98f850bf..cfbe450343 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -313,9 +313,19 @@ class N5FSStore(FSStore): """ def __init__(self, *args, **kwargs): - kwargs["key_separator"] = "/" - kwargs["meta_keys"] = ("attributes.json",) - super().__init__(*args, **kwargs) + if 'dimension_separator' in kwargs: + kwargs.pop('dimension_separator') + warnings.warn('Keyword argument `dimension_separator` will be ignored') + dimension_separator = "/" + + if 'meta_keys' in kwargs: + kwargs.pop('meta_keys') + warnings.warn('Keyword argument `meta_keys` will be ignored') + meta_keys = ("attributes.json",) + super().__init__(*args, + dimension_separator=dimension_separator, + meta_keys=meta_keys, + **kwargs) def _normalize_key(self, key): if is_chunk_key(key): From be8f37fae8d154aa40843f407bf61e1c0ae73f59 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 5 Jul 2021 20:23:36 -0400 Subject: [PATCH 09/15] remove outdated unittest ref and fix the name of a test func --- 
zarr/tests/test_storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 35c36a2611..afd4333c32 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1288,7 +1288,7 @@ def test_filters(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestN5FSStore(TestFSStore, unittest.TestCase): +class TestN5FSStore(TestFSStore): def create_store(self, normalize_keys=False): path = tempfile.mkdtemp(suffix='.n5') atexit.register(atexit_rmtree, path) @@ -1362,7 +1362,7 @@ def test_init_group_overwrite_path(self): def test_init_group_overwrite_chunk_store(self): self._test_init_group_overwrite_chunk_store('C') - def test_key_separator(self): + def test_dimension_separator(self): with pytest.raises(TypeError): self.create_store(key_separator='.') From 95b257366b5752adee0f6f57d4491500ba1ebf6b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 5 Jul 2021 20:24:16 -0400 Subject: [PATCH 10/15] fix massive string block and fix default key_separator kwarg for FSStore --- zarr/storage.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 23819064aa..8833b82dbb 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1036,11 +1036,13 @@ class FSStore(MutableMapping): When accessing data, any of these exceptions will be treated as a missing key meta_keys : list or tuple of str - Defaults to the zarr meta keys, i.e. (".zarray", ".zgroup", ".zattrs"). + Reserved keys for metadata. + Defaults to the zarr metatadata keys, i.e. (".zarray", ".zgroup", ".zattrs"). dimension_separator : {'.', '/'}, optional Separator placed between the dimensions of a chunk. 
storage_options : passed to the fsspec implementation - def __init__(self, url, normalize_keys=True, key_separator='.', + """ + def __init__(self, url, normalize_keys=True, key_separator=None, mode='w', exceptions=(KeyError, PermissionError, IOError), meta_keys=(array_meta_key, group_meta_key, attrs_key), @@ -1065,7 +1067,6 @@ def __init__(self, url, normalize_keys=True, key_separator='.', # Pass attributes to array creation self._dimension_separator = dimension_separator - if self.fs.exists(self.path) and not self.fs.isdir(self.path): raise FSPathExistNotDir(url) From ceba78d0a5dec53e81032012353346722dd415e8 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 Jul 2021 09:21:46 -0400 Subject: [PATCH 11/15] flake8 --- zarr/n5.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index cfbe450343..b654c7dd31 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -317,15 +317,15 @@ def __init__(self, *args, **kwargs): kwargs.pop('dimension_separator') warnings.warn('Keyword argument `dimension_separator` will be ignored') dimension_separator = "/" - + if 'meta_keys' in kwargs: kwargs.pop('meta_keys') warnings.warn('Keyword argument `meta_keys` will be ignored') meta_keys = ("attributes.json",) - super().__init__(*args, - dimension_separator=dimension_separator, - meta_keys=meta_keys, - **kwargs) + super().__init__(*args, + dimension_separator=dimension_separator, + meta_keys=meta_keys, + **kwargs) def _normalize_key(self, key): if is_chunk_key(key): From 02ea91c949514d9fddbcd82416cfa98f7ee0b9f1 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 Jul 2021 10:47:45 -0400 Subject: [PATCH 12/15] promote n5store to toplevel import and fix examples in docstring --- zarr/__init__.py | 2 +- zarr/n5.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/zarr/__init__.py b/zarr/__init__.py index 8079bab071..7558ce77de 100644 --- a/zarr/__init__.py +++ b/zarr/__init__.py @@ -9,7 +9,7 @@ 
zeros_like) from zarr.errors import CopyError, MetadataError from zarr.hierarchy import Group, group, open_group -from zarr.n5 import N5Store +from zarr.n5 import N5Store, N5FSStore from zarr.storage import (ABSStore, DBMStore, DictStore, DirectoryStore, LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore, NestedDirectoryStore, RedisStore, SQLiteStore, diff --git a/zarr/n5.py b/zarr/n5.py index b654c7dd31..7f7650bd89 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -293,19 +293,24 @@ class N5FSStore(FSStore): (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be useful to avoid potential discrepancies between case-senstive and case-insensitive file system. Default value is False. + Examples -------- Store a single array:: + >>> import zarr - >>> store = zarr.N5FSStore('data/array.n5') + >>> store = zarr.N5FSStore('data/array.n5', auto_mkdir=True) >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) >>> z[...] = 42 + Store a group:: - >>> store = zarr.N5FSStore('data/group.n5') + + >>> store = zarr.N5FSStore('data/group.n5', auto_mkdir=True) >>> root = zarr.group(store=store, overwrite=True) >>> foo = root.create_group('foo') >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) >>> bar[...] = 42 + Notes ----- This is an experimental feature. 
From 68adca50b62441dabc6b3f48364fe3dcf35eeb69 Mon Sep 17 00:00:00 2001 From: jmoore Date: Tue, 17 Aug 2021 14:07:53 +0200 Subject: [PATCH 13/15] Try fsspec 2021.7 (see #802) --- requirements_dev_optional.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index b037f0e77f..8c67a30abb 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -17,6 +17,5 @@ flake8==3.9.2 pytest-cov==2.12.1 pytest-doctestplus==0.10.1 h5py==3.3.0 -s3fs==2021.6.0 -fsspec==2021.6.0 +fsspec==2021.7.0 moto[server]>=1.3.14 From f2f75b7fb4ff92eea2b1df41c31feaafd4687301 Mon Sep 17 00:00:00 2001 From: jmoore Date: Tue, 17 Aug 2021 14:25:10 +0200 Subject: [PATCH 14/15] Revert "Try fsspec 2021.7 (see #802)" This reverts commit 68adca50b62441dabc6b3f48364fe3dcf35eeb69. --- requirements_dev_optional.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 8c67a30abb..b037f0e77f 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -17,5 +17,6 @@ flake8==3.9.2 pytest-cov==2.12.1 pytest-doctestplus==0.10.1 h5py==3.3.0 -fsspec==2021.7.0 +s3fs==2021.6.0 +fsspec==2021.6.0 moto[server]>=1.3.14 From a57b3bc8930b67418e21306b87169b8d27b00805 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 17 Aug 2021 14:01:03 -0400 Subject: [PATCH 15/15] Add missing core tests for N5FSStore, and changes required for making them pass --- zarr/n5.py | 559 ++++++++++++++++++------------------- zarr/storage.py | 20 +- zarr/tests/test_core.py | 18 +- zarr/tests/test_storage.py | 4 +- 4 files changed, 308 insertions(+), 293 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 7f7650bd89..99ec60f299 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -11,7 +11,7 @@ from numcodecs.registry import get_codec, register_codec from .meta import ZARR_FORMAT, json_dumps, json_loads -from .storage import 
NestedDirectoryStore, _prog_ckey, _prog_number, FSStore, normalize_storage_path +from .storage import NestedDirectoryStore, _prog_ckey, _prog_number, normalize_storage_path from .storage import array_meta_key as zarr_array_meta_key from .storage import attrs_key as zarr_attrs_key from .storage import group_meta_key as zarr_group_meta_key @@ -281,343 +281,338 @@ def _contains_attrs(self, path): return len(attrs) > 0 -class N5FSStore(FSStore): - """Implentation of the N5 format (https://github.com/saalfeldlab/n5) using `fsspec`, - which allows storage on a variety of filesystems. Based on `zarr.N5Store`. - Parameters - ---------- - path : string - Location of directory to use as the root of the storage hierarchy. - normalize_keys : bool, optional - If True, all store keys will be normalized to use lower case characters - (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be - useful to avoid potential discrepancies between case-senstive and - case-insensitive file system. Default value is False. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.N5FSStore('data/array.n5', auto_mkdir=True) - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - - Store a group:: - - >>> store = zarr.N5FSStore('data/group.n5', auto_mkdir=True) - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - - Notes - ----- - This is an experimental feature. - Safe to write in multiple threads or processes. 
- """ - - def __init__(self, *args, **kwargs): - if 'dimension_separator' in kwargs: - kwargs.pop('dimension_separator') - warnings.warn('Keyword argument `dimension_separator` will be ignored') - dimension_separator = "/" - - if 'meta_keys' in kwargs: - kwargs.pop('meta_keys') - warnings.warn('Keyword argument `meta_keys` will be ignored') - meta_keys = ("attributes.json",) - super().__init__(*args, - dimension_separator=dimension_separator, - meta_keys=meta_keys, - **kwargs) - - def _normalize_key(self, key): - if is_chunk_key(key): - key = invert_chunk_coords(key) - - key = normalize_storage_path(key).lstrip("/") - if key: - *bits, end = key.split("/") - - if end not in self._META_KEYS: - end = end.replace(".", self.key_separator) - key = "/".join(bits + [end]) - return key.lower() if self.normalize_keys else key - - def __getitem__(self, key): - if key.endswith(zarr_group_meta_key): - - key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) - value = group_metadata_to_zarr(self._load_n5_attrs(key)) - - return json_dumps(value) - - elif key.endswith(zarr_array_meta_key): +try: + from .storage import FSStore + + class N5FSStore(FSStore): + """Implentation of the N5 format (https://github.com/saalfeldlab/n5) using `fsspec`, + which allows storage on a variety of filesystems. Based on `zarr.N5Store`. + Parameters + ---------- + path : string + Location of directory to use as the root of the storage hierarchy. + normalize_keys : bool, optional + If True, all store keys will be normalized to use lower case characters + (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be + useful to avoid potential discrepancies between case-senstive and + case-insensitive file system. Default value is False. + + Examples + -------- + Store a single array:: + + >>> import zarr + >>> store = zarr.N5FSStore('data/array.n5', auto_mkdir=True) + >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> z[...] 
= 42 + + Store a group:: + + >>> store = zarr.N5FSStore('data/group.n5', auto_mkdir=True) + >>> root = zarr.group(store=store, overwrite=True) + >>> foo = root.create_group('foo') + >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) + >>> bar[...] = 42 + + Notes + ----- + This is an experimental feature. + Safe to write in multiple threads or processes. + """ + array_meta_key = 'attributes.json' + group_meta_key = 'attributes.json' + attrs_key = 'attributes.json' + + def __init__(self, *args, **kwargs): + if 'dimension_separator' in kwargs: + kwargs.pop('dimension_separator') + warnings.warn('Keyword argument `dimension_separator` will be ignored') + dimension_separator = "/" + super().__init__(*args, dimension_separator=dimension_separator, **kwargs) + + def _normalize_key(self, key): + if is_chunk_key(key): + key = invert_chunk_coords(key) + + key = normalize_storage_path(key).lstrip("/") + if key: + *bits, end = key.split("/") + + if end not in (self.array_meta_key, self.group_meta_key, self.attrs_key): + end = end.replace(".", self.key_separator) + key = "/".join(bits + [end]) + return key.lower() if self.normalize_keys else key + + def __getitem__(self, key): + if key.endswith(zarr_group_meta_key): + + key = key.replace(zarr_group_meta_key, self.group_meta_key) + value = group_metadata_to_zarr(self._load_n5_attrs(key)) - key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) - value = array_metadata_to_zarr(self._load_n5_attrs(key)) - - return json_dumps(value) - - elif key.endswith(zarr_attrs_key): - - key = key.replace(zarr_attrs_key, self._META_KEYS[0]) - value = attrs_to_zarr(self._load_n5_attrs(key)) - - if len(value) == 0: - raise KeyError(key) - else: return json_dumps(value) - return super().__getitem__(key) - - def __setitem__(self, key, value): - if key.endswith(zarr_group_meta_key): - - key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) - - n5_attrs = self._load_n5_attrs(key) - 
n5_attrs.update(**group_metadata_to_n5(json_loads(value))) - - value = json_dumps(n5_attrs) - elif key.endswith(zarr_array_meta_key): - - key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) - - n5_attrs = self._load_n5_attrs(key) - n5_attrs.update(**array_metadata_to_n5(json_loads(value))) + elif key.endswith(zarr_array_meta_key): - value = json_dumps(n5_attrs) + key = key.replace(zarr_array_meta_key, self.array_meta_key) + value = array_metadata_to_zarr(self._load_n5_attrs(key)) - elif key.endswith(zarr_attrs_key): - - key = key.replace(zarr_attrs_key, self._META_KEYS[0]) - - n5_attrs = self._load_n5_attrs(key) - zarr_attrs = json_loads(value) - - for k in n5_keywords: - if k in zarr_attrs.keys(): - raise ValueError( - "Can not set attribute %s, this is a reserved N5 keyword" % k - ) - - # replace previous user attributes - for k in list(n5_attrs.keys()): - if k not in n5_keywords: - del n5_attrs[k] - - # add new user attributes - n5_attrs.update(**zarr_attrs) - - value = json_dumps(n5_attrs) + return json_dumps(value) - super().__setitem__(key, value) + elif key.endswith(zarr_attrs_key): - def __delitem__(self, key): + key = key.replace(zarr_attrs_key, self.attrs_key) + value = attrs_to_zarr(self._load_n5_attrs(key)) - if key.endswith(zarr_group_meta_key): # pragma: no cover - key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) - elif key.endswith(zarr_array_meta_key): # pragma: no cover - key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) - elif key.endswith(zarr_attrs_key): # pragma: no cover - key = key.replace(zarr_attrs_key, self._META_KEYS[0]) - - super().__delitem__(key) + if len(value) == 0: + raise KeyError(key) + else: + return json_dumps(value) + return super().__getitem__(key) - def __contains__(self, key): - if key.endswith(zarr_group_meta_key): + def __setitem__(self, key, value): + if key.endswith(zarr_group_meta_key): - key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) - if key not in self: - return False - # group 
if not a dataset (attributes do not contain 'dimensions') - return "dimensions" not in self._load_n5_attrs(key) + key = key.replace(zarr_group_meta_key, self.group_meta_key) - elif key.endswith(zarr_array_meta_key): + n5_attrs = self._load_n5_attrs(key) + n5_attrs.update(**group_metadata_to_n5(json_loads(value))) - key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) - # array if attributes contain 'dimensions' - return "dimensions" in self._load_n5_attrs(key) + value = json_dumps(n5_attrs) - elif key.endswith(zarr_attrs_key): + elif key.endswith(zarr_array_meta_key): - key = key.replace(zarr_attrs_key, self._META_KEYS[0]) - return self._contains_attrs(key) + key = key.replace(zarr_array_meta_key, self.array_meta_key) - return super().__contains__(key) + n5_attrs = self._load_n5_attrs(key) + n5_attrs.update(**array_metadata_to_n5(json_loads(value))) - def __eq__(self, other): - return isinstance(other, N5FSStore) and self.path == other.path + value = json_dumps(n5_attrs) - def listdir(self, path=None): + elif key.endswith(zarr_attrs_key): - if path is not None: - path = invert_chunk_coords(path) + key = key.replace(zarr_attrs_key, self.attrs_key) - # We can't use NestedDirectoryStore's listdir, as it requires - # array_meta_key to be present in array directories, which this store - # doesn't provide. 
- children = super().listdir(path=path) - if self._is_array(path): + n5_attrs = self._load_n5_attrs(key) + zarr_attrs = json_loads(value) - # replace n5 attribute file with respective zarr attribute files - children.remove(self._META_KEYS[0]) - children.append(zarr_array_meta_key) - if self._contains_attrs(path): - children.append(zarr_attrs_key) + for k in n5_keywords: + if k in zarr_attrs.keys(): + raise ValueError( + "Can not set attribute %s, this is a reserved N5 keyword" % k + ) - # special handling of directories containing an array to map - # inverted nested chunk keys back to standard chunk keys - new_children = [] - root_path = self.dir_path(path) - for entry in children: - entry_path = os.path.join(root_path, entry) - if _prog_number.match(entry) and self.fs.isdir(entry_path): - for dir_path, _, file_names in self.fs.walk(entry_path): - for file_name in file_names: - file_path = os.path.join(dir_path, file_name) - rel_path = file_path.split(root_path + os.path.sep)[1] - new_child = rel_path.replace(os.path.sep, ".") - new_children.append(invert_chunk_coords(new_child)) - else: - new_children.append(entry) + # replace previous user attributes + for k in list(n5_attrs.keys()): + if k not in n5_keywords: + del n5_attrs[k] - return sorted(new_children) + # add new user attributes + n5_attrs.update(**zarr_attrs) - elif self._is_group(path): + value = json_dumps(n5_attrs) - # replace n5 attribute file with respective zarr attribute files - children.remove(self._META_KEYS[0]) - children.append(zarr_group_meta_key) - if self._contains_attrs(path): # pragma: no cover - children.append(zarr_attrs_key) + super().__setitem__(key, value) - return sorted(children) + def __delitem__(self, key): - else: + if key.endswith(zarr_group_meta_key): # pragma: no cover + key = key.replace(zarr_group_meta_key, self.group_meta_key) + elif key.endswith(zarr_array_meta_key): # pragma: no cover + key = key.replace(zarr_array_meta_key, self.array_meta_key) + elif 
key.endswith(zarr_attrs_key): # pragma: no cover + key = key.replace(zarr_attrs_key, self.attrs_key) - return children + super().__delitem__(key) - def _load_n5_attrs(self, path): - try: - s = super().__getitem__(path) - return json_loads(s) - except KeyError: - return {} + def __contains__(self, key): + if key.endswith(zarr_group_meta_key): - def _is_group(self, path): + key = key.replace(zarr_group_meta_key, self.group_meta_key) + if key not in self: + return False + # group if not a dataset (attributes do not contain 'dimensions') + return "dimensions" not in self._load_n5_attrs(key) - if path is None: - attrs_key = self._META_KEYS[0] - else: - attrs_key = os.path.join(path, self._META_KEYS[0]) + elif key.endswith(zarr_array_meta_key): - n5_attrs = self._load_n5_attrs(attrs_key) - return len(n5_attrs) > 0 and "dimensions" not in n5_attrs + key = key.replace(zarr_array_meta_key, self.array_meta_key) + # array if attributes contain 'dimensions' + return "dimensions" in self._load_n5_attrs(key) - def _is_array(self, path): + elif key.endswith(zarr_attrs_key): - if path is None: - attrs_key = self._META_KEYS[0] - else: - attrs_key = os.path.join(path, self._META_KEYS[0]) + key = key.replace(zarr_attrs_key, self.attrs_key) + return self._contains_attrs(key) - return "dimensions" in self._load_n5_attrs(attrs_key) + return super().__contains__(key) - def _contains_attrs(self, path): + def __eq__(self, other): + return isinstance(other, N5FSStore) and self.path == other.path - if path is None: - attrs_key = self._META_KEYS[0] - else: - if not path.endswith(self._META_KEYS[0]): - attrs_key = os.path.join(path, self._META_KEYS[0]) - else: # pragma: no cover - attrs_key = path + def listdir(self, path=None): - attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) - return len(attrs) > 0 + if path is not None: + path = invert_chunk_coords(path) + # We can't use NestedDirectoryStore's listdir, as it requires + # array_meta_key to be present in array directories, which this 
store + # doesn't provide. + children = super().listdir(path=path) + if self._is_array(path): -def is_chunk_key(key): - segments = list(key.split('/')) - if segments: - last_segment = segments[-1] - return _prog_ckey.match(last_segment) - return False # pragma: no cover + # replace n5 attribute file with respective zarr attribute files + children.remove(self.array_meta_key) + children.append(zarr_array_meta_key) + if self._contains_attrs(path): + children.append(zarr_attrs_key) + # special handling of directories containing an array to map + # inverted nested chunk keys back to standard chunk keys + new_children = [] + root_path = self.dir_path(path) + for entry in children: + entry_path = os.path.join(root_path, entry) + if _prog_number.match(entry) and self.fs.isdir(entry_path): + for dir_path, _, file_names in self.fs.walk(entry_path): + for file_name in file_names: + file_path = os.path.join(dir_path, file_name) + rel_path = file_path.split(root_path + os.path.sep)[1] + new_child = rel_path.replace(os.path.sep, ".") + new_children.append(invert_chunk_coords(new_child)) + else: + new_children.append(entry) -def invert_chunk_coords(key): - segments = list(key.split('/')) - if segments: - last_segment = segments[-1] - if _prog_ckey.match(last_segment): - coords = list(last_segment.split('.')) - last_segment = '.'.join(coords[::-1]) - segments = segments[:-1] + [last_segment] - key = '/'.join(segments) - return key + return sorted(new_children) + elif self._is_group(path): -def group_metadata_to_n5(group_metadata): - '''Convert group metadata from zarr to N5 format.''' - del group_metadata['zarr_format'] - # TODO: This should only exist at the top-level - group_metadata['n5'] = '2.0.0' - return group_metadata + # replace n5 attribute file with respective zarr attribute files + children.remove(self.group_meta_key) + children.append(zarr_group_meta_key) + if self._contains_attrs(path): # pragma: no cover + children.append(zarr_attrs_key) + return sorted(children) 
-def group_metadata_to_zarr(group_metadata): - '''Convert group metadata from N5 to zarr format.''' - # This only exists at the top level - group_metadata.pop('n5', None) - group_metadata['zarr_format'] = ZARR_FORMAT - return group_metadata + else: + return children -def array_metadata_to_n5(array_metadata): - '''Convert array metadata from zarr to N5 format.''' + def _load_n5_attrs(self, path): + try: + s = super().__getitem__(path) + return json_loads(s) + except KeyError: + return {} - for f, t in zarr_to_n5_keys: - array_metadata[t] = array_metadata[f] - del array_metadata[f] - del array_metadata['zarr_format'] + def _is_group(self, path): - try: - dtype = np.dtype(array_metadata['dataType']) - except TypeError: # pragma: no cover - raise TypeError( - "data type %s not supported by N5" % array_metadata['dataType']) + if path is None: + attrs_key = self.attrs_key + else: + attrs_key = os.path.join(path, self.attrs_key) - array_metadata['dataType'] = dtype.name - array_metadata['dimensions'] = array_metadata['dimensions'][::-1] - array_metadata['blockSize'] = array_metadata['blockSize'][::-1] + n5_attrs = self._load_n5_attrs(attrs_key) + return len(n5_attrs) > 0 and "dimensions" not in n5_attrs - if 'fill_value' in array_metadata: - if array_metadata['fill_value'] != 0 and array_metadata['fill_value'] is not None: - raise ValueError("N5 only supports fill_value == 0 (for now)") - del array_metadata['fill_value'] + def _is_array(self, path): - if 'order' in array_metadata: - if array_metadata['order'] != 'C': - raise ValueError("zarr N5 storage only stores arrays in C order (for now)") - del array_metadata['order'] + if path is None: + attrs_key = self.attrs_key + else: + attrs_key = os.path.join(path, self.attrs_key) - if 'filters' in array_metadata: - if array_metadata['filters'] != [] and array_metadata['filters'] is not None: - raise ValueError("N5 storage does not support zarr filters") - del array_metadata['filters'] + return "dimensions" in 
self._load_n5_attrs(attrs_key) - assert 'compression' in array_metadata - compressor_config = array_metadata['compression'] - compressor_config = compressor_config_to_n5(compressor_config) - array_metadata['compression'] = compressor_config + def _contains_attrs(self, path): - if 'dimension_separator' in array_metadata: - del array_metadata['dimension_separator'] + if path is None: + attrs_key = self.attrs_key + else: + if not path.endswith(self.attrs_key): + attrs_key = os.path.join(path, self.attrs_key) + else: # pragma: no cover + attrs_key = path + + attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) + return len(attrs) > 0 + + def is_chunk_key(key): + segments = list(key.split('/')) + if segments: + last_segment = segments[-1] + return _prog_ckey.match(last_segment) + return False # pragma: no cover + + def invert_chunk_coords(key): + segments = list(key.split('/')) + if segments: + last_segment = segments[-1] + if _prog_ckey.match(last_segment): + coords = list(last_segment.split('.')) + last_segment = '.'.join(coords[::-1]) + segments = segments[:-1] + [last_segment] + key = '/'.join(segments) + return key + + def group_metadata_to_n5(group_metadata): + '''Convert group metadata from zarr to N5 format.''' + del group_metadata['zarr_format'] + # TODO: This should only exist at the top-level + group_metadata['n5'] = '2.0.0' + return group_metadata + + def group_metadata_to_zarr(group_metadata): + '''Convert group metadata from N5 to zarr format.''' + # This only exists at the top level + group_metadata.pop('n5', None) + group_metadata['zarr_format'] = ZARR_FORMAT + return group_metadata + + def array_metadata_to_n5(array_metadata): + '''Convert array metadata from zarr to N5 format.''' + + for f, t in zarr_to_n5_keys: + array_metadata[t] = array_metadata[f] + del array_metadata[f] + del array_metadata['zarr_format'] - return array_metadata + try: + dtype = np.dtype(array_metadata['dataType']) + except TypeError: # pragma: no cover + raise TypeError( + "data 
type %s not supported by N5" % array_metadata['dataType']) + + array_metadata['dataType'] = dtype.name + array_metadata['dimensions'] = array_metadata['dimensions'][::-1] + array_metadata['blockSize'] = array_metadata['blockSize'][::-1] + + if 'fill_value' in array_metadata: + if array_metadata['fill_value'] != 0 and array_metadata['fill_value'] is not None: + raise ValueError("N5 only supports fill_value == 0 (for now)") + del array_metadata['fill_value'] + + if 'order' in array_metadata: + if array_metadata['order'] != 'C': + raise ValueError("zarr N5 storage only stores arrays in C order (for now)") + del array_metadata['order'] + + if 'filters' in array_metadata: + if array_metadata['filters'] != [] and array_metadata['filters'] is not None: + raise ValueError("N5 storage does not support zarr filters") + del array_metadata['filters'] + + assert 'compression' in array_metadata + compressor_config = array_metadata['compression'] + compressor_config = compressor_config_to_n5(compressor_config) + array_metadata['compression'] = compressor_config + + if 'dimension_separator' in array_metadata: + del array_metadata['dimension_separator'] + + return array_metadata +except ImportError: + pass def array_metadata_to_zarr(array_metadata): diff --git a/zarr/storage.py b/zarr/storage.py index f0f5ea3392..2142b459d5 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1036,28 +1036,32 @@ class FSStore(MutableMapping): exceptions : list of Exception subclasses When accessing data, any of these exceptions will be treated as a missing key - meta_keys : list or tuple of str - Reserved keys for metadata. - Defaults to the zarr metatadata keys, i.e. (".zarray", ".zgroup", ".zattrs"). dimension_separator : {'.', '/'}, optional Separator placed between the dimensions of a chunk. 
storage_options : passed to the fsspec implementation """ + array_meta_key = array_meta_key + group_meta_key = group_meta_key + attrs_key = attrs_key + def __init__(self, url, normalize_keys=True, key_separator=None, mode='w', exceptions=(KeyError, PermissionError, IOError), - meta_keys=(array_meta_key, group_meta_key, attrs_key), dimension_separator=None, **storage_options): import fsspec self.normalize_keys = normalize_keys + + protocol, _ = fsspec.core.split_protocol(url) + # set auto_mkdir to True for local file system + if protocol in (None, "file") and not storage_options.get("auto_mkdir"): + storage_options["auto_mkdir"] = True + self.map = fsspec.get_mapper(url, **storage_options) self.fs = self.map.fs # for direct operations self.path = self.fs._strip_protocol(url) self.mode = mode self.exceptions = exceptions - self._META_KEYS = meta_keys - # For backwards compatibility. Guaranteed to be non-None if key_separator is not None: dimension_separator = key_separator @@ -1076,7 +1080,7 @@ def _normalize_key(self, key): if key: *bits, end = key.split('/') - if end not in self._META_KEYS: + if end not in (self.array_meta_key, self.group_meta_key, self.attrs_key): end = end.replace('.', self.key_separator) key = '/'.join(bits + [end]) @@ -1154,7 +1158,7 @@ def listdir(self, path=None): if self.key_separator != "/": return children else: - if array_meta_key in children: + if self.array_meta_key in children: # special handling of directories containing an array to map nested chunk # keys back to standard chunk keys new_children = [] diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index b1346d760e..d329f4e58f 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -18,7 +18,7 @@ from zarr.core import Array from zarr.meta import json_loads -from zarr.n5 import N5Store, n5_keywords +from zarr.n5 import N5Store, N5FSStore, n5_keywords from zarr.storage import ( ABSStore, DBMStore, @@ -1963,6 +1963,22 @@ def test_hexdigest(self): assert 
self.expected() == found +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +class TestArrayWithN5FSStore(TestArrayWithN5Store): + + @staticmethod + def create_array(read_only=False, **kwargs): + path = mkdtemp() + atexit.register(shutil.rmtree, path) + store = N5FSStore(path) + cache_metadata = kwargs.pop('cache_metadata', True) + cache_attrs = kwargs.pop('cache_attrs', True) + kwargs.setdefault('compressor', Zlib(1)) + init_array(store, **kwargs) + return Array(store, read_only=read_only, cache_metadata=cache_metadata, + cache_attrs=cache_attrs) + + class TestArrayWithDBMStore(TestArray): @staticmethod diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 4730393782..9e2f9baf1b 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1183,7 +1183,7 @@ def test_value_error(self): class TestN5Store(TestNestedDirectoryStore): def create_store(self, normalize_keys=False): - path = tempfile.mkdtemp(suffix='.n5') + path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) store = N5Store(path, normalize_keys=normalize_keys) return store @@ -1296,7 +1296,7 @@ def test_filters(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestN5FSStore(TestFSStore): def create_store(self, normalize_keys=False): - path = tempfile.mkdtemp(suffix='.n5') + path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) store = N5FSStore(path, normalize_keys=normalize_keys) return store