|
11 | 11 | from numcodecs.registry import get_codec, register_codec
|
12 | 12 |
|
13 | 13 | from .meta import ZARR_FORMAT, json_dumps, json_loads
|
14 |
| -from .storage import NestedDirectoryStore, _prog_ckey, _prog_number |
| 14 | +from .storage import FSStore |
| 15 | +from .storage import NestedDirectoryStore, _prog_ckey, _prog_number, normalize_storage_path |
15 | 16 | from .storage import array_meta_key as zarr_array_meta_key
|
16 | 17 | from .storage import attrs_key as zarr_attrs_key
|
17 | 18 | from .storage import group_meta_key as zarr_group_meta_key
|
@@ -281,12 +282,298 @@ def _contains_attrs(self, path):
|
281 | 282 | return len(attrs) > 0
|
282 | 283 |
|
283 | 284 |
|
| 285 | +class N5FSStore(FSStore): |
| 286 | + """Implementation of the N5 format (https://github.com/saalfeldlab/n5) using `fsspec`, |
| 287 | + which allows storage on a variety of filesystems. Based on `zarr.N5Store`. |
| 288 | + Parameters |
| 289 | + ---------- |
| 290 | + path : string |
| 291 | + Location of directory to use as the root of the storage hierarchy. |
| 292 | + normalize_keys : bool, optional |
| 293 | + If True, all store keys will be normalized to use lower case characters |
| 294 | + (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be |
| 295 | + useful to avoid potential discrepancies between case-sensitive and |
| 296 | + case-insensitive file systems. Default value is False. |
| 297 | +
|
| 298 | + Examples |
| 299 | + -------- |
| 300 | + Store a single array:: |
| 301 | +
|
| 302 | + >>> import zarr |
| 303 | + >>> store = zarr.N5FSStore('data/array.n5', auto_mkdir=True) |
| 304 | + >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) |
| 305 | + >>> z[...] = 42 |
| 306 | +
|
| 307 | + Store a group:: |
| 308 | +
|
| 309 | + >>> store = zarr.N5FSStore('data/group.n5', auto_mkdir=True) |
| 310 | + >>> root = zarr.group(store=store, overwrite=True) |
| 311 | + >>> foo = root.create_group('foo') |
| 312 | + >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) |
| 313 | + >>> bar[...] = 42 |
| 314 | +
|
| 315 | + Notes |
| 316 | + ----- |
| 317 | + This is an experimental feature. |
| 318 | + Safe to write in multiple threads or processes. |
| 319 | +
|
| 320 | + Be advised that the `_dimension_separator` property of this store |
| 321 | + (and arrays it creates) is ".", but chunks saved by this store will |
| 322 | + in fact be "/" separated, as prescribed by the N5 format. |
| 323 | +
|
| 324 | + This is counter-intuitive (to say the least), but not arbitrary. |
| 325 | + Chunks in N5 format are stored with reversed dimension order |
| 326 | + relative to Zarr chunks: a chunk of a 3D Zarr array would be stored |
| 327 | + on a file system as `/0/1/2`, but in N5 the same chunk would be |
| 328 | + stored as `/2/1/0`. Therefore, stores targeting N5 must intercept |
| 329 | + chunk keys and flip the order of the dimensions before writing to |
| 330 | + storage, and this procedure requires chunk keys with "." separated |
| 331 | + dimensions, hence the Zarr arrays targeting N5 have the deceptive |
| 332 | + "." dimension separator. |
| 333 | + """ |
| 334 | + _array_meta_key = 'attributes.json' |
| 335 | + _group_meta_key = 'attributes.json' |
| 336 | + _attrs_key = 'attributes.json' |
| 337 | + |
| 338 | + def __init__(self, *args, **kwargs): |
| 339 | + if 'dimension_separator' in kwargs: |
| 340 | + kwargs.pop('dimension_separator') |
| 341 | + warnings.warn('Keyword argument `dimension_separator` will be ignored') |
| 342 | + dimension_separator = "." |
| 343 | + super().__init__(*args, dimension_separator=dimension_separator, **kwargs) |
| 344 | + |
| 345 | + def _swap_separator(self, key): |
| 346 | + segments = list(key.split('/')) |
| 347 | + if segments: |
| 348 | + last_segment = segments[-1] |
| 349 | + if _prog_ckey.match(last_segment): |
| 350 | + coords = list(last_segment.split('.')) |
| 351 | + last_segment = '/'.join(coords[::-1]) |
| 352 | + segments = segments[:-1] + [last_segment] |
| 353 | + key = '/'.join(segments) |
| 354 | + return key |
| 355 | + |
| 356 | + def _normalize_key(self, key): |
| 357 | + if is_chunk_key(key): |
| 358 | + key = invert_chunk_coords(key) |
| 359 | + |
| 360 | + key = normalize_storage_path(key).lstrip("/") |
| 361 | + if key: |
| 362 | + *bits, end = key.split("/") |
| 363 | + |
| 364 | + if end not in (self._array_meta_key, self._group_meta_key, self._attrs_key): |
| 365 | + end = end.replace(".", "/") |
| 366 | + key = "/".join(bits + [end]) |
| 367 | + return key.lower() if self.normalize_keys else key |
| 368 | + |
| 369 | + def __getitem__(self, key): |
| 370 | + if key.endswith(zarr_group_meta_key): |
| 371 | + |
| 372 | + key = key.replace(zarr_group_meta_key, self._group_meta_key) |
| 373 | + value = group_metadata_to_zarr(self._load_n5_attrs(key)) |
| 374 | + |
| 375 | + return json_dumps(value) |
| 376 | + |
| 377 | + elif key.endswith(zarr_array_meta_key): |
| 378 | + |
| 379 | + key = key.replace(zarr_array_meta_key, self._array_meta_key) |
| 380 | + value = array_metadata_to_zarr(self._load_n5_attrs(key)) |
| 381 | + |
| 382 | + return json_dumps(value) |
| 383 | + |
| 384 | + elif key.endswith(zarr_attrs_key): |
| 385 | + |
| 386 | + key = key.replace(zarr_attrs_key, self._attrs_key) |
| 387 | + value = attrs_to_zarr(self._load_n5_attrs(key)) |
| 388 | + |
| 389 | + if len(value) == 0: |
| 390 | + raise KeyError(key) |
| 391 | + else: |
| 392 | + return json_dumps(value) |
| 393 | + |
| 394 | + elif is_chunk_key(key): |
| 395 | + key = self._swap_separator(key) |
| 396 | + |
| 397 | + return super().__getitem__(key) |
| 398 | + |
| 399 | + def __setitem__(self, key, value): |
| 400 | + if key.endswith(zarr_group_meta_key): |
| 401 | + |
| 402 | + key = key.replace(zarr_group_meta_key, self._group_meta_key) |
| 403 | + |
| 404 | + n5_attrs = self._load_n5_attrs(key) |
| 405 | + n5_attrs.update(**group_metadata_to_n5(json_loads(value))) |
| 406 | + |
| 407 | + value = json_dumps(n5_attrs) |
| 408 | + |
| 409 | + elif key.endswith(zarr_array_meta_key): |
| 410 | + |
| 411 | + key = key.replace(zarr_array_meta_key, self._array_meta_key) |
| 412 | + |
| 413 | + n5_attrs = self._load_n5_attrs(key) |
| 414 | + n5_attrs.update(**array_metadata_to_n5(json_loads(value))) |
| 415 | + |
| 416 | + value = json_dumps(n5_attrs) |
| 417 | + |
| 418 | + elif key.endswith(zarr_attrs_key): |
| 419 | + |
| 420 | + key = key.replace(zarr_attrs_key, self._attrs_key) |
| 421 | + |
| 422 | + n5_attrs = self._load_n5_attrs(key) |
| 423 | + zarr_attrs = json_loads(value) |
| 424 | + |
| 425 | + for k in n5_keywords: |
| 426 | + if k in zarr_attrs.keys(): |
| 427 | + raise ValueError( |
| 428 | + "Can not set attribute %s, this is a reserved N5 keyword" % k |
| 429 | + ) |
| 430 | + |
| 431 | + # replace previous user attributes |
| 432 | + for k in list(n5_attrs.keys()): |
| 433 | + if k not in n5_keywords: |
| 434 | + del n5_attrs[k] |
| 435 | + |
| 436 | + # add new user attributes |
| 437 | + n5_attrs.update(**zarr_attrs) |
| 438 | + |
| 439 | + value = json_dumps(n5_attrs) |
| 440 | + |
| 441 | + elif is_chunk_key(key): |
| 442 | + key = self._swap_separator(key) |
| 443 | + |
| 444 | + super().__setitem__(key, value) |
| 445 | + |
| 446 | + def __delitem__(self, key): |
| 447 | + |
| 448 | + if key.endswith(zarr_group_meta_key): # pragma: no cover |
| 449 | + key = key.replace(zarr_group_meta_key, self._group_meta_key) |
| 450 | + elif key.endswith(zarr_array_meta_key): # pragma: no cover |
| 451 | + key = key.replace(zarr_array_meta_key, self._array_meta_key) |
| 452 | + elif key.endswith(zarr_attrs_key): # pragma: no cover |
| 453 | + key = key.replace(zarr_attrs_key, self._attrs_key) |
| 454 | + elif is_chunk_key(key): |
| 455 | + key = self._swap_separator(key) |
| 456 | + |
| 457 | + super().__delitem__(key) |
| 458 | + |
| 459 | + def __contains__(self, key): |
| 460 | + if key.endswith(zarr_group_meta_key): |
| 461 | + |
| 462 | + key = key.replace(zarr_group_meta_key, self._group_meta_key) |
| 463 | + if key not in self: |
| 464 | + return False |
| 465 | + # group if not a dataset (attributes do not contain 'dimensions') |
| 466 | + return "dimensions" not in self._load_n5_attrs(key) |
| 467 | + |
| 468 | + elif key.endswith(zarr_array_meta_key): |
| 469 | + |
| 470 | + key = key.replace(zarr_array_meta_key, self._array_meta_key) |
| 471 | + # array if attributes contain 'dimensions' |
| 472 | + return "dimensions" in self._load_n5_attrs(key) |
| 473 | + |
| 474 | + elif key.endswith(zarr_attrs_key): |
| 475 | + |
| 476 | + key = key.replace(zarr_attrs_key, self._attrs_key) |
| 477 | + return self._contains_attrs(key) |
| 478 | + |
| 479 | + elif is_chunk_key(key): |
| 480 | + key = self._swap_separator(key) |
| 481 | + |
| 482 | + return super().__contains__(key) |
| 483 | + |
| 484 | + def __eq__(self, other): |
| 485 | + return isinstance(other, N5FSStore) and self.path == other.path |
| 486 | + |
| 487 | + def listdir(self, path=None): |
| 488 | + if path is not None: |
| 489 | + path = invert_chunk_coords(path) |
| 490 | + |
| 491 | + # We can't use NestedDirectoryStore's listdir, as it requires |
| 492 | + # array_meta_key to be present in array directories, which this store |
| 493 | + # doesn't provide. |
| 494 | + children = super().listdir(path=path) |
| 495 | + if self._is_array(path): |
| 496 | + |
| 497 | + # replace n5 attribute file with respective zarr attribute files |
| 498 | + children.remove(self._array_meta_key) |
| 499 | + children.append(zarr_array_meta_key) |
| 500 | + if self._contains_attrs(path): |
| 501 | + children.append(zarr_attrs_key) |
| 502 | + |
| 503 | + # special handling of directories containing an array to map |
| 504 | + # inverted nested chunk keys back to standard chunk keys |
| 505 | + new_children = [] |
| 506 | + root_path = self.dir_path(path) |
| 507 | + for entry in children: |
| 508 | + entry_path = os.path.join(root_path, entry) |
| 509 | + if _prog_number.match(entry) and self.fs.isdir(entry_path): |
| 510 | + for file_name in self.fs.find(entry_path): |
| 511 | + file_path = os.path.join(root_path, file_name) |
| 512 | + rel_path = file_path.split(root_path)[1] |
| 513 | + new_child = rel_path.lstrip('/').replace('/', ".") |
| 514 | + new_children.append(invert_chunk_coords(new_child)) |
| 515 | + else: |
| 516 | + new_children.append(entry) |
| 517 | + return sorted(new_children) |
| 518 | + |
| 519 | + elif self._is_group(path): |
| 520 | + |
| 521 | + # replace n5 attribute file with respective zarr attribute files |
| 522 | + children.remove(self._group_meta_key) |
| 523 | + children.append(zarr_group_meta_key) |
| 524 | + if self._contains_attrs(path): # pragma: no cover |
| 525 | + children.append(zarr_attrs_key) |
| 526 | + return sorted(children) |
| 527 | + else: |
| 528 | + return children |
| 529 | + |
| 530 | + def _load_n5_attrs(self, path): |
| 531 | + try: |
| 532 | + s = super().__getitem__(path) |
| 533 | + return json_loads(s) |
| 534 | + except KeyError: |
| 535 | + return {} |
| 536 | + |
| 537 | + def _is_group(self, path): |
| 538 | + |
| 539 | + if path is None: |
| 540 | + attrs_key = self._attrs_key |
| 541 | + else: |
| 542 | + attrs_key = os.path.join(path, self._attrs_key) |
| 543 | + |
| 544 | + n5_attrs = self._load_n5_attrs(attrs_key) |
| 545 | + return len(n5_attrs) > 0 and "dimensions" not in n5_attrs |
| 546 | + |
| 547 | + def _is_array(self, path): |
| 548 | + |
| 549 | + if path is None: |
| 550 | + attrs_key = self._attrs_key |
| 551 | + else: |
| 552 | + attrs_key = os.path.join(path, self._attrs_key) |
| 553 | + |
| 554 | + return "dimensions" in self._load_n5_attrs(attrs_key) |
| 555 | + |
| 556 | + def _contains_attrs(self, path): |
| 557 | + |
| 558 | + if path is None: |
| 559 | + attrs_key = self._attrs_key |
| 560 | + else: |
| 561 | + if not path.endswith(self._attrs_key): |
| 562 | + attrs_key = os.path.join(path, self._attrs_key) |
| 563 | + else: # pragma: no cover |
| 564 | + attrs_key = path |
| 565 | + |
| 566 | + attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) |
| 567 | + return len(attrs) > 0 |
| 568 | + |
| 569 | + |
284 | 570 | def is_chunk_key(key):
|
| 571 | + rv = False |
285 | 572 | segments = list(key.split('/'))
|
286 | 573 | if segments:
|
287 | 574 | last_segment = segments[-1]
|
288 |
| - return _prog_ckey.match(last_segment) |
289 |
| - return False # pragma: no cover |
| 575 | + rv = _prog_ckey.match(last_segment) |
| 576 | + return rv |
290 | 577 |
|
291 | 578 |
|
292 | 579 | def invert_chunk_coords(key):
|
@@ -373,6 +660,7 @@ def array_metadata_to_zarr(array_metadata):
|
373 | 660 | array_metadata['fill_value'] = 0 # also if None was requested
|
374 | 661 | array_metadata['order'] = 'C'
|
375 | 662 | array_metadata['filters'] = []
|
| 663 | + array_metadata['dimension_separator'] = '.' |
376 | 664 |
|
377 | 665 | compressor_config = array_metadata['compressor']
|
378 | 666 | compressor_config = compressor_config_to_zarr(compressor_config)
|
|
0 commit comments