From 6ad2ca61b78fd1ecfc10d7fc80ae5055ed1a9d8b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 23 Jun 2024 18:36:49 +0200 Subject: [PATCH 1/9] use tmpdir for test --- tests/v2/test_storage.py | 61 ++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/tests/v2/test_storage.py b/tests/v2/test_storage.py index 88e99e91a1..0f010bb443 100644 --- a/tests/v2/test_storage.py +++ b/tests/v2/test_storage.py @@ -2227,38 +2227,38 @@ def test_format_compatibility(): np.random.seed(42) arrays_chunks = [ - (np.arange(1111, dtype=" Date: Wed, 3 Jul 2024 11:15:48 +0200 Subject: [PATCH 2/9] type annotations --- tests/v3/test_codecs.py | 89 +++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index 7cb0d0f804..38eb4354a9 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -21,6 +21,7 @@ TransposeCodec, ZstdCodec, ) +from zarr.common import MemoryOrder from zarr.config import config from zarr.indexing import Selection, morton_order_iter from zarr.store import MemoryStore, StorePath @@ -43,7 +44,7 @@ class _AsyncArraySelectionProxy: async def get(self) -> np.ndarray: return await self.array.getitem(self.selection) - async def set(self, value: np.ndarray): + async def set(self, value: np.ndarray) -> None: return await self.array.setitem(self.selection, value) @@ -57,7 +58,7 @@ def sample_data() -> np.ndarray: return np.arange(0, 128 * 128 * 128, dtype="uint16").reshape((128, 128, 128), order="F") -def order_from_dim(order: Literal["F", "C"], ndim: int) -> tuple[int, ...]: +def order_from_dim(order: MemoryOrder, ndim: int) -> tuple[int, ...]: if order == "F": return tuple(ndim - x - 1 for x in range(ndim)) else: @@ -67,7 +68,7 @@ def order_from_dim(order: Literal["F", "C"], ndim: int) -> tuple[int, ...]: @pytest.mark.parametrize("index_location", ["start", "end"]) def test_sharding( store: Store, sample_data: np.ndarray, index_location: ShardingCodecIndexLocation -): +) -> None: a = Array.create( store / "sample", shape=sample_data.shape, @@ -97,7 +98,7 @@ def test_sharding( @pytest.mark.parametrize("index_location", ["start", "end"]) def test_sharding_partial( store: Store, sample_data: np.ndarray, index_location: ShardingCodecIndexLocation -): +) -> None: a = Array.create( store / "sample", shape=tuple(a + 10 for a in sample_data.shape), @@ -130,7 +131,7 @@ def test_sharding_partial( @pytest.mark.parametrize("index_location", ["start", "end"]) def test_sharding_partial_read( store: Store, sample_data: np.ndarray, index_location: ShardingCodecIndexLocation -): +) -> None: a = Array.create( store / "sample", shape=tuple(a + 10 for a in sample_data.shape), @@ -157,7 +158,7 @@ def test_sharding_partial_read( @pytest.mark.parametrize("index_location", ["start", "end"]) def test_sharding_partial_overwrite( store: Store, sample_data: np.ndarray, index_location: ShardingCodecIndexLocation -): +) -> None: data = sample_data[:10, :10, :10] a = Array.create( @@ -203,7 +204,7 @@ def test_nested_sharding( sample_data: np.ndarray, outer_index_location: ShardingCodecIndexLocation, inner_index_location: ShardingCodecIndexLocation, -): +) -> None: a = Array.create( store / "l4_sample" / "color" / "1", shape=sample_data.shape, @@ -235,12 +236,12 @@ def test_nested_sharding( @pytest.mark.parametrize("with_sharding", [True, False]) async def test_order( store: Store, - input_order: Literal["F", "C"], - store_order: Literal["F", "C"], - runtime_write_order: Literal["F", "C"], - runtime_read_order: Literal["F", "C"], + input_order: MemoryOrder, + store_order: MemoryOrder, + runtime_write_order: MemoryOrder, + runtime_read_order: MemoryOrder, with_sharding: bool, -): +) -> None: data = np.arange(0, 256, dtype="uint16").reshape((32, 8), order=input_order) codecs_: list[Codec] = ( @@ -303,11 +304,11 @@ async def test_order( @pytest.mark.parametrize("with_sharding", [True, False]) def test_order_implicit( store: Store, - input_order: Literal["F", "C"], - runtime_write_order: Literal["F", "C"], - runtime_read_order: Literal["F", "C"], + input_order: MemoryOrder, + runtime_write_order: MemoryOrder, + runtime_read_order: MemoryOrder, with_sharding: bool, -): +) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16), order=input_order) codecs_: list[Codec] | None = [ShardingCodec(chunk_shape=(8, 8))] if with_sharding else None @@ -345,11 +346,11 @@ def test_order_implicit( @pytest.mark.parametrize("with_sharding", [True, False]) async def test_transpose( store: Store, - input_order: Literal["F", "C"], - runtime_write_order: Literal["F", "C"], - runtime_read_order: Literal["F", "C"], + input_order: MemoryOrder, + runtime_write_order: MemoryOrder, + runtime_read_order: MemoryOrder, with_sharding: bool, -): +) -> None: data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8), order=input_order) codecs_: list[Codec] = ( @@ -406,7 +407,7 @@ async def test_transpose( @pytest.mark.parametrize("order", [[1, 2, 0], [1, 2, 3, 0], [3, 2, 4, 0, 1]]) -def test_transpose_non_self_inverse(store: Store, order): +def test_transpose_non_self_inverse(store: Store, order: list[int]) -> None: shape = [i + 3 for i in range(len(order))] data = np.arange(0, np.prod(shape), dtype="uint16").reshape(shape) a = Array.create( @@ -424,7 +425,7 @@ def test_transpose_non_self_inverse(store: Store, order): def test_transpose_invalid( store: Store, -): +) -> None: data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8)) for order in [(1, 0), (3, 2, 1), (3, 3, 1)]: @@ -440,7 +441,7 @@ def test_transpose_invalid( ) -def test_open(store: Store): +def test_open(store: Store) -> None: a = Array.create( store / "open", shape=(16, 16), @@ -452,7 +453,7 @@ def test_open(store: Store): assert a.metadata == b.metadata -def test_open_sharding(store: Store): +def test_open_sharding(store: Store) -> None: a = Array.create( store / "open_sharding", shape=(16, 16), @@ -474,7 +475,7 @@ def test_open_sharding(store: Store): assert a.metadata == b.metadata -def test_simple(store: Store): +def test_simple(store: Store) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) a = Array.create( @@ -489,7 +490,7 @@ def test_simple(store: Store): assert np.array_equal(data, a[:, :]) -def test_fill_value(store: Store): +def test_fill_value(store: Store) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) a = Array.create( @@ -524,7 +525,7 @@ def test_fill_value(store: Store): assert np.array_equiv(4, c[:, :]) -def test_morton(store: Store): +def test_morton(store: Store) -> None: assert list(morton_order_iter((2, 2))) == [(0, 0), (1, 0), (0, 1), (1, 1)] assert list(morton_order_iter((2, 2, 2))) == [ (0, 0, 0), @@ -556,7 +557,7 @@ def test_morton(store: Store): ] -def test_write_partial_chunks(store: Store): +def test_write_partial_chunks(store: Store) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) a = Array.create( @@ -570,7 +571,7 @@ def test_write_partial_chunks(store: Store): assert np.array_equal(a[0:16, 0:16], data) -def test_write_full_chunks(store: Store): +def test_write_full_chunks(store: Store) -> None: data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16)) a = Array.create( @@ -593,7 +594,7 @@ def test_write_full_chunks(store: Store): assert np.all(a[16:20, 16:20] == 1) -def test_write_partial_sharded_chunks(store: Store): +def test_write_partial_sharded_chunks(store: Store) -> None: data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16)) a = Array.create( @@ -616,7 +617,7 @@ def test_write_partial_sharded_chunks(store: Store): assert np.array_equal(a[0:16, 0:16], data) -async def test_delete_empty_chunks(store: Store): +async def test_delete_empty_chunks(store: Store) -> None: data = np.ones((16, 16)) a = await AsyncArray.create( @@ -632,7 +633,7 @@ async def test_delete_empty_chunks(store: Store): assert await (store / "delete_empty_chunks/c0/0").get() is None -async def test_delete_empty_shards(store: Store): +async def test_delete_empty_shards(store: Store) -> None: a = await AsyncArray.create( store / "delete_empty_shards", shape=(16, 16), @@ -657,7 +658,7 @@ async def test_delete_empty_shards(store: Store): assert chunk_bytes is not None and len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 -async def test_zarr_compat(store: Store): +async def test_zarr_compat(store: Store) -> None: data = np.zeros((16, 18), dtype="uint16") a = await AsyncArray.create( @@ -688,7 +689,7 @@ async def test_zarr_compat(store: Store): assert_bytes_equal(z2._store["1.1"], await (store / "zarr_compat3/1.1").get()) -async def test_zarr_compat_F(store: Store): +async def test_zarr_compat_F(store: Store) -> None: data = np.zeros((16, 18), dtype="uint16", order="F") a = await AsyncArray.create( @@ -721,7 +722,7 @@ async def test_zarr_compat_F(store: Store): assert_bytes_equal(z2._store["1.1"], await (store / "zarr_compatF3/1.1").get()) -async def test_dimension_names(store: Store): +async def test_dimension_names(store: Store) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) await AsyncArray.create( @@ -752,7 +753,7 @@ async def test_dimension_names(store: Store): assert "dimension_names" not in json.loads(zarr_json_buffer.to_bytes()) -def test_gzip(store: Store): +def test_gzip(store: Store) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) a = Array.create( @@ -769,7 +770,7 @@ def test_gzip(store: Store): @pytest.mark.parametrize("checksum", [True, False]) -def test_zstd(store: Store, checksum: bool): +def test_zstd(store: Store, checksum: bool) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) a = Array.create( @@ -786,7 +787,7 @@ def test_zstd(store: Store, checksum: bool): @pytest.mark.parametrize("endian", ["big", "little"]) -async def test_endian(store: Store, endian: Literal["big", "little"]): +async def test_endian(store: Store, endian: Literal["big", "little"]) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) a = await AsyncArray.create( @@ -821,7 +822,7 @@ async def test_endian_write( store: Store, dtype_input_endian: Literal[">u2", " None: data = np.arange(0, 256, dtype=dtype_input_endian).reshape((16, 16)) a = await AsyncArray.create( @@ -850,7 +851,7 @@ async def test_endian_write( assert_bytes_equal(await (store / "endian/0.0").get(), z._store["0.0"]) -def test_invalid_metadata(store: Store): +def test_invalid_metadata(store: Store) -> None: with pytest.raises(ValueError): Array.create( store / "invalid_chunk_shape", @@ -935,7 +936,7 @@ def test_invalid_metadata(store: Store): ) -async def test_resize(store: Store): +async def test_resize(store: Store) -> None: data = np.zeros((16, 18), dtype="uint16") a = await AsyncArray.create( @@ -961,7 +962,7 @@ async def test_resize(store: Store): assert await (store / "resize" / "1.1").get() is None -async def test_blosc_evolve(store: Store): +async def test_blosc_evolve(store: Store) -> None: await AsyncArray.create( store / "blosc_evolve_u1", shape=(16, 16), @@ -1005,7 +1006,7 @@ async def test_blosc_evolve(store: Store): assert blosc_configuration_json["shuffle"] == "shuffle" -def test_exists_ok(store: Store): +def test_exists_ok(store: Store) -> None: Array.create( store / "exists_ok", shape=(16, 16), @@ -1028,7 +1029,7 @@ def test_exists_ok(store: Store): ) -def test_update_attributes_array(store: Store): +def test_update_attributes_array(store: Store) -> None: data = np.zeros((16, 18), dtype="uint16") a = Array.create( From c4e49a6c6bb7eee639f2a59a8837f25e6a2f53d7 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jul 2024 15:01:34 +0200 Subject: [PATCH 3/9] refactor morton decode and remove destructuring in call to max --- src/zarr/indexing.py | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py index ae4aa0681b..1f483e1c15 100644 --- a/src/zarr/indexing.py +++ b/src/zarr/indexing.py @@ -1220,24 +1220,25 @@ def make_slice_selection(selection: Any) -> list[slice]: return ls -def morton_order_iter(chunk_shape: ChunkCoords) -> Iterator[ChunkCoords]: - def decode_morton(z: int, chunk_shape: ChunkCoords) -> ChunkCoords: - # Inspired by compressed morton code as implemented in Neuroglancer - # https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/volume.md#compressed-morton-code - bits = tuple(math.ceil(math.log2(c)) for c in chunk_shape) - max_coords_bits = max(*bits) - input_bit = 0 - input_value = z - out = [0 for _ in range(len(chunk_shape))] - - for coord_bit in range(max_coords_bits): - for dim in range(len(chunk_shape)): - if coord_bit < bits[dim]: - bit = (input_value >> input_bit) & 1 - out[dim] |= bit << coord_bit - input_bit += 1 - return tuple(out) +def decode_morton(z: int, chunk_shape: ChunkCoords) -> ChunkCoords: + # Inspired by compressed morton code as implemented in Neuroglancer + # https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/volume.md#compressed-morton-code + bits = tuple(math.ceil(math.log2(c)) for c in chunk_shape) + max_coords_bits = max(bits) + input_bit = 0 + input_value = z + out = [0] * len(chunk_shape) + + for coord_bit in range(max_coords_bits): + for dim in range(len(chunk_shape)): + if coord_bit < bits[dim]: + bit = (input_value >> input_bit) & 1 + out[dim] |= bit << coord_bit + input_bit += 1 + return tuple(out) + +def morton_order_iter(chunk_shape: ChunkCoords) -> Iterator[ChunkCoords]: for i in range(product(chunk_shape)): yield decode_morton(i, chunk_shape) From 34eed0accb6eeb3a8ab0e77b43ea8282a4328902 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jul 2024 15:01:59 +0200 Subject: [PATCH 4/9] parametrize sharding codec test by data shape --- tests/v3/test_codecs.py | 54 ++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index 38eb4354a9..b72a9ccc9d 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -21,13 +21,20 @@ TransposeCodec, ZstdCodec, ) -from zarr.common import MemoryOrder +from zarr.common import ChunkCoords, MemoryOrder from zarr.config import config from zarr.indexing import Selection, morton_order_iter from zarr.store import MemoryStore, StorePath from zarr.testing.utils import assert_bytes_equal +@dataclass +class ArrayRequest: + shape: ChunkCoords + dtype: str + order: MemoryOrder + + @dataclass(frozen=True) class _AsyncArrayProxy: array: AsyncArray @@ -54,8 +61,13 @@ def store() -> Iterator[Store]: @pytest.fixture -def sample_data() -> np.ndarray: - return np.arange(0, 128 * 128 * 128, dtype="uint16").reshape((128, 128, 128), order="F") +def data(request: pytest.FixtureRequest) -> np.ndarray: + array_request: ArrayRequest = request.param + return ( + np.arange(np.prod(array_request.shape)) + .reshape(array_request.shape, order=array_request.order) + .astype(array_request.dtype) + ) def order_from_dim(order: MemoryOrder, ndim: int) -> tuple[int, ...]: @@ -66,20 +78,30 @@ def order_from_dim(order: MemoryOrder, ndim: int) -> tuple[int, ...]: @pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize( + "data", + [ + ArrayRequest(shape=(128,) * 1, dtype="uint8", order="C"), + ArrayRequest(shape=(128,) * 2, dtype="uint8", order="C"), + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["data"], +) +@pytest.mark.parametrize("offset", [0, 10]) def test_sharding( - store: Store, sample_data: np.ndarray, index_location: ShardingCodecIndexLocation + store: Store, data: np.ndarray, index_location: ShardingCodecIndexLocation, offset: int ) -> None: a = Array.create( store / "sample", - shape=sample_data.shape, - chunk_shape=(64, 64, 64), - dtype=sample_data.dtype, - fill_value=0, + shape=tuple(s + offset for s in data.shape), + chunk_shape=(64,) * data.ndim, + dtype=data.dtype, + fill_value=6, codecs=[ ShardingCodec( - chunk_shape=(32, 32, 32), + chunk_shape=(32,) * data.ndim, codecs=[ - TransposeCodec(order=order_from_dim("F", sample_data.ndim)), + TransposeCodec(order=order_from_dim("F", data.ndim)), BytesCodec(), BloscCodec(cname="lz4"), ], @@ -87,12 +109,16 @@ def test_sharding( ) ], ) + write_region = tuple(slice(offset, None) for dim in range(data.ndim)) + a[write_region] = data - a[:, :, :] = sample_data + if offset > 0: + empty_region = tuple(slice(0, offset) for dim in range(data.ndim)) + assert np.all(a[empty_region] == a.metadata.fill_value) - read_data = a[0 : sample_data.shape[0], 0 : sample_data.shape[1], 0 : sample_data.shape[2]] - assert sample_data.shape == read_data.shape - assert np.array_equal(sample_data, read_data) + read_data = a[write_region] + assert data.shape == read_data.shape + assert np.array_equal(data, read_data) @pytest.mark.parametrize("index_location", ["start", "end"]) From 0a36e2f9b4ad709cb19e71e87e52621f4b381cfa Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jul 2024 22:54:58 +0200 Subject: [PATCH 5/9] refactor codec tests --- tests/v3/conftest.py | 21 +- tests/v3/test_codecs.py | 1076 ------------------------ tests/v3/test_codecs/__init__.py | 0 tests/v3/test_codecs/test_blosc.py | 57 ++ tests/v3/test_codecs/test_codecs.py | 503 +++++++++++ tests/v3/test_codecs/test_endian.py | 87 ++ tests/v3/test_codecs/test_gzip.py | 24 + tests/v3/test_codecs/test_sharding.py | 316 +++++++ tests/v3/test_codecs/test_transpose.py | 121 +++ tests/v3/test_codecs/test_zstd.py | 25 + 10 files changed, 1145 insertions(+), 1085 deletions(-) delete mode 100644 tests/v3/test_codecs.py create mode 100644 tests/v3/test_codecs/__init__.py create mode 100644 tests/v3/test_codecs/test_blosc.py create mode 100644 tests/v3/test_codecs/test_codecs.py create mode 100644 tests/v3/test_codecs/test_endian.py create mode 100644 tests/v3/test_codecs/test_gzip.py create mode 100644 tests/v3/test_codecs/test_sharding.py create mode 100644 tests/v3/test_codecs/test_transpose.py create mode 100644 tests/v3/test_codecs/test_zstd.py diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 6b58cce412..79ce1d4b67 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -4,6 +4,9 @@ from types import ModuleType from typing import TYPE_CHECKING +from _pytest.compat import LEGACY_PATH + +from zarr.abc.store import Store from zarr.common import ZarrFormat from zarr.group import AsyncGroup @@ -26,40 +29,40 @@ def parse_store( if store == "memory": return MemoryStore(mode="w") if store == "remote": - return RemoteStore(mode="w") + return RemoteStore(url=path, mode="w") raise AssertionError @pytest.fixture(params=[str, pathlib.Path]) -def path_type(request): +def path_type(request: pytest.FixtureRequest) -> Any: return request.param # todo: harmonize this with local_store fixture @pytest.fixture -def store_path(tmpdir): +def store_path(tmpdir: LEGACY_PATH) -> StorePath: store = LocalStore(str(tmpdir), mode="w") p = StorePath(store) return p @pytest.fixture(scope="function") -def local_store(tmpdir): +def local_store(tmpdir: LEGACY_PATH) -> LocalStore: return LocalStore(str(tmpdir), mode="w") @pytest.fixture(scope="function") -def remote_store(): - return RemoteStore(mode="w") +def remote_store(url: str) -> RemoteStore: + return RemoteStore(url, mode="w") @pytest.fixture(scope="function") -def memory_store(): +def memory_store() -> MemoryStore: return MemoryStore(mode="w") @pytest.fixture(scope="function") -def store(request: str, tmpdir): +def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: param = request.param return parse_store(param, str(tmpdir)) @@ -72,7 +75,7 @@ class AsyncGroupRequest: @pytest.fixture(scope="function") -async def async_group(request: pytest.FixtureRequest, tmpdir) -> AsyncGroup: +async def async_group(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> AsyncGroup: param: AsyncGroupRequest = request.param store = parse_store(param.store, str(tmpdir)) diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py deleted file mode 100644 index b72a9ccc9d..0000000000 --- a/tests/v3/test_codecs.py +++ /dev/null @@ -1,1076 +0,0 @@ -from __future__ import annotations - -import json -from collections.abc import Iterator -from dataclasses import dataclass -from typing import Literal - -import numpy as np -import pytest - -import zarr.v2 -from zarr.abc.codec import Codec -from zarr.abc.store import Store -from zarr.array import Array, AsyncArray -from zarr.codecs import ( - BloscCodec, - BytesCodec, - GzipCodec, - ShardingCodec, - ShardingCodecIndexLocation, - TransposeCodec, - ZstdCodec, -) -from zarr.common import ChunkCoords, MemoryOrder -from zarr.config import config -from zarr.indexing import Selection, morton_order_iter -from zarr.store import MemoryStore, StorePath -from zarr.testing.utils import assert_bytes_equal - - -@dataclass -class ArrayRequest: - shape: ChunkCoords - dtype: str - order: MemoryOrder - - -@dataclass(frozen=True) -class _AsyncArrayProxy: - array: AsyncArray - - def __getitem__(self, selection: Selection) -> _AsyncArraySelectionProxy: - return _AsyncArraySelectionProxy(self.array, selection) - - -@dataclass(frozen=True) -class _AsyncArraySelectionProxy: - array: AsyncArray - selection: Selection - - async def get(self) -> np.ndarray: - return await self.array.getitem(self.selection) - - async def set(self, value: np.ndarray) -> None: - return await self.array.setitem(self.selection, value) - - -@pytest.fixture -def store() -> Iterator[Store]: - yield StorePath(MemoryStore(mode="w")) - - -@pytest.fixture -def data(request: pytest.FixtureRequest) -> np.ndarray: - array_request: ArrayRequest = request.param - return ( - np.arange(np.prod(array_request.shape)) - .reshape(array_request.shape, order=array_request.order) - .astype(array_request.dtype) - ) - - -def order_from_dim(order: MemoryOrder, ndim: int) -> tuple[int, ...]: - if order == "F": - return tuple(ndim - x - 1 for x in range(ndim)) - else: - return tuple(range(ndim)) - - -@pytest.mark.parametrize("index_location", ["start", "end"]) -@pytest.mark.parametrize( - "data", - [ - ArrayRequest(shape=(128,) * 1, dtype="uint8", order="C"), - ArrayRequest(shape=(128,) * 2, dtype="uint8", order="C"), - ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), - ], - indirect=["data"], -) -@pytest.mark.parametrize("offset", [0, 10]) -def test_sharding( - store: Store, data: np.ndarray, index_location: ShardingCodecIndexLocation, offset: int -) -> None: - a = Array.create( - store / "sample", - shape=tuple(s + offset for s in data.shape), - chunk_shape=(64,) * data.ndim, - dtype=data.dtype, - fill_value=6, - codecs=[ - ShardingCodec( - chunk_shape=(32,) * data.ndim, - codecs=[ - TransposeCodec(order=order_from_dim("F", data.ndim)), - BytesCodec(), - BloscCodec(cname="lz4"), - ], - index_location=index_location, - ) - ], - ) - write_region = tuple(slice(offset, None) for dim in range(data.ndim)) - a[write_region] = data - - if offset > 0: - empty_region = tuple(slice(0, offset) for dim in range(data.ndim)) - assert np.all(a[empty_region] == a.metadata.fill_value) - - read_data = a[write_region] - assert data.shape == read_data.shape - assert np.array_equal(data, read_data) - - -@pytest.mark.parametrize("index_location", ["start", "end"]) -def test_sharding_partial( - store: Store, sample_data: np.ndarray, index_location: ShardingCodecIndexLocation -) -> None: - a = Array.create( - store / "sample", - shape=tuple(a + 10 for a in sample_data.shape), - chunk_shape=(64, 64, 64), - dtype=sample_data.dtype, - fill_value=0, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - TransposeCodec(order=order_from_dim("F", sample_data.ndim)), - BytesCodec(), - BloscCodec(cname="lz4"), - ], - index_location=index_location, - ) - ], - ) - - a[10:, 10:, 10:] = sample_data - - read_data = a[0:10, 0:10, 0:10] - assert np.all(read_data == 0) - - read_data = a[10:, 10:, 10:] - assert sample_data.shape == read_data.shape - assert np.array_equal(sample_data, read_data) - - -@pytest.mark.parametrize("index_location", ["start", "end"]) -def test_sharding_partial_read( - store: Store, sample_data: np.ndarray, index_location: ShardingCodecIndexLocation -) -> None: - a = Array.create( - store / "sample", - shape=tuple(a + 10 for a in sample_data.shape), - chunk_shape=(64, 64, 64), - dtype=sample_data.dtype, - fill_value=1, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - TransposeCodec(order=order_from_dim("F", sample_data.ndim)), - BytesCodec(), - BloscCodec(cname="lz4"), - ], - index_location=index_location, - ) - ], - ) - - read_data = a[0:10, 0:10, 0:10] - assert np.all(read_data == 1) - - -@pytest.mark.parametrize("index_location", ["start", "end"]) -def test_sharding_partial_overwrite( - store: Store, sample_data: np.ndarray, index_location: ShardingCodecIndexLocation -) -> None: - data = sample_data[:10, :10, :10] - - a = Array.create( - store / "sample", - shape=tuple(a + 10 for a in data.shape), - chunk_shape=(64, 64, 64), - dtype=data.dtype, - fill_value=1, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - TransposeCodec(order=order_from_dim("F", data.ndim)), - BytesCodec(), - BloscCodec(cname="lz4"), - ], - index_location=index_location, - ) - ], - ) - - a[:10, :10, :10] = data - - read_data = a[0:10, 0:10, 0:10] - assert np.array_equal(data, read_data) - - data = data + 10 - a[:10, :10, :10] = data - read_data = a[0:10, 0:10, 0:10] - assert np.array_equal(data, read_data) - - -@pytest.mark.parametrize( - "outer_index_location", - ["start", "end"], -) -@pytest.mark.parametrize( - "inner_index_location", - ["start", "end"], -) -def test_nested_sharding( - store: Store, - sample_data: np.ndarray, - outer_index_location: ShardingCodecIndexLocation, - inner_index_location: ShardingCodecIndexLocation, -) -> None: - a = Array.create( - store / "l4_sample" / "color" / "1", - shape=sample_data.shape, - chunk_shape=(64, 64, 64), - dtype=sample_data.dtype, - fill_value=0, - codecs=[ - ShardingCodec( - chunk_shape=(32, 32, 32), - codecs=[ - ShardingCodec(chunk_shape=(16, 16, 16), index_location=inner_index_location) - ], - index_location=outer_index_location, - ) - ], - ) - - a[:, :, :] = sample_data - - read_data = a[0 : sample_data.shape[0], 0 : sample_data.shape[1], 0 : sample_data.shape[2]] - assert sample_data.shape == read_data.shape - assert np.array_equal(sample_data, read_data) - - -@pytest.mark.parametrize("input_order", ["F", "C"]) -@pytest.mark.parametrize("store_order", ["F", "C"]) -@pytest.mark.parametrize("runtime_write_order", ["F", "C"]) -@pytest.mark.parametrize("runtime_read_order", ["F", "C"]) -@pytest.mark.parametrize("with_sharding", [True, False]) -async def test_order( - store: Store, - input_order: MemoryOrder, - store_order: MemoryOrder, - runtime_write_order: MemoryOrder, - runtime_read_order: MemoryOrder, - with_sharding: bool, -) -> None: - data = np.arange(0, 256, dtype="uint16").reshape((32, 8), order=input_order) - - codecs_: list[Codec] = ( - [ - ShardingCodec( - chunk_shape=(16, 8), - codecs=[TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()], - ) - ] - if with_sharding - else [TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()] - ) - - with config.set({"array.order": runtime_write_order}): - a = await AsyncArray.create( - store / "order", - shape=data.shape, - chunk_shape=(32, 8), - dtype=data.dtype, - fill_value=0, - chunk_key_encoding=("v2", "."), - codecs=codecs_, - ) - - await _AsyncArrayProxy(a)[:, :].set(data) - read_data = await _AsyncArrayProxy(a)[:, :].get() - assert np.array_equal(data, read_data) - - with config.set({"array.order": runtime_read_order}): - a = await AsyncArray.open( - store / "order", - ) - read_data = await _AsyncArrayProxy(a)[:, :].get() - assert np.array_equal(data, read_data) - - if runtime_read_order == "F": - assert read_data.flags["F_CONTIGUOUS"] - assert not read_data.flags["C_CONTIGUOUS"] - else: - assert not read_data.flags["F_CONTIGUOUS"] - assert read_data.flags["C_CONTIGUOUS"] - - if not with_sharding: - # Compare with zarr-python - z = zarr.v2.create( - shape=data.shape, - chunks=(32, 8), - dtype=" None: - data = np.arange(0, 256, dtype="uint16").reshape((16, 16), order=input_order) - - codecs_: list[Codec] | None = [ShardingCodec(chunk_shape=(8, 8))] if with_sharding else None - - with config.set({"array.order": runtime_write_order}): - a = Array.create( - store / "order_implicit", - shape=data.shape, - chunk_shape=(16, 16), - dtype=data.dtype, - fill_value=0, - codecs=codecs_, - ) - - a[:, :] = data - - with config.set({"array.order": runtime_read_order}): - a = Array.open( - store / "order_implicit", - ) - read_data = a[:, :] - assert np.array_equal(data, read_data) - - if runtime_read_order == "F": - assert read_data.flags["F_CONTIGUOUS"] - assert not read_data.flags["C_CONTIGUOUS"] - else: - assert not read_data.flags["F_CONTIGUOUS"] - assert read_data.flags["C_CONTIGUOUS"] - - -@pytest.mark.parametrize("input_order", ["F", "C"]) -@pytest.mark.parametrize("runtime_write_order", ["F", "C"]) -@pytest.mark.parametrize("runtime_read_order", ["F", "C"]) -@pytest.mark.parametrize("with_sharding", [True, False]) -async def test_transpose( - store: Store, - input_order: MemoryOrder, - runtime_write_order: MemoryOrder, - runtime_read_order: MemoryOrder, - with_sharding: bool, -) -> None: - data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8), order=input_order) - - codecs_: list[Codec] = ( - [ - ShardingCodec( - chunk_shape=(1, 16, 8), - codecs=[TransposeCodec(order=(2, 1, 0)), BytesCodec()], - ) - ] - if with_sharding - else [TransposeCodec(order=(2, 1, 0)), BytesCodec()] - ) - with config.set({"array.order": runtime_write_order}): - a = await AsyncArray.create( - store / "transpose", - shape=data.shape, - chunk_shape=(1, 32, 8), - dtype=data.dtype, - fill_value=0, - chunk_key_encoding=("v2", "."), - codecs=codecs_, - ) - - await _AsyncArrayProxy(a)[:, :].set(data) - read_data = await _AsyncArrayProxy(a)[:, :].get() - assert np.array_equal(data, read_data) - - with config.set({"array.order": runtime_read_order}): - a = await AsyncArray.open( - store / "transpose", - ) - read_data = await _AsyncArrayProxy(a)[:, :].get() - assert np.array_equal(data, read_data) - - if runtime_read_order == "F": - assert read_data.flags["F_CONTIGUOUS"] - assert not read_data.flags["C_CONTIGUOUS"] - else: - assert not read_data.flags["F_CONTIGUOUS"] - assert read_data.flags["C_CONTIGUOUS"] - - if not with_sharding: - # Compare with zarr-python - z = zarr.v2.create( - shape=data.shape, - chunks=(1, 32, 8), - dtype=" None: - shape = [i + 3 for i in range(len(order))] - data = np.arange(0, np.prod(shape), dtype="uint16").reshape(shape) - a = Array.create( - store / "transpose_non_self_inverse", - shape=data.shape, - chunk_shape=data.shape, - dtype=data.dtype, - fill_value=0, - codecs=[TransposeCodec(order=order), BytesCodec()], - ) - a[:, :] = data - read_data = a[:, :] - assert np.array_equal(data, read_data) - - -def test_transpose_invalid( - store: Store, -) -> None: - data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8)) - - for order in [(1, 0), (3, 2, 1), (3, 3, 1)]: - with pytest.raises(ValueError): - Array.create( - store / "transpose_invalid", - shape=data.shape, - chunk_shape=(1, 32, 8), - dtype=data.dtype, - fill_value=0, - chunk_key_encoding=("v2", "."), - codecs=[TransposeCodec(order=order), BytesCodec()], - ) - - -def test_open(store: Store) -> None: - a = Array.create( - store / "open", - shape=(16, 16), - chunk_shape=(16, 16), - dtype="int32", - fill_value=0, - ) - b = Array.open(store / "open") - assert a.metadata == b.metadata - - -def test_open_sharding(store: Store) -> None: - a = Array.create( - store / "open_sharding", - shape=(16, 16), - chunk_shape=(16, 16), - dtype="int32", - fill_value=0, - codecs=[ - ShardingCodec( - chunk_shape=(8, 8), - codecs=[ - TransposeCodec(order=order_from_dim("F", 2)), - BytesCodec(), - BloscCodec(), - ], - ) - ], - ) - b = Array.open(store / "open_sharding") - assert a.metadata == b.metadata - - -def test_simple(store: Store) -> None: - data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) - - a = Array.create( - store / "simple", - shape=data.shape, - chunk_shape=(16, 16), - dtype=data.dtype, - fill_value=0, - ) - - a[:, :] = data - assert np.array_equal(data, a[:, :]) - - -def test_fill_value(store: Store) -> None: - data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) - - a = Array.create( - store / "fill_value1", - shape=data.shape, - chunk_shape=(16, 16), - dtype=data.dtype, - ) - - assert a.metadata.fill_value == 0 - assert np.array_equiv(0, a[:, :]) - - b = Array.create( - store / "fill_value2", - shape=data.shape, - chunk_shape=(16, 16), - dtype=np.dtype("bool"), - ) - - assert b.metadata.fill_value is False - assert np.array_equiv(False, b[:, :]) - - c = Array.create( - store / "fill_value3", - shape=data.shape, - chunk_shape=(16, 16), - dtype=data.dtype, - fill_value=4, - ) - - assert c.metadata.fill_value == 4 - assert np.array_equiv(4, c[:, :]) - - -def test_morton(store: Store) -> None: - assert list(morton_order_iter((2, 2))) == [(0, 0), (1, 0), (0, 1), (1, 1)] - assert list(morton_order_iter((2, 2, 2))) == [ - (0, 0, 0), - (1, 0, 0), - (0, 1, 0), - (1, 1, 0), - (0, 0, 1), - (1, 0, 1), - (0, 1, 1), - (1, 1, 1), - ] - assert list(morton_order_iter((2, 2, 2, 2))) == [ - (0, 0, 0, 0), - (1, 0, 0, 0), - (0, 1, 0, 0), - (1, 1, 0, 0), - (0, 0, 1, 0), - (1, 0, 1, 0), - (0, 1, 1, 0), - (1, 1, 1, 0), - (0, 0, 0, 1), - (1, 0, 0, 1), - (0, 1, 0, 1), - (1, 1, 0, 1), - (0, 0, 1, 1), - (1, 0, 1, 1), - (0, 1, 1, 1), - (1, 1, 1, 1), - ] - - -def test_write_partial_chunks(store: Store) -> None: - data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) - - a = Array.create( - store / "write_partial_chunks", - shape=data.shape, - chunk_shape=(20, 20), - dtype=data.dtype, - fill_value=1, - ) - a[0:16, 0:16] = data - assert np.array_equal(a[0:16, 0:16], data) - - -def test_write_full_chunks(store: Store) -> None: - data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16)) - - a = Array.create( - store / "write_full_chunks", - shape=(16, 16), - chunk_shape=(20, 20), - dtype=data.dtype, - fill_value=1, - ) - a[0:16, 0:16] = data - assert np.array_equal(a[0:16, 0:16], data) - - a = Array.create( - store / "write_full_chunks2", - shape=(20, 20), - chunk_shape=(20, 20), - dtype=data.dtype, - fill_value=1, - ) - assert np.all(a[16:20, 16:20] == 1) - - -def test_write_partial_sharded_chunks(store: Store) -> None: - data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16)) - - a = Array.create( - store / "write_partial_sharded_chunks", - shape=(40, 40), - chunk_shape=(20, 20), - dtype=data.dtype, - fill_value=1, - codecs=[ - ShardingCodec( - chunk_shape=(10, 10), - codecs=[ - BytesCodec(), - BloscCodec(), - ], - ) - ], - ) - a[0:16, 0:16] = data - assert np.array_equal(a[0:16, 0:16], data) - - -async def test_delete_empty_chunks(store: Store) -> None: - data = np.ones((16, 16)) - - a = await AsyncArray.create( - store / "delete_empty_chunks", - shape=data.shape, - chunk_shape=(32, 32), - dtype=data.dtype, - fill_value=1, - ) - await _AsyncArrayProxy(a)[:16, :16].set(np.zeros((16, 16))) - await _AsyncArrayProxy(a)[:16, :16].set(data) - assert np.array_equal(await _AsyncArrayProxy(a)[:16, :16].get(), data) - assert await (store / "delete_empty_chunks/c0/0").get() is None - - -async def test_delete_empty_shards(store: Store) -> None: - a = await AsyncArray.create( - store / "delete_empty_shards", - shape=(16, 16), - chunk_shape=(8, 16), - dtype="uint16", - fill_value=1, - codecs=[ShardingCodec(chunk_shape=(8, 8))], - ) - await _AsyncArrayProxy(a)[:, :].set(np.zeros((16, 16))) - await _AsyncArrayProxy(a)[8:, :].set(np.ones((8, 16))) - await _AsyncArrayProxy(a)[:, 8:].set(np.ones((16, 8))) - # chunk (0, 0) is full - # chunks (0, 1), (1, 0), (1, 1) are empty - # shard (0, 0) is half-full - # shard (1, 0) is empty - - data = np.ones((16, 16), dtype="uint16") - data[:8, :8] = 0 - assert np.array_equal(data, await _AsyncArrayProxy(a)[:, :].get()) - assert await (store / "delete_empty_shards/c/1/0").get() is None - chunk_bytes = await (store / "delete_empty_shards/c/0/0").get() - assert chunk_bytes is not None and len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 - - -async def test_zarr_compat(store: Store) -> None: - data = np.zeros((16, 18), dtype="uint16") - - a = await AsyncArray.create( - store / "zarr_compat3", - shape=data.shape, - chunk_shape=(10, 10), - dtype=data.dtype, - chunk_key_encoding=("v2", "."), - fill_value=1, - ) - - z2 = zarr.v2.create( - shape=data.shape, - chunks=(10, 10), - dtype=data.dtype, - compressor=None, - fill_value=1, - ) - - await _AsyncArrayProxy(a)[:16, :18].set(data) - z2[:16, :18] = data - assert np.array_equal(data, await _AsyncArrayProxy(a)[:16, :18].get()) - assert np.array_equal(data, z2[:16, :18]) - - assert_bytes_equal(z2._store["0.0"], await (store / "zarr_compat3/0.0").get()) - assert_bytes_equal(z2._store["0.1"], await (store / "zarr_compat3/0.1").get()) - assert_bytes_equal(z2._store["1.0"], await (store / "zarr_compat3/1.0").get()) - assert_bytes_equal(z2._store["1.1"], await (store / "zarr_compat3/1.1").get()) - - -async def test_zarr_compat_F(store: Store) -> None: - data = np.zeros((16, 18), dtype="uint16", order="F") - - a = await AsyncArray.create( - store / "zarr_compatF3", - shape=data.shape, - chunk_shape=(10, 10), - dtype=data.dtype, - chunk_key_encoding=("v2", "."), - fill_value=1, - codecs=[TransposeCodec(order=order_from_dim("F", data.ndim)), BytesCodec()], - ) - - z2 = zarr.v2.create( - shape=data.shape, - chunks=(10, 10), - dtype=data.dtype, - compressor=None, - order="F", - fill_value=1, - ) - - await _AsyncArrayProxy(a)[:16, :18].set(data) - z2[:16, :18] = data - assert np.array_equal(data, await _AsyncArrayProxy(a)[:16, :18].get()) - assert np.array_equal(data, z2[:16, :18]) - - assert_bytes_equal(z2._store["0.0"], await (store / "zarr_compatF3/0.0").get()) - assert_bytes_equal(z2._store["0.1"], await (store / "zarr_compatF3/0.1").get()) - assert_bytes_equal(z2._store["1.0"], await (store / "zarr_compatF3/1.0").get()) - assert_bytes_equal(z2._store["1.1"], await (store / "zarr_compatF3/1.1").get()) - - -async def test_dimension_names(store: Store) -> None: - data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) - - await AsyncArray.create( - store / "dimension_names", - shape=data.shape, - chunk_shape=(16, 16), - dtype=data.dtype, - fill_value=0, - dimension_names=("x", "y"), - ) - - assert (await AsyncArray.open(store / "dimension_names")).metadata.dimension_names == ( - "x", - "y", - ) - - await AsyncArray.create( - store / "dimension_names2", - shape=data.shape, - chunk_shape=(16, 16), - dtype=data.dtype, - fill_value=0, - ) - - assert (await AsyncArray.open(store / "dimension_names2")).metadata.dimension_names is None - zarr_json_buffer = await (store / "dimension_names2" / "zarr.json").get() - assert zarr_json_buffer is not None - assert "dimension_names" not in json.loads(zarr_json_buffer.to_bytes()) - - -def test_gzip(store: Store) -> None: - data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) - - a = Array.create( - store / "gzip", - shape=data.shape, - chunk_shape=(16, 16), - dtype=data.dtype, - fill_value=0, - codecs=[BytesCodec(), GzipCodec()], - ) - - a[:, :] = data - assert np.array_equal(data, a[:, :]) - - -@pytest.mark.parametrize("checksum", [True, False]) -def test_zstd(store: Store, checksum: bool) -> None: - data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) - - a = Array.create( - store / "zstd", - shape=data.shape, - chunk_shape=(16, 16), - dtype=data.dtype, - fill_value=0, - codecs=[BytesCodec(), ZstdCodec(level=0, checksum=checksum)], - ) - - a[:, :] = data - assert np.array_equal(data, a[:, :]) - - -@pytest.mark.parametrize("endian", ["big", "little"]) -async def test_endian(store: Store, endian: Literal["big", "little"]) -> None: - data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) - - a = await AsyncArray.create( - store / "endian", - shape=data.shape, - chunk_shape=(16, 16), - dtype=data.dtype, - fill_value=0, - chunk_key_encoding=("v2", "."), - codecs=[BytesCodec(endian=endian)], - ) - - await _AsyncArrayProxy(a)[:, :].set(data) - readback_data = await _AsyncArrayProxy(a)[:, :].get() - assert np.array_equal(data, readback_data) - - # Compare with zarr-python - z = zarr.v2.create( - shape=data.shape, - chunks=(16, 16), - dtype=">u2" if endian == "big" else "u2", "u2", " None: - data = np.arange(0, 256, dtype=dtype_input_endian).reshape((16, 16)) - - a = await AsyncArray.create( - store / "endian", - shape=data.shape, - chunk_shape=(16, 16), - dtype="uint16", - fill_value=0, - chunk_key_encoding=("v2", "."), - codecs=[BytesCodec(endian=dtype_store_endian)], - ) - - await _AsyncArrayProxy(a)[:, :].set(data) - readback_data = await _AsyncArrayProxy(a)[:, :].get() - assert np.array_equal(data, readback_data) - - # Compare with zarr-python - z = zarr.v2.create( - shape=data.shape, - chunks=(16, 16), - dtype=">u2" if dtype_store_endian == "big" else " None: - with pytest.raises(ValueError): - Array.create( - store / "invalid_chunk_shape", - shape=(16, 16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - fill_value=0, - ) - - with pytest.raises(ValueError): - Array.create( - store / "invalid_endian", - shape=(16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - fill_value=0, - codecs=[ - BytesCodec(endian="big"), - TransposeCodec(order=order_from_dim("F", 2)), - ], - ) - - with pytest.raises(TypeError): - Array.create( - store / "invalid_order", - shape=(16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - fill_value=0, - codecs=[ - BytesCodec(), - TransposeCodec(order="F"), - ], - ) - - with pytest.raises(ValueError): - Array.create( - store / "invalid_missing_bytes_codec", - shape=(16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - fill_value=0, - codecs=[ - TransposeCodec(order=order_from_dim("F", 2)), - ], - ) - - with pytest.raises(ValueError): - Array.create( - store / "invalid_inner_chunk_shape", - shape=(16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - fill_value=0, - codecs=[ - ShardingCodec(chunk_shape=(8,)), - ], - ) - with pytest.raises(ValueError): - Array.create( - store / "invalid_inner_chunk_shape", - shape=(16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - fill_value=0, - codecs=[ - ShardingCodec(chunk_shape=(8, 7)), - ], - ) - - with pytest.warns(UserWarning): - Array.create( - store / "warning_inefficient_codecs", - shape=(16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - fill_value=0, - codecs=[ - ShardingCodec(chunk_shape=(8, 8)), - GzipCodec(), - ], - ) - - -async def test_resize(store: Store) -> None: - data = np.zeros((16, 18), dtype="uint16") - - a = await AsyncArray.create( - store / "resize", - shape=data.shape, - chunk_shape=(10, 10), - dtype=data.dtype, - chunk_key_encoding=("v2", "."), - fill_value=1, - ) - - await _AsyncArrayProxy(a)[:16, :18].set(data) - assert await (store / "resize" / "0.0").get() is not None - assert await (store / "resize" / "0.1").get() is not None - assert await (store / "resize" / "1.0").get() is not None - assert await (store / "resize" / "1.1").get() is not None - - a = await a.resize((10, 12)) - assert a.metadata.shape == (10, 12) - assert await (store / "resize" / "0.0").get() is not None - assert await (store / "resize" / "0.1").get() is not None - assert await (store / "resize" / "1.0").get() is None - assert await (store / "resize" / "1.1").get() is None - - -async def test_blosc_evolve(store: Store) -> None: - await AsyncArray.create( - store / "blosc_evolve_u1", - shape=(16, 16), - chunk_shape=(16, 16), - dtype="uint8", - fill_value=0, - codecs=[BytesCodec(), BloscCodec()], - ) - - zarr_json = json.loads((await (store / "blosc_evolve_u1" / "zarr.json").get()).to_bytes()) - blosc_configuration_json = zarr_json["codecs"][1]["configuration"] - assert blosc_configuration_json["typesize"] == 1 - assert blosc_configuration_json["shuffle"] == "bitshuffle" - - await AsyncArray.create( - store / "blosc_evolve_u2", - shape=(16, 16), - chunk_shape=(16, 16), - dtype="uint16", - fill_value=0, - codecs=[BytesCodec(), BloscCodec()], - ) - - zarr_json = json.loads((await (store / "blosc_evolve_u2" / "zarr.json").get()).to_bytes()) - blosc_configuration_json = zarr_json["codecs"][1]["configuration"] - assert blosc_configuration_json["typesize"] == 2 - assert blosc_configuration_json["shuffle"] == "shuffle" - - await AsyncArray.create( - store / "sharding_blosc_evolve", - shape=(16, 16), - chunk_shape=(16, 16), - dtype="uint16", - fill_value=0, - codecs=[ShardingCodec(chunk_shape=(16, 16), codecs=[BytesCodec(), BloscCodec()])], - ) - - zarr_json = json.loads((await (store / "sharding_blosc_evolve" / "zarr.json").get()).to_bytes()) - blosc_configuration_json = zarr_json["codecs"][0]["configuration"]["codecs"][1]["configuration"] - assert blosc_configuration_json["typesize"] == 2 - assert blosc_configuration_json["shuffle"] == "shuffle" - - -def test_exists_ok(store: Store) -> None: - Array.create( - store / "exists_ok", - shape=(16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - ) - with pytest.raises(AssertionError): - Array.create( - store / "exists_ok", - shape=(16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - ) - Array.create( - store / "exists_ok", - shape=(16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - exists_ok=True, - ) - - -def test_update_attributes_array(store: Store) -> None: - data = np.zeros((16, 18), dtype="uint16") - - a = Array.create( - store / "update_attributes", - shape=data.shape, - chunk_shape=(10, 10), - dtype=data.dtype, - fill_value=1, - attributes={"hello": "world"}, - ) - - a = Array.open(store / "update_attributes") - assert a.attrs["hello"] == "world" - - a.update_attributes({"hello": "zarrita"}) - - a = Array.open(store / "update_attributes") - assert a.attrs["hello"] == "zarrita" diff --git a/tests/v3/test_codecs/__init__.py b/tests/v3/test_codecs/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/v3/test_codecs/test_blosc.py b/tests/v3/test_codecs/test_blosc.py new file mode 100644 index 0000000000..04c4c671c8 --- /dev/null +++ b/tests/v3/test_codecs/test_blosc.py @@ -0,0 +1,57 @@ +import json + +import numpy as np +import pytest + +from zarr.abc.store import Store +from zarr.array import AsyncArray +from zarr.buffer import default_buffer_prototype +from zarr.codecs import BloscCodec, BytesCodec, ShardingCodec +from zarr.store.core import StorePath + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("dtype", ["uint8", "uint16"]) +async def test_blosc_evolve(store: Store, dtype: str) -> None: + typesize = np.dtype(dtype).itemsize + path = "blosc_evolve" + spath = StorePath(store, path) + await AsyncArray.create( + spath, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=dtype, + fill_value=0, + codecs=[BytesCodec(), BloscCodec()], + ) + + zarr_json = json.loads( + (await store.get(f"{path}/zarr.json", prototype=default_buffer_prototype)).to_bytes() + ) + blosc_configuration_json = zarr_json["codecs"][1]["configuration"] + assert blosc_configuration_json["typesize"] == typesize + if typesize == 1: + assert blosc_configuration_json["shuffle"] == "bitshuffle" + else: + assert blosc_configuration_json["shuffle"] == "shuffle" + + path2 = "blosc_evolve_sharding" + spath2 = StorePath(store, path2) + await AsyncArray.create( + spath2, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=dtype, + fill_value=0, + codecs=[ShardingCodec(chunk_shape=(16, 16), codecs=[BytesCodec(), BloscCodec()])], + ) + + zarr_json = json.loads( + (await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype)).to_bytes() + ) + blosc_configuration_json = zarr_json["codecs"][0]["configuration"]["codecs"][1]["configuration"] + assert blosc_configuration_json["typesize"] == typesize + if typesize == 1: + assert blosc_configuration_json["shuffle"] == "bitshuffle" + else: + assert blosc_configuration_json["shuffle"] == "shuffle" diff --git a/tests/v3/test_codecs/test_codecs.py b/tests/v3/test_codecs/test_codecs.py new file mode 100644 index 0000000000..297848121f --- /dev/null +++ b/tests/v3/test_codecs/test_codecs.py @@ -0,0 +1,503 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass + +import numpy as np +import pytest + +import zarr.v2 +from zarr.abc.codec import Codec +from zarr.abc.store import Store +from zarr.array import Array, AsyncArray +from zarr.buffer import default_buffer_prototype +from zarr.codecs import ( + BytesCodec, + GzipCodec, + ShardingCodec, + TransposeCodec, +) +from zarr.common import ChunkCoords, MemoryOrder +from zarr.config import config +from zarr.indexing import Selection, morton_order_iter +from zarr.store import StorePath +from zarr.testing.utils import assert_bytes_equal + + +@dataclass +class ArrayRequest: + shape: ChunkCoords + dtype: str + order: MemoryOrder + + +@dataclass(frozen=True) +class _AsyncArrayProxy: + array: AsyncArray + + def __getitem__(self, selection: Selection) -> _AsyncArraySelectionProxy: + return _AsyncArraySelectionProxy(self.array, selection) + + +@dataclass(frozen=True) +class _AsyncArraySelectionProxy: + array: AsyncArray + selection: Selection + + async def get(self) -> np.ndarray: + return await self.array.getitem(self.selection) + + async def set(self, value: np.ndarray) -> None: + return await self.array.setitem(self.selection, value) + + +@pytest.fixture +def array_fixture(request: pytest.FixtureRequest) -> np.ndarray: + array_request: ArrayRequest = request.param + return ( + np.arange(np.prod(array_request.shape)) + .reshape(array_request.shape, order=array_request.order) + .astype(array_request.dtype) + ) + + +def order_from_dim(order: MemoryOrder, ndim: int) -> tuple[int, ...]: + if order == "F": + return tuple(ndim - x - 1 for x in range(ndim)) + else: + return tuple(range(ndim)) + + +def test_sharding_pickle() -> None: + """ + Test that sharding codecs can be pickled + """ + pass + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("input_order", ["F", "C"]) +@pytest.mark.parametrize("store_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_write_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_read_order", ["F", "C"]) +@pytest.mark.parametrize("with_sharding", [True, False]) +async def test_order( + store: Store, + input_order: MemoryOrder, + store_order: MemoryOrder, + runtime_write_order: MemoryOrder, + runtime_read_order: MemoryOrder, + with_sharding: bool, +) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((32, 8), order=input_order) + path = "order" + spath = StorePath(store, path=path) + codecs_: list[Codec] = ( + [ + ShardingCodec( + chunk_shape=(16, 8), + codecs=[TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()], + ) + ] + if with_sharding + else [TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()] + ) + + with config.set({"array.order": runtime_write_order}): + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(32, 8), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=codecs_, + ) + + await _AsyncArrayProxy(a)[:, :].set(data) + read_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, read_data) + + with config.set({"array.order": runtime_read_order}): + a = await AsyncArray.open( + spath, + ) + read_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, read_data) + + if runtime_read_order == "F": + assert read_data.flags["F_CONTIGUOUS"] + assert not read_data.flags["C_CONTIGUOUS"] + else: + assert not read_data.flags["F_CONTIGUOUS"] + assert read_data.flags["C_CONTIGUOUS"] + + if not with_sharding: + # Compare with zarr-python + z = zarr.v2.create( + shape=data.shape, + chunks=(32, 8), + dtype=" None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16), order=input_order) + path = "order_implicit" + spath = StorePath(store, path) + codecs_: list[Codec] | None = [ShardingCodec(chunk_shape=(8, 8))] if with_sharding else None + + with config.set({"array.order": runtime_write_order}): + a = Array.create( + spath, + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + codecs=codecs_, + ) + + a[:, :] = data + + with config.set({"array.order": runtime_read_order}): + a = Array.open(spath) + read_data = a[:, :] + assert np.array_equal(data, read_data) + + if runtime_read_order == "F": + assert read_data.flags["F_CONTIGUOUS"] + assert not read_data.flags["C_CONTIGUOUS"] + else: + assert not read_data.flags["F_CONTIGUOUS"] + assert read_data.flags["C_CONTIGUOUS"] + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_open(store: Store) -> None: + spath = StorePath(store) + a = Array.create( + spath, + shape=(16, 16), + chunk_shape=(16, 16), + dtype="int32", + fill_value=0, + ) + b = Array.open(spath) + assert a.metadata == b.metadata + + +def test_morton() -> None: + assert list(morton_order_iter((2, 2))) == [(0, 0), (1, 0), (0, 1), (1, 1)] + assert list(morton_order_iter((2, 2, 2))) == [ + (0, 0, 0), + (1, 0, 0), + (0, 1, 0), + (1, 1, 0), + (0, 0, 1), + (1, 0, 1), + (0, 1, 1), + (1, 1, 1), + ] + assert list(morton_order_iter((2, 2, 2, 2))) == [ + (0, 0, 0, 0), + (1, 0, 0, 0), + (0, 1, 0, 0), + (1, 1, 0, 0), + (0, 0, 1, 0), + (1, 0, 1, 0), + (0, 1, 1, 0), + (1, 1, 1, 0), + (0, 0, 0, 1), + (1, 0, 0, 1), + (0, 1, 0, 1), + (1, 1, 0, 1), + (0, 0, 1, 1), + (1, 0, 1, 1), + (0, 1, 1, 1), + (1, 1, 1, 1), + ] + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_write_partial_chunks(store: Store) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + spath = StorePath(store) + a = Array.create( + spath, + shape=data.shape, + chunk_shape=(20, 20), + dtype=data.dtype, + fill_value=1, + ) + a[0:16, 0:16] = data + assert np.array_equal(a[0:16, 0:16], data) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_delete_empty_chunks(store: Store) -> None: + data = np.ones((16, 16)) + path = "delete_empty_chunks" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(32, 32), + dtype=data.dtype, + fill_value=1, + ) + await _AsyncArrayProxy(a)[:16, :16].set(np.zeros((16, 16))) + await _AsyncArrayProxy(a)[:16, :16].set(data) + assert np.array_equal(await _AsyncArrayProxy(a)[:16, :16].get(), data) + assert await store.get(f"{path}/c0/0", prototype=default_buffer_prototype) is None + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_zarr_compat(store: Store) -> None: + data = np.zeros((16, 18), dtype="uint16") + path = "zarr_compat3" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(10, 10), + dtype=data.dtype, + chunk_key_encoding=("v2", "."), + fill_value=1, + ) + + z2 = zarr.v2.create( + shape=data.shape, + chunks=(10, 10), + dtype=data.dtype, + compressor=None, + fill_value=1, + ) + + await _AsyncArrayProxy(a)[:16, :18].set(data) + z2[:16, :18] = data + assert np.array_equal(data, await _AsyncArrayProxy(a)[:16, :18].get()) + assert np.array_equal(data, z2[:16, :18]) + + assert_bytes_equal( + z2._store["0.0"], await store.get(f"{path}/0.0", prototype=default_buffer_prototype) + ) + assert_bytes_equal( + z2._store["0.1"], await store.get(f"{path}/0.1", prototype=default_buffer_prototype) + ) + assert_bytes_equal( + z2._store["1.0"], await store.get(f"{path}/1.0", prototype=default_buffer_prototype) + ) + assert_bytes_equal( + z2._store["1.1"], await store.get(f"{path}/1.1", prototype=default_buffer_prototype) + ) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_zarr_compat_F(store: Store) -> None: + data = np.zeros((16, 18), dtype="uint16", order="F") + path = "zarr_compatF3" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(10, 10), + dtype=data.dtype, + chunk_key_encoding=("v2", "."), + fill_value=1, + codecs=[TransposeCodec(order=order_from_dim("F", data.ndim)), BytesCodec()], + ) + + z2 = zarr.v2.create( + shape=data.shape, + chunks=(10, 10), + dtype=data.dtype, + compressor=None, + order="F", + fill_value=1, + ) + + await _AsyncArrayProxy(a)[:16, :18].set(data) + z2[:16, :18] = data + assert np.array_equal(data, await _AsyncArrayProxy(a)[:16, :18].get()) + assert np.array_equal(data, z2[:16, :18]) + + assert_bytes_equal( + z2._store["0.0"], await store.get(f"{path}/0.0", prototype=default_buffer_prototype) + ) + assert_bytes_equal( + z2._store["0.1"], await store.get(f"{path}/0.1", prototype=default_buffer_prototype) + ) + assert_bytes_equal( + z2._store["1.0"], await store.get(f"{path}/1.0", prototype=default_buffer_prototype) + ) + assert_bytes_equal( + z2._store["1.1"], await store.get(f"{path}/1.1", prototype=default_buffer_prototype) + ) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_dimension_names(store: Store) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + path = "dimension_names" + spath = StorePath(store, path) + await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + dimension_names=("x", "y"), + ) + + assert (await AsyncArray.open(spath)).metadata.dimension_names == ( + "x", + "y", + ) + path2 = "dimension_names2" + spath2 = StorePath(store, path2) + await AsyncArray.create( + spath2, + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + ) + + assert (await AsyncArray.open(spath2)).metadata.dimension_names is None + zarr_json_buffer = await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype) + assert zarr_json_buffer is not None + assert "dimension_names" not in json.loads(zarr_json_buffer.to_bytes()) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_invalid_metadata(store: Store) -> None: + spath = StorePath(store, "invalid_metadata") + with pytest.raises(ValueError): + Array.create( + spath, + shape=(16, 16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + ) + spath2 = StorePath(store, "invalid_endian") + with pytest.raises(ValueError): + Array.create( + spath2, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + BytesCodec(endian="big"), + TransposeCodec(order=order_from_dim("F", 2)), + ], + ) + spath3 = StorePath(store, "invalid_order") + with pytest.raises(TypeError): + Array.create( + spath3, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + BytesCodec(), + TransposeCodec(order="F"), + ], + ) + spath4 = StorePath(store, "invalid_missing_bytes_codec") + with pytest.raises(ValueError): + Array.create( + spath4, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + TransposeCodec(order=order_from_dim("F", 2)), + ], + ) + spath5 = StorePath(store, "invalid_inner_chunk_shape") + with pytest.raises(ValueError): + Array.create( + spath5, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + ShardingCodec(chunk_shape=(8,)), + ], + ) + spath6 = StorePath(store, "invalid_inner_chunk_shape") + with pytest.raises(ValueError): + Array.create( + spath6, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + ShardingCodec(chunk_shape=(8, 7)), + ], + ) + spath7 = StorePath(store, "warning_inefficient_codecs") + with pytest.warns(UserWarning): + Array.create( + spath7, + shape=(16, 16), + chunk_shape=(16, 16), + dtype=np.dtype("uint8"), + fill_value=0, + codecs=[ + ShardingCodec(chunk_shape=(8, 8)), + GzipCodec(), + ], + ) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_resize(store: Store) -> None: + data = np.zeros((16, 18), dtype="uint16") + path = "resize" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(10, 10), + dtype=data.dtype, + chunk_key_encoding=("v2", "."), + fill_value=1, + ) + + await _AsyncArrayProxy(a)[:16, :18].set(data) + assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype) is not None + assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype) is not None + assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype) is not None + assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype) is not None + + a = await a.resize((10, 12)) + assert a.metadata.shape == (10, 12) + assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype) is not None + assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype) is not None + assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype) is None + assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype) is None diff --git a/tests/v3/test_codecs/test_endian.py b/tests/v3/test_codecs/test_endian.py new file mode 100644 index 0000000000..a34fd5c4f4 --- /dev/null +++ b/tests/v3/test_codecs/test_endian.py @@ -0,0 +1,87 @@ +from typing import Literal + +import numpy as np +import pytest + +import zarr.v2 +from zarr.abc.store import Store +from zarr.array import AsyncArray +from zarr.buffer import default_buffer_prototype +from zarr.codecs import BytesCodec +from zarr.store.core import StorePath +from zarr.testing.utils import assert_bytes_equal + +from .test_codecs import _AsyncArrayProxy + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("endian", ["big", "little"]) +async def test_endian(store: Store, endian: Literal["big", "little"]) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + path = "endian" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=[BytesCodec(endian=endian)], + ) + + await _AsyncArrayProxy(a)[:, :].set(data) + readback_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, readback_data) + + # Compare with zarr-python + z = zarr.v2.create( + shape=data.shape, + chunks=(16, 16), + dtype=">u2" if endian == "big" else "u2", "u2", " None: + data = np.arange(0, 256, dtype=dtype_input_endian).reshape((16, 16)) + path = "endian" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(16, 16), + dtype="uint16", + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=[BytesCodec(endian=dtype_store_endian)], + ) + + await _AsyncArrayProxy(a)[:, :].set(data) + readback_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, readback_data) + + # Compare with zarr-python + z = zarr.v2.create( + shape=data.shape, + chunks=(16, 16), + dtype=">u2" if dtype_store_endian == "big" else " None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + + a = Array.create( + StorePath(store), + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + codecs=[BytesCodec(), GzipCodec()], + ) + + a[:, :] = data + assert np.array_equal(data, a[:, :]) diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py new file mode 100644 index 0000000000..98dbb4917d --- /dev/null +++ b/tests/v3/test_codecs/test_sharding.py @@ -0,0 +1,316 @@ +import numpy as np +import pytest + +from zarr.abc.store import Store +from zarr.array import Array, AsyncArray +from zarr.buffer import default_buffer_prototype +from zarr.codecs import ( + BloscCodec, + BytesCodec, + ShardingCodec, + ShardingCodecIndexLocation, + TransposeCodec, +) +from zarr.store.core import StorePath + +from .test_codecs import ArrayRequest, _AsyncArrayProxy, order_from_dim + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 1, dtype="uint8", order="C"), + ArrayRequest(shape=(128,) * 2, dtype="uint8", order="C"), + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +@pytest.mark.parametrize("offset", [0, 10]) +def test_sharding( + store: Store, array_fixture: np.ndarray, index_location: ShardingCodecIndexLocation, offset: int +) -> None: + """ + Test that we can create an array with a sharding codec, write data to that array, and get + the same data out via indexing. + """ + data = array_fixture + spath = StorePath(store) + arr = Array.create( + spath, + shape=tuple(s + offset for s in data.shape), + chunk_shape=(64,) * data.ndim, + dtype=data.dtype, + fill_value=6, + codecs=[ + ShardingCodec( + chunk_shape=(32,) * data.ndim, + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ) + ], + ) + write_region = tuple(slice(offset, None) for dim in range(data.ndim)) + arr[write_region] = data + + if offset > 0: + empty_region = tuple(slice(0, offset) for dim in range(data.ndim)) + assert np.all(arr[empty_region] == arr.metadata.fill_value) + + read_data = arr[write_region] + assert data.shape == read_data.shape + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +def test_sharding_partial( + store: Store, array_fixture: np.ndarray, index_location: ShardingCodecIndexLocation +) -> None: + data = array_fixture + spath = StorePath(store) + a = Array.create( + spath, + shape=tuple(a + 10 for a in data.shape), + chunk_shape=(64, 64, 64), + dtype=data.dtype, + fill_value=0, + codecs=[ + ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ) + ], + ) + + a[10:, 10:, 10:] = data + + read_data = a[0:10, 0:10, 0:10] + assert np.all(read_data == 0) + + read_data = a[10:, 10:, 10:] + assert data.shape == read_data.shape + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_sharding_partial_read( + store: Store, array_fixture: np.ndarray, index_location: ShardingCodecIndexLocation +) -> None: + data = array_fixture + spath = StorePath(store) + a = Array.create( + spath, + shape=tuple(a + 10 for a in data.shape), + chunk_shape=(64, 64, 64), + dtype=data.dtype, + fill_value=1, + codecs=[ + ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ) + ], + ) + + read_data = a[0:10, 0:10, 0:10] + assert np.all(read_data == 1) + + +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_sharding_partial_overwrite( + store: Store, array_fixture: np.ndarray, index_location: ShardingCodecIndexLocation +) -> None: + data = array_fixture[:10, :10, :10] + spath = StorePath(store) + a = Array.create( + spath, + shape=tuple(a + 10 for a in data.shape), + chunk_shape=(64, 64, 64), + dtype=data.dtype, + fill_value=1, + codecs=[ + ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + TransposeCodec(order=order_from_dim("F", data.ndim)), + BytesCodec(), + BloscCodec(cname="lz4"), + ], + index_location=index_location, + ) + ], + ) + + a[:10, :10, :10] = data + + read_data = a[0:10, 0:10, 0:10] + assert np.array_equal(data, read_data) + + data = data + 10 + a[:10, :10, :10] = data + read_data = a[0:10, 0:10, 0:10] + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize( + "array_fixture", + [ + ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), + ], + indirect=["array_fixture"], +) +@pytest.mark.parametrize( + "outer_index_location", + ["start", "end"], +) +@pytest.mark.parametrize( + "inner_index_location", + ["start", "end"], +) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_nested_sharding( + store: Store, + array_fixture: np.ndarray, + outer_index_location: ShardingCodecIndexLocation, + inner_index_location: ShardingCodecIndexLocation, +) -> None: + data = array_fixture + spath = StorePath(store) + a = Array.create( + spath, + shape=data.shape, + chunk_shape=(64, 64, 64), + dtype=data.dtype, + fill_value=0, + codecs=[ + ShardingCodec( + chunk_shape=(32, 32, 32), + codecs=[ + ShardingCodec(chunk_shape=(16, 16, 16), index_location=inner_index_location) + ], + index_location=outer_index_location, + ) + ], + ) + + a[:, :, :] = data + + read_data = a[0 : data.shape[0], 0 : data.shape[1], 0 : data.shape[2]] + assert data.shape == read_data.shape + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_open_sharding(store: Store) -> None: + path = "open_sharding" + spath = StorePath(store, path) + a = Array.create( + spath, + shape=(16, 16), + chunk_shape=(16, 16), + dtype="int32", + fill_value=0, + codecs=[ + ShardingCodec( + chunk_shape=(8, 8), + codecs=[ + TransposeCodec(order=order_from_dim("F", 2)), + BytesCodec(), + BloscCodec(), + ], + ) + ], + ) + b = Array.open(spath) + assert a.metadata == b.metadata + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_write_partial_sharded_chunks(store: Store) -> None: + data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16)) + spath = StorePath(store) + a = Array.create( + spath, + shape=(40, 40), + chunk_shape=(20, 20), + dtype=data.dtype, + fill_value=1, + codecs=[ + ShardingCodec( + chunk_shape=(10, 10), + codecs=[ + BytesCodec(), + BloscCodec(), + ], + ) + ], + ) + a[0:16, 0:16] = data + assert np.array_equal(a[0:16, 0:16], data) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_delete_empty_shards(store: Store) -> None: + path = "delete_empty_shards" + spath = StorePath(store, path) + a = await AsyncArray.create( + spath, + shape=(16, 16), + chunk_shape=(8, 16), + dtype="uint16", + fill_value=1, + codecs=[ShardingCodec(chunk_shape=(8, 8))], + ) + await _AsyncArrayProxy(a)[:, :].set(np.zeros((16, 16))) + await _AsyncArrayProxy(a)[8:, :].set(np.ones((8, 16))) + await _AsyncArrayProxy(a)[:, 8:].set(np.ones((16, 8))) + # chunk (0, 0) is full + # chunks (0, 1), (1, 0), (1, 1) are empty + # shard (0, 0) is half-full + # shard (1, 0) is empty + + data = np.ones((16, 16), dtype="uint16") + data[:8, :8] = 0 + assert np.array_equal(data, await _AsyncArrayProxy(a)[:, :].get()) + assert await store.get(f"{path}/c/1/0", prototype=default_buffer_prototype) is None + chunk_bytes = await store.get(f"{path}/c/0/0", prototype=default_buffer_prototype) + assert chunk_bytes is not None and len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 diff --git a/tests/v3/test_codecs/test_transpose.py b/tests/v3/test_codecs/test_transpose.py new file mode 100644 index 0000000000..3fd4350299 --- /dev/null +++ b/tests/v3/test_codecs/test_transpose.py @@ -0,0 +1,121 @@ +import numpy as np +import pytest + +import zarr.v2 +from zarr.abc.codec import Codec +from zarr.abc.store import Store +from zarr.array import Array, AsyncArray +from zarr.buffer import default_buffer_prototype +from zarr.codecs import BytesCodec, ShardingCodec, TransposeCodec +from zarr.common import MemoryOrder +from zarr.config import config +from zarr.store.core import StorePath + +from .test_codecs import _AsyncArrayProxy + + +@pytest.mark.parametrize("input_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_write_order", ["F", "C"]) +@pytest.mark.parametrize("runtime_read_order", ["F", "C"]) +@pytest.mark.parametrize("with_sharding", [True, False]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_transpose( + store: Store, + input_order: MemoryOrder, + runtime_write_order: MemoryOrder, + runtime_read_order: MemoryOrder, + with_sharding: bool, +) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8), order=input_order) + spath = StorePath(store, path="transpose") + codecs_: list[Codec] = ( + [ + ShardingCodec( + chunk_shape=(1, 16, 8), + codecs=[TransposeCodec(order=(2, 1, 0)), BytesCodec()], + ) + ] + if with_sharding + else [TransposeCodec(order=(2, 1, 0)), BytesCodec()] + ) + with config.set({"array.order": runtime_write_order}): + a = await AsyncArray.create( + spath, + shape=data.shape, + chunk_shape=(1, 32, 8), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=codecs_, + ) + + await _AsyncArrayProxy(a)[:, :].set(data) + read_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, read_data) + + with config.set({"array.order": runtime_read_order}): + a = await AsyncArray.open( + spath, + ) + read_data = await _AsyncArrayProxy(a)[:, :].get() + assert np.array_equal(data, read_data) + + if runtime_read_order == "F": + assert read_data.flags["F_CONTIGUOUS"] + assert not read_data.flags["C_CONTIGUOUS"] + else: + assert not read_data.flags["F_CONTIGUOUS"] + assert read_data.flags["C_CONTIGUOUS"] + + if not with_sharding: + # Compare with zarr-python + z = zarr.v2.create( + shape=data.shape, + chunks=(1, 32, 8), + dtype=" None: + shape = [i + 3 for i in range(len(order))] + data = np.arange(0, np.prod(shape), dtype="uint16").reshape(shape) + spath = StorePath(store, "transpose_non_self_inverse") + a = Array.create( + spath, + shape=data.shape, + chunk_shape=data.shape, + dtype=data.dtype, + fill_value=0, + codecs=[TransposeCodec(order=order), BytesCodec()], + ) + a[:, :] = data + read_data = a[:, :] + assert np.array_equal(data, read_data) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_transpose_invalid( + store: Store, +) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8)) + spath = StorePath(store, "transpose_invalid") + for order in [(1, 0), (3, 2, 1), (3, 3, 1)]: + with pytest.raises(ValueError): + Array.create( + spath, + shape=data.shape, + chunk_shape=(1, 32, 8), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=[TransposeCodec(order=order), BytesCodec()], + ) diff --git a/tests/v3/test_codecs/test_zstd.py b/tests/v3/test_codecs/test_zstd.py new file mode 100644 index 0000000000..1e1b1e02c9 --- /dev/null +++ b/tests/v3/test_codecs/test_zstd.py @@ -0,0 +1,25 @@ +import numpy as np +import pytest + +from zarr.abc.store import Store +from zarr.array import Array +from zarr.codecs import BytesCodec, ZstdCodec +from zarr.store.core import StorePath + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("checksum", [True, False]) +def test_zstd(store: Store, checksum: bool) -> None: + data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) + + a = Array.create( + StorePath(store, path="zstd"), + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + codecs=[BytesCodec(), ZstdCodec(level=0, checksum=checksum)], + ) + + a[:, :] = data + assert np.array_equal(data, a[:, :]) From 0d40ce56ba6280d352677f2c3fa9ab6d1b3df162 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jul 2024 23:31:31 +0200 Subject: [PATCH 6/9] add test for pickling sharding codec, and make it pass --- src/zarr/codecs/sharding.py | 16 ++++++++++++++++ tests/v3/test_codecs/test_sharding.py | 7 +++++++ 2 files changed, 23 insertions(+) diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index def95b206d..e3ef664b94 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -324,6 +324,22 @@ def __init__( object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) + # todo: typedict return type + def __getstate__(self) -> dict[str, Any]: + return self.to_dict() + + def __setstate__(self, state: dict[str, Any]) -> None: + config = state["configuration"] + object.__setattr__(self, "chunk_shape", parse_shapelike(config["chunk_shape"])) + object.__setattr__(self, "codecs", parse_codecs(config["codecs"])) + object.__setattr__(self, "index_codecs", parse_codecs(config["index_codecs"])) + object.__setattr__(self, "index_location", parse_index_location(config["index_location"])) + + # Use instance-local lru_cache to avoid memory leaks + object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) + object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) + object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) + @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration(data, "sharding_indexed") diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index 98dbb4917d..855e9f8561 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -1,3 +1,5 @@ +import pickle + import numpy as np import pytest @@ -314,3 +316,8 @@ async def test_delete_empty_shards(store: Store) -> None: assert await store.get(f"{path}/c/1/0", prototype=default_buffer_prototype) is None chunk_bytes = await store.get(f"{path}/c/0/0", prototype=default_buffer_prototype) assert chunk_bytes is not None and len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 + + +def test_pickle() -> None: + codec = ShardingCodec(chunk_shape=(8, 8)) + assert pickle.loads(pickle.dumps(codec)) == codec From 8b6e8ee97ddc9f99c37df92568e852ae27bdd2df Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 3 Jul 2024 23:43:19 +0200 Subject: [PATCH 7/9] Revert "use tmpdir for test" This reverts commit 6ad2ca61b78fd1ecfc10d7fc80ae5055ed1a9d8b. --- tests/v2/test_storage.py | 61 ++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/tests/v2/test_storage.py b/tests/v2/test_storage.py index 0f010bb443..88e99e91a1 100644 --- a/tests/v2/test_storage.py +++ b/tests/v2/test_storage.py @@ -2227,38 +2227,38 @@ def test_format_compatibility(): np.random.seed(42) arrays_chunks = [ - (np.arange(1111, dtype=" Date: Wed, 3 Jul 2024 23:58:54 +0200 Subject: [PATCH 8/9] move fixtures into conftest.py --- tests/v3/conftest.py | 20 +++++++++++++++++++- tests/v3/test_codecs/test_codecs.py | 19 +------------------ tests/v3/test_codecs/test_sharding.py | 3 ++- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 79ce1d4b67..8b75d9f2f8 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -7,7 +7,7 @@ from _pytest.compat import LEGACY_PATH from zarr.abc.store import Store -from zarr.common import ZarrFormat +from zarr.common import ChunkCoords, MemoryOrder, ZarrFormat from zarr.group import AsyncGroup if TYPE_CHECKING: @@ -15,6 +15,7 @@ import pathlib from dataclasses import dataclass, field +import numpy as np import pytest from zarr.store import LocalStore, MemoryStore, StorePath @@ -93,3 +94,20 @@ def xp(request: pytest.FixtureRequest) -> Iterator[ModuleType]: """Fixture to parametrize over numpy-like libraries""" yield pytest.importorskip(request.param) + + +@dataclass +class ArrayRequest: + shape: ChunkCoords + dtype: str + order: MemoryOrder + + +@pytest.fixture +def array_fixture(request: pytest.FixtureRequest) -> np.ndarray: + array_request: ArrayRequest = request.param + return ( + np.arange(np.prod(array_request.shape)) + .reshape(array_request.shape, order=array_request.order) + .astype(array_request.dtype) + ) diff --git a/tests/v3/test_codecs/test_codecs.py b/tests/v3/test_codecs/test_codecs.py index 297848121f..1104805d4b 100644 --- a/tests/v3/test_codecs/test_codecs.py +++ b/tests/v3/test_codecs/test_codecs.py @@ -17,20 +17,13 @@ ShardingCodec, TransposeCodec, ) -from zarr.common import ChunkCoords, MemoryOrder +from zarr.common import MemoryOrder from zarr.config import config from zarr.indexing import Selection, morton_order_iter from zarr.store import StorePath from zarr.testing.utils import assert_bytes_equal -@dataclass -class ArrayRequest: - shape: ChunkCoords - dtype: str - order: MemoryOrder - - @dataclass(frozen=True) class _AsyncArrayProxy: array: AsyncArray @@ -51,16 +44,6 @@ async def set(self, value: np.ndarray) -> None: return await self.array.setitem(self.selection, value) -@pytest.fixture -def array_fixture(request: pytest.FixtureRequest) -> np.ndarray: - array_request: ArrayRequest = request.param - return ( - np.arange(np.prod(array_request.shape)) - .reshape(array_request.shape, order=array_request.order) - .astype(array_request.dtype) - ) - - def order_from_dim(order: MemoryOrder, ndim: int) -> tuple[int, ...]: if order == "F": return tuple(ndim - x - 1 for x in range(ndim)) diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index 855e9f8561..f0031349cb 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -15,7 +15,8 @@ ) from zarr.store.core import StorePath -from .test_codecs import ArrayRequest, _AsyncArrayProxy, order_from_dim +from ..conftest import ArrayRequest +from .test_codecs import _AsyncArrayProxy, order_from_dim @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) From dad3c5fbdd1aa5b795baf6dec5e10496494b8369 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Jul 2024 10:19:28 +0200 Subject: [PATCH 9/9] Update tests/v3/test_codecs/test_endian.py --- tests/v3/test_codecs/test_endian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_codecs/test_endian.py b/tests/v3/test_codecs/test_endian.py index a34fd5c4f4..8301a424b9 100644 --- a/tests/v3/test_codecs/test_endian.py +++ b/tests/v3/test_codecs/test_endian.py @@ -34,7 +34,7 @@ async def test_endian(store: Store, endian: Literal["big", "little"]) -> None: readback_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, readback_data) - # Compare with zarr-python + # Compare with v2 z = zarr.v2.create( shape=data.shape, chunks=(16, 16),