From e3ad2203a87462e86325f99edf8be0ec6aadeaa4 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 13 Feb 2025 07:49:19 -0700 Subject: [PATCH 1/2] Add shards to array strategy --- changes/2822.feature.rst | 1 + src/zarr/testing/strategies.py | 53 ++++++++++++++++++++++++---------- 2 files changed, 38 insertions(+), 16 deletions(-) create mode 100644 changes/2822.feature.rst diff --git a/changes/2822.feature.rst b/changes/2822.feature.rst new file mode 100644 index 0000000000..37b3bf1faf --- /dev/null +++ b/changes/2822.feature.rst @@ -0,0 +1 @@ +Add arbitrary `shards` to Hypothesis strategy for generating arrays. diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 0883d79bf0..fc37cc47eb 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -110,6 +110,32 @@ def numpy_arrays( return draw(npst.arrays(dtype=dtype, shape=shapes)) +@st.composite # type: ignore[misc] +def chunk_shapes(draw: st.DrawFn, *, shape: tuple[int, ...]) -> tuple[int, ...]: + # We want this strategy to shrink towards arrays with smaller number of chunks + # 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks + numchunks = draw( + st.tuples(*[st.integers(min_value=0 if size == 0 else 1, max_value=size) for size in shape]) + ) + # 2. and now generate the chunks tuple + return tuple( + size // nchunks if nchunks > 0 else 0 + for size, nchunks in zip(shape, numchunks, strict=True) + ) + + +@st.composite # type: ignore[misc] +def shard_shapes( + draw: st.DrawFn, *, shape: tuple[int, ...], chunk_shape: tuple[int, ...] +) -> tuple[int, ...]: + # We want this strategy to shrink towards arrays with smaller number of shards + # shards must be an integral number of chunks + assert all(c != 0 for c in chunk_shape) + numchunks = tuple(s // c for s, c in zip(shape, chunk_shape, strict=True)) + multiples = tuple(draw(st.integers(min_value=1, max_value=nc)) for nc in numchunks) + return tuple(m * c for m, c in zip(multiples, chunk_shape, strict=True)) + + @st.composite # type: ignore[misc] def np_array_and_chunks( draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = numpy_arrays @@ -119,19 +145,7 @@ def np_array_and_chunks( Returns: a tuple of the array and a suitable random chunking for it. """ array = draw(arrays) - # We want this strategy to shrink towards arrays with smaller number of chunks - # 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks - numchunks = draw( - st.tuples( - *[st.integers(min_value=0 if size == 0 else 1, max_value=size) for size in array.shape] - ) - ) - # 2. and now generate the chunks tuple - chunks = tuple( - size // nchunks if nchunks > 0 else 0 - for size, nchunks in zip(array.shape, numchunks, strict=True) - ) - return (array, chunks) + return (array, draw(chunk_shapes(shape=array.shape))) @st.composite # type: ignore[misc] @@ -154,7 +168,12 @@ def arrays( zarr_format = draw(zarr_formats) if arrays is None: arrays = numpy_arrays(shapes=shapes, zarr_formats=st.just(zarr_format)) - nparray, chunks = draw(np_array_and_chunks(arrays=arrays)) + nparray = draw(arrays) + chunk_shape = draw(chunk_shapes(shape=nparray.shape)) + if zarr_format == 3 and all(c > 0 for c in chunk_shape): + shard_shape = draw(st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunk_shape)) + else: + shard_shape = None # test that None works too. fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)])) # compressor = draw(compressors) @@ -167,7 +186,8 @@ def arrays( a = root.create_array( array_path, shape=nparray.shape, - chunks=chunks, + chunks=chunk_shape, + shards=shard_shape, dtype=nparray.dtype, attributes=attributes, # compressor=compressor, # FIXME @@ -180,7 +200,8 @@ def arrays( assert a.name is not None assert isinstance(root[array_path], Array) assert nparray.shape == a.shape - assert chunks == a.chunks + assert chunk_shape == a.chunks + assert shard_shape == a.shards assert array_path == a.path, (path, name, array_path, a.name, a.path) assert a.basename == name, (a.basename, name) assert dict(a.attrs) == expected_attrs From 022a7955f30c81e80e07be0dab654417f1aa3ba0 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 13 Feb 2025 09:32:24 -0700 Subject: [PATCH 2/2] Prioritize v3 over v2 in property tests --- src/zarr/testing/strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index fc37cc47eb..42e03ec614 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -87,7 +87,7 @@ def safe_unicode_for_dtype(dtype: np.dtype[np.str_]) -> st.SearchStrategy[str]: # So we map a clear to reset the store. stores = st.builds(MemoryStore, st.just({})).map(lambda x: sync(x.clear())) compressors = st.sampled_from([None, "default"]) -zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([2, 3]) +zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([3, 2]) array_shapes = npst.array_shapes(max_dims=4, min_side=0)