From 4263325a6ccf6ac9a4c9fa1cd9101017379d1894 Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Fri, 21 Feb 2025 14:30:45 -0700
Subject: [PATCH 1/5] Partially Revert "Always skip reads when completely
 overwriting chunks (#2784)"

This reverts commit feeb08f4e49f6574d712fe5ceb42ce80ab6ceb3f.
---
 src/zarr/abc/codec.py           |  4 +--
 src/zarr/codecs/sharding.py     | 14 +++-----
 src/zarr/core/array.py          |  6 ++--
 src/zarr/core/codec_pipeline.py | 60 ++++++++++++++------------------
 src/zarr/core/indexing.py       | 61 +++++++++++++++++++++------------
 src/zarr/storage/_logging.py    |  2 +-
 tests/test_array.py             | 11 +-----
 tests/test_indexing.py          |  6 ++++
 8 files changed, 82 insertions(+), 82 deletions(-)

diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py
index 16400f5f4b..fabd042dbe 100644
--- a/src/zarr/abc/codec.py
+++ b/src/zarr/abc/codec.py
@@ -357,7 +357,7 @@ async def encode(
     @abstractmethod
     async def read(
         self,
-        batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
+        batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]],
         out: NDBuffer,
         drop_axes: tuple[int, ...] = (),
     ) -> None:
@@ -379,7 +379,7 @@ async def read(
     @abstractmethod
     async def write(
         self,
-        batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
+        batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]],
         value: NDBuffer,
         drop_axes: tuple[int, ...] = (),
     ) -> None:
diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index 42b1313fac..c287159216 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -455,9 +455,8 @@ async def _decode_single(
                     chunk_spec,
                     chunk_selection,
                     out_selection,
-                    is_complete_shard,
                 )
-                for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
+                for chunk_coords, chunk_selection, out_selection in indexer
             ],
             out,
         )
@@ -487,7 +486,7 @@ async def _decode_partial_single(
         )
 
         indexed_chunks = list(indexer)
-        all_chunk_coords = {chunk_coords for chunk_coords, *_ in indexed_chunks}
+        all_chunk_coords = {chunk_coords for chunk_coords, _, _ in indexed_chunks}
 
         # reading bytes of all requested chunks
         shard_dict: ShardMapping = {}
@@ -525,9 +524,8 @@ async def _decode_partial_single(
                     chunk_spec,
                     chunk_selection,
                     out_selection,
-                    is_complete_shard,
                 )
-                for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
+                for chunk_coords, chunk_selection, out_selection in indexer
             ],
             out,
         )
@@ -564,9 +562,8 @@ async def _encode_single(
                     chunk_spec,
                     chunk_selection,
                     out_selection,
-                    is_complete_shard,
                 )
-                for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
+                for chunk_coords, chunk_selection, out_selection in indexer
             ],
             shard_array,
         )
@@ -608,9 +605,8 @@ async def _encode_partial_single(
                     chunk_spec,
                     chunk_selection,
                     out_selection,
-                    is_complete_shard,
                 )
-                for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
+                for chunk_coords, chunk_selection, out_selection in indexer
             ],
             shard_array,
         )
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 9c2f8a7260..ba163c1529 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -1291,9 +1291,8 @@ async def _get_selection(
                     self.metadata.get_chunk_spec(chunk_coords, _config, prototype=prototype),
                     chunk_selection,
                     out_selection,
-                    is_complete_chunk,
                 )
-                for chunk_coords, chunk_selection, out_selection, is_complete_chunk in indexer
+                for chunk_coords, chunk_selection, out_selection in indexer
             ],
             out_buffer,
             drop_axes=indexer.drop_axes,
@@ -1419,9 +1418,8 @@ async def _set_selection(
                     self.metadata.get_chunk_spec(chunk_coords, _config, prototype),
                     chunk_selection,
                     out_selection,
-                    is_complete_chunk,
                 )
-                for chunk_coords, chunk_selection, out_selection, is_complete_chunk in indexer
+                for chunk_coords, chunk_selection, out_selection in indexer
             ],
             value_buffer,
             drop_axes=indexer.drop_axes,
diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py
index 0c53cda96c..a35c5ca210 100644
--- a/src/zarr/core/codec_pipeline.py
+++ b/src/zarr/core/codec_pipeline.py
@@ -16,7 +16,7 @@
 )
 from zarr.core.common import ChunkCoords, concurrent_map
 from zarr.core.config import config
-from zarr.core.indexing import SelectorTuple, is_scalar
+from zarr.core.indexing import SelectorTuple, is_scalar, is_total_slice
 from zarr.core.metadata.v2 import _default_fill_value
 from zarr.registry import register_pipeline
@@ -243,7 +243,7 @@ async def encode_partial_batch(
 
     async def read_batch(
         self,
-        batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
+        batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]],
         out: NDBuffer,
         drop_axes: tuple[int, ...] = (),
     ) -> None:
@@ -251,10 +251,10 @@ async def read_batch(
             chunk_array_batch = await self.decode_partial_batch(
                 [
                     (byte_getter, chunk_selection, chunk_spec)
-                    for byte_getter, chunk_spec, chunk_selection, *_ in batch_info
+                    for byte_getter, chunk_spec, chunk_selection, _ in batch_info
                 ]
             )
-            for chunk_array, (_, chunk_spec, _, out_selection, _) in zip(
+            for chunk_array, (_, chunk_spec, _, out_selection) in zip(
                 chunk_array_batch, batch_info, strict=False
             ):
                 if chunk_array is not None:
@@ -263,19 +263,22 @@ async def read_batch(
                     out[out_selection] = fill_value_or_default(chunk_spec)
         else:
             chunk_bytes_batch = await concurrent_map(
-                [(byte_getter, array_spec.prototype) for byte_getter, array_spec, *_ in batch_info],
+                [
+                    (byte_getter, array_spec.prototype)
+                    for byte_getter, array_spec, _, _ in batch_info
+                ],
                 lambda byte_getter, prototype: byte_getter.get(prototype),
                 config.get("async.concurrency"),
             )
             chunk_array_batch = await self.decode_batch(
                 [
                     (chunk_bytes, chunk_spec)
-                    for chunk_bytes, (_, chunk_spec, *_) in zip(
+                    for chunk_bytes, (_, chunk_spec, _, _) in zip(
                         chunk_bytes_batch, batch_info, strict=False
                     )
                 ],
             )
-            for chunk_array, (_, chunk_spec, chunk_selection, out_selection, _) in zip(
+            for chunk_array, (_, chunk_spec, chunk_selection, out_selection) in zip(
                 chunk_array_batch, batch_info, strict=False
             ):
                 if chunk_array is not None:
@@ -293,10 +296,9 @@ def _merge_chunk_array(
         out_selection: SelectorTuple,
         chunk_spec: ArraySpec,
         chunk_selection: SelectorTuple,
-        is_complete_chunk: bool,
         drop_axes: tuple[int, ...],
     ) -> NDBuffer:
-        if is_complete_chunk and value.shape == chunk_spec.shape:
+        if is_total_slice(chunk_selection, chunk_spec.shape) and value.shape == chunk_spec.shape:
             return value
         if existing_chunk_array is None:
             chunk_array = chunk_spec.prototype.nd_buffer.create(
@@ -325,7 +327,7 @@ def _merge_chunk_array(
 
     async def write_batch(
         self,
-        batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
+        batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]],
         value: NDBuffer,
         drop_axes: tuple[int, ...] = (),
     ) -> None:
@@ -335,14 +337,14 @@ async def write_batch(
             await self.encode_partial_batch(
                 [
                     (byte_setter, value, chunk_selection, chunk_spec)
-                    for byte_setter, chunk_spec, chunk_selection, out_selection, _ in batch_info
+                    for byte_setter, chunk_spec, chunk_selection, out_selection in batch_info
                 ],
             )
         else:
             await self.encode_partial_batch(
                 [
                     (byte_setter, value[out_selection], chunk_selection, chunk_spec)
-                    for byte_setter, chunk_spec, chunk_selection, out_selection, _ in batch_info
+                    for byte_setter, chunk_spec, chunk_selection, out_selection in batch_info
                 ],
             )
@@ -359,10 +361,10 @@ async def _read_key(
             chunk_bytes_batch = await concurrent_map(
                 [
                     (
-                        None if is_complete_chunk else byte_setter,
+                        None if is_total_slice(chunk_selection, chunk_spec.shape) else byte_setter,
                         chunk_spec.prototype,
                     )
-                    for byte_setter, chunk_spec, chunk_selection, _, is_complete_chunk in batch_info
+                    for byte_setter, chunk_spec, chunk_selection, _ in batch_info
                 ],
                 _read_key,
                 config.get("async.concurrency"),
@@ -370,7 +372,7 @@
             chunk_array_decoded = await self.decode_batch(
                 [
                     (chunk_bytes, chunk_spec)
-                    for chunk_bytes, (_, chunk_spec, *_) in zip(
+                    for chunk_bytes, (_, chunk_spec, _, _) in zip(
                         chunk_bytes_batch, batch_info, strict=False
                     )
                 ],
             )
 
             chunk_array_merged = [
                 self._merge_chunk_array(
-                    chunk_array,
-                    value,
-                    out_selection,
-                    chunk_spec,
-                    chunk_selection,
-                    is_complete_chunk,
-                    drop_axes,
+                    chunk_array, value, out_selection, chunk_spec, chunk_selection, drop_axes
+                )
+                for chunk_array, (_, chunk_spec, chunk_selection, out_selection) in zip(
+                    chunk_array_decoded, batch_info, strict=False
                 )
-                for chunk_array, (
-                    _,
-                    chunk_spec,
-                    chunk_selection,
-                    out_selection,
-                    is_complete_chunk,
-                ) in zip(chunk_array_decoded, batch_info, strict=False)
             ]
             chunk_array_batch: list[NDBuffer | None] = []
-            for chunk_array, (_, chunk_spec, *_) in zip(
+            for chunk_array, (_, chunk_spec, _, _) in zip(
                 chunk_array_merged, batch_info, strict=False
             ):
                 if chunk_array is None:
@@ -411,7 +403,7 @@
             chunk_bytes_batch = await self.encode_batch(
                 [
                     (chunk_array, chunk_spec)
-                    for chunk_array, (_, chunk_spec, *_) in zip(
+                    for chunk_array, (_, chunk_spec, _, _) in zip(
                         chunk_array_batch, batch_info, strict=False
                     )
                 ],
             )
@@ -426,7 +418,7 @@ async def _write_key(byte_setter: ByteSetter, chunk_bytes: Buffer | None) -> Non
         await concurrent_map(
             [
                 (byte_setter, chunk_bytes)
-                for chunk_bytes, (byte_setter, *_) in zip(
+                for chunk_bytes, (byte_setter, _, _, _) in zip(
                     chunk_bytes_batch, batch_info, strict=False
                 )
             ],
@@ -454,7 +446,7 @@ async def encode(
 
     async def read(
         self,
-        batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
+        batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]],
        out: NDBuffer,
         drop_axes: tuple[int, ...] = (),
     ) -> None:
@@ -469,7 +461,7 @@ async def read(
 
     async def write(
         self,
-        batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
+        batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]],
         value: NDBuffer,
         drop_axes: tuple[int, ...] = (),
     ) -> None:
diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py
index c197f6f397..733b2464ac 100644
--- a/src/zarr/core/indexing.py
+++ b/src/zarr/core/indexing.py
@@ -321,12 +321,12 @@ class ChunkDimProjection(NamedTuple):
         Selection of items from chunk array.
     dim_out_sel
         Selection of items in target (output) array.
+
     """
 
     dim_chunk_ix: int
     dim_chunk_sel: Selector
     dim_out_sel: Selector | None
-    is_complete_chunk: bool
 
 
 @dataclass(frozen=True)
@@ -346,8 +346,7 @@ def __iter__(self) -> Iterator[ChunkDimProjection]:
         dim_offset = dim_chunk_ix * self.dim_chunk_len
         dim_chunk_sel = self.dim_sel - dim_offset
         dim_out_sel = None
-        is_complete_chunk = self.dim_chunk_len == 1
-        yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel, is_complete_chunk)
+        yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel)
 
 
 @dataclass(frozen=True)
@@ -421,10 +420,7 @@ def __iter__(self) -> Iterator[ChunkDimProjection]:
 
             dim_out_sel = slice(dim_out_offset, dim_out_offset + dim_chunk_nitems)
 
-            is_complete_chunk = (
-                dim_chunk_sel_start == 0 and (self.stop >= dim_limit) and self.step in [1, None]
-            )
-            yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel, is_complete_chunk)
+            yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel)
 
 
 def check_selection_length(selection: SelectionNormalized, shape: ChunkCoords) -> None:
@@ -497,14 +493,12 @@ class ChunkProjection(NamedTuple):
         Selection of items from chunk array.
     out_selection
         Selection of items in target (output) array.
-    is_complete_chunk:
-        True if a complete chunk is indexed
+
     """
 
     chunk_coords: ChunkCoords
     chunk_selection: tuple[Selector, ...] | npt.NDArray[np.intp]
     out_selection: tuple[Selector, ...] | npt.NDArray[np.intp] | slice
-    is_complete_chunk: bool
 
 
 def is_slice(s: Any) -> TypeGuard[slice]:
@@ -580,8 +574,8 @@ def __iter__(self) -> Iterator[ChunkProjection]:
             out_selection = tuple(
                 p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None
             )
-            is_complete_chunk = all(p.is_complete_chunk for p in dim_projections)
-            yield ChunkProjection(chunk_coords, chunk_selection, out_selection, is_complete_chunk)
+
+            yield ChunkProjection(chunk_coords, chunk_selection, out_selection)
 
 
 @dataclass(frozen=True)
@@ -649,9 +643,8 @@ def __iter__(self) -> Iterator[ChunkDimProjection]:
                 start = self.chunk_nitems_cumsum[dim_chunk_ix - 1]
                 stop = self.chunk_nitems_cumsum[dim_chunk_ix]
                 dim_out_sel = slice(start, stop)
-            is_complete_chunk = False  # TODO
 
-            yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel, is_complete_chunk)
+            yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel)
 
 
 class Order(Enum):
@@ -790,8 +783,8 @@ def __iter__(self) -> Iterator[ChunkDimProjection]:
             # find region in chunk
             dim_offset = dim_chunk_ix * self.dim_chunk_len
             dim_chunk_sel = self.dim_sel[start:stop] - dim_offset
-            is_complete_chunk = False  # TODO
-            yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel, is_complete_chunk)
+
+            yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel)
 
 
 def slice_to_range(s: slice, length: int) -> range:
@@ -928,8 +921,7 @@ def __iter__(self) -> Iterator[ChunkProjection]:
             if not is_basic_selection(out_selection):
                 out_selection = ix_(out_selection, self.shape)
 
-            is_complete_chunk = all(p.is_complete_chunk for p in dim_projections)
-            yield ChunkProjection(chunk_coords, chunk_selection, out_selection, is_complete_chunk)
+            yield ChunkProjection(chunk_coords, chunk_selection, out_selection)
 
 
 @dataclass(frozen=True)
@@ -1038,8 +1030,8 @@ def __iter__(self) -> Iterator[ChunkProjection]:
             out_selection = tuple(
                 p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None
             )
-            is_complete_chunk = all(p.is_complete_chunk for p in dim_projections)
-            yield ChunkProjection(chunk_coords, chunk_selection, out_selection, is_complete_chunk)
+
+            yield ChunkProjection(chunk_coords, chunk_selection, out_selection)
 
 
 @dataclass(frozen=True)
@@ -1206,8 +1198,7 @@ def __iter__(self) -> Iterator[ChunkProjection]:
                 for (dim_sel, dim_chunk_offset) in zip(self.selection, chunk_offsets, strict=True)
             )
 
-            is_complete_chunk = False  # TODO
-            yield ChunkProjection(chunk_coords, chunk_selection, out_selection, is_complete_chunk)
+            yield ChunkProjection(chunk_coords, chunk_selection, out_selection)
 
 
 @dataclass(frozen=True)
@@ -1370,6 +1361,32 @@ def c_order_iter(chunks_per_shard: ChunkCoords) -> Iterator[ChunkCoords]:
     return itertools.product(*(range(x) for x in chunks_per_shard))
 
 
+def is_total_slice(item: Selection, shape: ChunkCoords) -> bool:
+    """Determine whether `item` specifies a complete slice of array with the
+    given `shape`. Used to optimize __setitem__ operations on the Chunk
+    class."""
+
+    # N.B., assume shape is normalized
+    if item == slice(None):
+        return True
+    if isinstance(item, slice):
+        item = (item,)
+    if isinstance(item, tuple):
+        return all(
+            (isinstance(dim_sel, int) and dim_len == 1)
+            or (
+                isinstance(dim_sel, slice)
+                and (
+                    (dim_sel == slice(None))
+                    or ((dim_sel.stop - dim_sel.start == dim_len) and (dim_sel.step in [1, None]))
+                )
+            )
+            for dim_sel, dim_len in zip(item, shape, strict=False)
+        )
+    else:
+        raise TypeError(f"expected slice or tuple of slices, found {item!r}")
+
+
 def get_indexer(
     selection: SelectionWithFields, shape: ChunkCoords, chunk_grid: ChunkGrid
 ) -> Indexer:
diff --git a/src/zarr/storage/_logging.py b/src/zarr/storage/_logging.py
index 5f1a97acd9..e9d6211588 100644
--- a/src/zarr/storage/_logging.py
+++ b/src/zarr/storage/_logging.py
@@ -88,7 +88,7 @@ def log(self, hint: Any = "") -> Generator[None, None, None]:
         op = f"{type(self._store).__name__}.{method}"
         if hint:
             op = f"{op}({hint})"
-        self.logger.info(" Calling %s", op)
+        self.logger.info("Calling %s", op)
         start_time = time.time()
         try:
             self.counter[method] += 1
diff --git a/tests/test_array.py b/tests/test_array.py
index efcf8a6bf9..0b09ef2a58 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -1330,18 +1330,9 @@ async def test_orthogonal_set_total_slice() -> None:
     """Ensure that a whole chunk overwrite does not read chunks"""
     store = MemoryStore()
     array = zarr.create_array(store, shape=(20, 20), chunks=(1, 2), dtype=int, fill_value=-1)
-    with mock.patch("zarr.storage.MemoryStore.get", side_effect=RuntimeError):
+    with mock.patch("zarr.storage.MemoryStore.get", side_effect=ValueError):
         array[0, slice(4, 10)] = np.arange(6)
 
-    array = zarr.create_array(
-        store, shape=(20, 21), chunks=(1, 2), dtype=int, fill_value=-1, overwrite=True
-    )
-    with mock.patch("zarr.storage.MemoryStore.get", side_effect=RuntimeError):
-        array[0, :] = np.arange(21)
-
-    with mock.patch("zarr.storage.MemoryStore.get", side_effect=RuntimeError):
-        array[:] = 1
-
 
 @pytest.mark.skipif(
     Version(numcodecs.__version__) < Version("0.15.1"),
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
index 30d0d75f22..932c32f1ae 100644
--- a/tests/test_indexing.py
+++ b/tests/test_indexing.py
@@ -19,6 +19,7 @@
     OrthogonalSelection,
     Selection,
     _iter_grid,
+    is_total_slice,
     make_slice_selection,
     normalize_integer_selection,
     oindex,
@@ -1953,3 +1954,8 @@ def test_vectorized_indexing_incompatible_shape(store) -> None:
     )
     with pytest.raises(ValueError, match="Attempting to set"):
         arr[np.array([1, 2]), np.array([1, 2])] = np.array([[-1, -2], [-3, -4]])
+
+
+def test_is_total_slice():
+    assert is_total_slice((0, slice(4, 6)), (1, 2))
+    assert is_total_slice((slice(0, 1, None), slice(4, 6)), (1, 2))

From ebc5743f84c51a102b0b433578777a1ee0feffd8 Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Fri, 21 Feb 2025 14:32:02 -0700
Subject: [PATCH 2/5] Greatly improve property tests

---
 src/zarr/testing/stateful.py   |  8 ++--
 src/zarr/testing/strategies.py | 80 ++++++++++++++++++++++++++++------
 tests/test_indexing.py         | 30 +++++++++++++
 tests/test_properties.py       | 10 +++--
 4 files changed, 107 insertions(+), 21 deletions(-)

diff --git a/src/zarr/testing/stateful.py b/src/zarr/testing/stateful.py
index 3e8dbcdf04..ede83201ae 100644
--- a/src/zarr/testing/stateful.py
+++ b/src/zarr/testing/stateful.py
@@ -325,7 +325,7 @@ def __init__(self, store: Store) -> None:
     def init_store(self) -> None:
        self.store.clear()
 
-    @rule(key=zarr_keys, data=st.binary(min_size=0, max_size=MAX_BINARY_SIZE))
+    @rule(key=zarr_keys(), data=st.binary(min_size=0, max_size=MAX_BINARY_SIZE))
     def set(self, key: str, data: DataObject) -> None:
         note(f"(set) Setting {key!r} with {data}")
         assert not self.store.read_only
@@ -334,7 +334,7 @@ def set(self, key: str, data: DataObject) -> None:
         self.model[key] = data_buf
 
     @precondition(lambda self: len(self.model.keys()) > 0)
-    @rule(key=zarr_keys, data=st.data())
+    @rule(key=zarr_keys(), data=st.data())
     def get(self, key: str, data: DataObject) -> None:
         key = data.draw(
             st.sampled_from(sorted(self.model.keys()))
         )
@@ -344,7 +344,7 @@ def get(self, key: str, data: DataObject) -> None:
         # to bytes here necessary because data_buf set to model in set()
         assert self.model[key] == store_value
 
-    @rule(key=zarr_keys, data=st.data())
+    @rule(key=zarr_keys(), data=st.data())
     def get_invalid_zarr_keys(self, key: str, data: DataObject) -> None:
         note("(get_invalid)")
         assume(key not in self.model)
@@ -408,7 +408,7 @@ def is_empty(self) -> None:
         # make sure they either both are or both aren't empty (same state)
         assert self.store.is_empty("") == (not self.model)
 
-    @rule(key=zarr_keys)
+    @rule(key=zarr_keys())
     def exists(self, key: str) -> None:
         note("(exists)")
diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py
index 0e25e44592..90e2726dda 100644
--- a/src/zarr/testing/strategies.py
+++ b/src/zarr/testing/strategies.py
@@ -1,10 +1,11 @@
+import math
 import sys
 from typing import Any, Literal
 
 import hypothesis.extra.numpy as npst
 import hypothesis.strategies as st
 import numpy as np
-from hypothesis import given, settings  # noqa: F401
+from hypothesis import event, given, settings  # noqa: F401
 from hypothesis.strategies import SearchStrategy
 
 import zarr
@@ -28,6 +29,16 @@
 )
 
 
+@st.composite  # type: ignore[misc]
+def keys(draw: st.DrawFn, *, max_num_nodes: int | None = None) -> Any:
+    return draw(st.lists(node_names, min_size=1, max_size=max_num_nodes).map("/".join))
+
+
+@st.composite  # type: ignore[misc]
+def paths(draw: st.DrawFn, *, max_num_nodes: int | None = None) -> Any:
+    return draw(st.just("/") | keys(max_num_nodes=max_num_nodes))
+
+
 def v3_dtypes() -> st.SearchStrategy[np.dtype]:
     return (
         npst.boolean_dtypes()
@@ -87,17 +98,19 @@ def clear_store(x: Store) -> Store:
 node_names = st.text(zarr_key_chars, min_size=1).filter(
     lambda t: t not in (".", "..") and not t.startswith("__")
 )
+short_node_names = st.text(zarr_key_chars, max_size=3, min_size=1).filter(
+    lambda t: t not in (".", "..") and not t.startswith("__")
+)
 array_names = node_names
 attrs = st.none() | st.dictionaries(_attr_keys, _attr_values)
-keys = st.lists(node_names, min_size=1).map("/".join)
-paths = st.just("/") | keys
 # st.builds will only call a new store constructor for different keyword arguments
 # i.e. stores.examples() will always return the same object per Store class.
 # So we map a clear to reset the store.
 stores = st.builds(MemoryStore, st.just({})).map(clear_store)
 compressors = st.sampled_from([None, "default"])
 zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([3, 2])
-array_shapes = npst.array_shapes(max_dims=4, min_side=0)
+# We de-prioritize arrays having dim sizes 0, 1, 2
+array_shapes = npst.array_shapes(max_dims=4, min_side=3) | npst.array_shapes(max_dims=4, min_side=0)
 
 
 @st.composite  # type: ignore[misc]
@@ -174,11 +187,19 @@ def chunk_shapes(draw: st.DrawFn, *, shape: tuple[int, ...]) -> tuple[int, ...]:
         st.tuples(*[st.integers(min_value=0 if size == 0 else 1, max_value=size) for size in shape])
     )
     # 2. and now generate the chunks tuple
-    return tuple(
+    chunks = tuple(
         size // nchunks if nchunks > 0 else 0
         for size, nchunks in zip(shape, numchunks, strict=True)
     )
+    for c in chunks:
+        event("chunk size", c)
+
+    if any((c != 0 and s % c != 0) for s, c in zip(shape, chunks, strict=True)):
+        event("smaller last chunk")
+
+    return chunks
+
 
 @st.composite  # type: ignore[misc]
 def shard_shapes(
@@ -211,7 +232,7 @@ def arrays(
     shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes,
     compressors: st.SearchStrategy = compressors,
     stores: st.SearchStrategy[StoreLike] = stores,
-    paths: st.SearchStrategy[str | None] = paths,
+    paths: st.SearchStrategy[str | None] = paths(),  # noqa: B008
     array_names: st.SearchStrategy = array_names,
     arrays: st.SearchStrategy | None = None,
     attrs: st.SearchStrategy = attrs,
@@ -267,23 +288,56 @@ def arrays(
     return a
 
 
+@st.composite  # type: ignore[misc]
+def simple_arrays(
+    draw: st.DrawFn,
+    *,
+    shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes,
+) -> Any:
+    return draw(
+        arrays(
+            shapes=shapes,
+            paths=paths(max_num_nodes=2),
+            array_names=short_node_names,
+            attrs=st.none(),
+            compressors=st.sampled_from([None, "default"]),
+        )
+    )
+
+
 def is_negative_slice(idx: Any) -> bool:
     return isinstance(idx, slice) and idx.step is not None and idx.step < 0
 
 
+@st.composite  # type: ignore[misc]
+def end_slices(draw: st.DrawFn, *, shape: tuple[int]) -> Any:
+    """
+    A strategy that slices ranges that include the last chunk.
+    This is intended to stress-test handling of a possibly smaller last chunk.
+    """
+    slicers = []
+    for size in shape:
+        start = draw(st.integers(min_value=size // 2, max_value=size - 1))
+        length = draw(st.integers(min_value=0, max_value=size - start))
+        slicers.append(slice(start, start + length))
+    event("drawing end slice")
+    return tuple(slicers)
+
+
 @st.composite  # type: ignore[misc]
 def basic_indices(draw: st.DrawFn, *, shape: tuple[int], **kwargs: Any) -> Any:
     """Basic indices without unsupported negative slices."""
-    return draw(
-        npst.basic_indices(shape=shape, **kwargs).filter(
-            lambda idxr: (
-                not (
-                    is_negative_slice(idxr)
-                    or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr))
-                )
+    strategy = npst.basic_indices(shape=shape, **kwargs).filter(
+        lambda idxr: (
+            not (
+                is_negative_slice(idxr)
+                or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr))
             )
         )
     )
+    if math.prod(shape) >= 3:
+        strategy = end_slices(shape=shape) | strategy
+    return draw(strategy)
 
 
 @st.composite  # type: ignore[misc]
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
index 932c32f1ae..05adcfe8a3 100644
--- a/tests/test_indexing.py
+++ b/tests/test_indexing.py
@@ -425,6 +425,17 @@ def test_orthogonal_indexing_fallback_on_getitem_2d(
     np.testing.assert_array_equal(z[index], expected_result)
 
 
+def test_setitem_repeated_index():
+    array = zarr.array(data=np.zeros((4,)), chunks=(1,))
+    indexer = np.array([-1, -1, 0, 0])
+    array.oindex[(indexer,)] = [0, 1, 2, 3]
+    np.testing.assert_array_equal(array[:], np.array([3, 0, 0, 1]))
+
+    indexer = np.array([-1, 0, 0, -1])
+    array.oindex[(indexer,)] = [0, 1, 2, 3]
+    np.testing.assert_array_equal(array[:], np.array([2, 0, 0, 3]))
+
+
 Index = list[int] | tuple[slice | int | list[int], ...]
@@ -816,6 +827,25 @@ def test_set_orthogonal_selection_1d(store: StorePath) -> None:
         _test_set_orthogonal_selection(v, a, z, selection)
 
 
+def test_set_item_1d_last_two_chunks():
+    # regression test for GH2849
+    g = zarr.open_group("foo.zarr", zarr_format=3, mode="w")
+    a = g.create_array("bar", shape=(10,), chunks=(3,), dtype=int)
+    data = np.array([7, 8, 9])
+    a[slice(7, 10)] = data
+    np.testing.assert_array_equal(a[slice(7, 10)], data)
+
+    z = zarr.open_group("foo.zarr", mode="w")
+    z.create_array("zoo", dtype=float, shape=())
+    z["zoo"][...] = np.array(1)  # why doesn't [:] work?
+    np.testing.assert_equal(z["zoo"][()], np.array(1))
+
+    z = zarr.open_group("foo.zarr", mode="w")
+    z.create_array("zoo", dtype=float, shape=())
+    z["zoo"][...] = 1  # why doesn't [:] work?
+    np.testing.assert_equal(z["zoo"][()], np.array(1))
+
+
 def _test_set_orthogonal_selection_2d(
     v: npt.NDArray[np.int_],
     a: npt.NDArray[np.int_],
diff --git a/tests/test_properties.py b/tests/test_properties.py
index 5643cf3853..62cfd9e3fc 100644
--- a/tests/test_properties.py
+++ b/tests/test_properties.py
@@ -7,7 +7,7 @@
 
 import hypothesis.extra.numpy as npst
 import hypothesis.strategies as st
-from hypothesis import assume, given
+from hypothesis import assume, given, note
 
 from zarr.abc.store import Store
 from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
@@ -18,6 +18,7 @@
     basic_indices,
     numpy_arrays,
     orthogonal_indices,
+    simple_arrays,
     stores,
     zarr_formats,
 )
@@ -50,7 +51,7 @@ def test_array_creates_implicit_groups(array):
 
 @given(data=st.data())
 def test_basic_indexing(data: st.DataObject) -> None:
-    zarray = data.draw(arrays())
+    zarray = data.draw(simple_arrays())
     nparray = zarray[:]
     indexer = data.draw(basic_indices(shape=nparray.shape))
     actual = zarray[indexer]
@@ -65,7 +66,7 @@ def test_basic_indexing(data: st.DataObject) -> None:
 @given(data=st.data())
 def test_oindex(data: st.DataObject) -> None:
     # integer_array_indices can't handle 0-size dimensions.
-    zarray = data.draw(arrays(shapes=npst.array_shapes(max_dims=4, min_side=1)))
+    zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1)))
     nparray = zarray[:]
 
     zindexer, npindexer = data.draw(orthogonal_indices(shape=nparray.shape))
@@ -76,13 +77,14 @@ def test_oindex(data: st.DataObject) -> None:
     new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype))
     nparray[npindexer] = new_data
     zarray.oindex[zindexer] = new_data
+    note((new_data, npindexer, nparray, zindexer, zarray[:]))
     assert_array_equal(nparray, zarray[:])
 
 
 @given(data=st.data())
 def test_vindex(data: st.DataObject) -> None:
     # integer_array_indices can't handle 0-size dimensions.
-    zarray = data.draw(arrays(shapes=npst.array_shapes(max_dims=4, min_side=1)))
+    zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1)))
     nparray = zarray[:]
 
     indexer = data.draw(

From 1b7cf9a722ebdd86c56b5e0e3ed2f35d0e464945 Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Fri, 21 Feb 2025 14:35:39 -0700
Subject: [PATCH 3/5] cleanup

---
 src/zarr/storage/_logging.py | 2 +-
 tests/test_properties.py     | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/zarr/storage/_logging.py b/src/zarr/storage/_logging.py
index e9d6211588..5f1a97acd9 100644
--- a/src/zarr/storage/_logging.py
+++ b/src/zarr/storage/_logging.py
@@ -88,7 +88,7 @@ def log(self, hint: Any = "") -> Generator[None, None, None]:
         op = f"{type(self._store).__name__}.{method}"
         if hint:
             op = f"{op}({hint})"
-        self.logger.info("Calling %s", op)
+        self.logger.info(" Calling %s", op)
         start_time = time.time()
         try:
             self.counter[method] += 1
diff --git a/tests/test_properties.py b/tests/test_properties.py
index 62cfd9e3fc..90acbdb4b9 100644
--- a/tests/test_properties.py
+++ b/tests/test_properties.py
@@ -7,7 +7,7 @@
 
 import hypothesis.extra.numpy as npst
 import hypothesis.strategies as st
-from hypothesis import assume, given, note
+from hypothesis import assume, given
 
 from zarr.abc.store import Store
 from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
@@ -77,7 +77,6 @@ def test_oindex(data: st.DataObject) -> None:
     new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype))
     nparray[npindexer] = new_data
     zarray.oindex[zindexer] = new_data
-    note((new_data, npindexer, nparray, zindexer, zarray[:]))
     assert_array_equal(nparray, zarray[:])
 
 

From 82a272db92874bacd50533f1151d6bc369b90ebf Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Fri, 21 Feb 2025 14:42:40 -0700
Subject: [PATCH 4/5] Fix test

---
 src/zarr/testing/strategies.py | 6 ++++--
 tests/test_properties.py       | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py
index 90e2726dda..96d664f5aa 100644
--- a/src/zarr/testing/strategies.py
+++ b/src/zarr/testing/strategies.py
@@ -165,13 +165,15 @@ def numpy_arrays(
     draw: st.DrawFn,
     *,
     shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes,
-    zarr_formats: st.SearchStrategy[ZarrFormat] = zarr_formats,
+    dtype: np.dtype[Any] | None = None,
+    zarr_formats: st.SearchStrategy[ZarrFormat] | None = zarr_formats,
 ) -> Any:
     """
     Generate numpy arrays that can be saved in the provided Zarr format.
""" zarr_format = draw(zarr_formats) - dtype = draw(v3_dtypes() if zarr_format == 3 else v2_dtypes()) + if dtype is None: + dtype = draw(v3_dtypes() if zarr_format == 3 else v2_dtypes()) if np.issubdtype(dtype, np.str_): safe_unicode_strings = safe_unicode_for_dtype(dtype) return draw(npst.arrays(dtype=dtype, shape=shapes, elements=safe_unicode_strings)) diff --git a/tests/test_properties.py b/tests/test_properties.py index 90acbdb4b9..67c13d0b17 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -57,7 +57,7 @@ def test_basic_indexing(data: st.DataObject) -> None: actual = zarray[indexer] assert_array_equal(nparray[indexer], actual) - new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype)) + new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)) zarray[indexer] = new_data nparray[indexer] = new_data assert_array_equal(nparray, zarray[:]) @@ -74,7 +74,7 @@ def test_oindex(data: st.DataObject) -> None: assert_array_equal(nparray[npindexer], actual) assume(zarray.shards is None) # GH2834 - new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype)) + new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)) nparray[npindexer] = new_data zarray.oindex[zindexer] = new_data assert_array_equal(nparray, zarray[:]) From df2c99385701dbd34af15c100bdf81b1f4f9703c Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Feb 2025 14:48:12 -0700 Subject: [PATCH 5/5] add release note --- changes/2845.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/2845.bugfix.rst diff --git a/changes/2845.bugfix.rst b/changes/2845.bugfix.rst new file mode 100644 index 0000000000..4b075936c3 --- /dev/null +++ b/changes/2845.bugfix.rst @@ -0,0 +1 @@ +Revert "Always skip reads when completely overwriting chunks (PR #2784, :issue:`2849`)