Skip to content

Commit edbba37

Browse files
committed
Skip reads when completely overwriting boundary chunks
Uses `slice(..., None)` to indicate that a `chunk_selection` ends at the boundary of the current chunk. Also does so for a last chunk that is shorter than the chunk size. `is_total_slice` now understands this convention, and correctly detects boundary chunks as total slices. Closes zarr-developers#757
1 parent 15d9260 commit edbba37

File tree

4 files changed

+36
-6
lines changed

4 files changed

+36
-6
lines changed

src/zarr/core/indexing.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -403,16 +403,24 @@ def __iter__(self) -> Iterator[ChunkDimProjection]:
403403
dim_chunk_sel_start = self.start - dim_offset
404404
dim_out_offset = 0
405405

406+
# Use None to indicate this selection ends at the chunk edge
407+
# This is useful for is_total_slice at boundary chunks,
406408
if self.stop > dim_limit:
407409
# selection ends after current chunk
408-
dim_chunk_sel_stop = dim_chunk_len
410+
dim_chunk_sel_stop = None # dim_chunk_len
409411

410412
else:
411413
# selection ends within current chunk
412-
dim_chunk_sel_stop = self.stop - dim_offset
414+
if dim_chunk_ix == (self.nchunks - 1):
415+
# all of the last chunk is included
416+
dim_chunk_sel_stop = None
417+
else:
418+
dim_chunk_sel_stop = self.stop - dim_offset
413419

414420
dim_chunk_sel = slice(dim_chunk_sel_start, dim_chunk_sel_stop, self.step)
415-
dim_chunk_nitems = ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start), self.step)
421+
dim_chunk_nitems = ceildiv(
422+
((dim_chunk_sel_stop or dim_chunk_len) - dim_chunk_sel_start), self.step
423+
)
416424

417425
# If there are no elements on the selection within this chunk, then skip
418426
if dim_chunk_nitems == 0:
@@ -1378,7 +1386,17 @@ def is_total_slice(item: Selection, shape: ChunkCoords) -> bool:
13781386
isinstance(dim_sel, slice)
13791387
and (
13801388
(dim_sel == slice(None))
1381-
or ((dim_sel.stop - dim_sel.start == dim_len) and (dim_sel.step in [1, None]))
1389+
or (
1390+
dim_sel.stop is not None
1391+
and (dim_sel.stop - dim_sel.start == dim_len)
1392+
and (dim_sel.step in [1, None])
1393+
)
1394+
# starts exactly at a chunk
1395+
or (
1396+
(dim_sel.start % dim_len == 0)
1397+
and dim_sel.stop is None
1398+
and (dim_sel.step in [1, None])
1399+
)
13821400
)
13831401
)
13841402
for dim_sel, dim_len in zip(item, shape, strict=False)

src/zarr/storage/_logging.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def log(self, hint: Any = "") -> Generator[None, None, None]:
8888
op = f"{type(self._store).__name__}.{method}"
8989
if hint:
9090
op = f"{op}({hint})"
91-
self.logger.info("Calling %s", op)
91+
self.logger.info(" Calling %s", op)
9292
start_time = time.time()
9393
try:
9494
self.counter[method] += 1

tests/test_array.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1335,5 +1335,14 @@ async def test_orthogonal_set_total_slice() -> None:
13351335
"""Ensure that a whole chunk overwrite does not read chunks"""
13361336
store = MemoryStore()
13371337
array = zarr.create_array(store, shape=(20, 20), chunks=(1, 2), dtype=int, fill_value=-1)
1338-
with mock.patch("zarr.storage.MemoryStore.get", side_effect=ValueError):
1338+
with mock.patch("zarr.storage.MemoryStore.get", side_effect=RuntimeError):
13391339
array[0, slice(4, 10)] = np.arange(6)
1340+
1341+
array = zarr.create_array(
1342+
store, shape=(20, 21), chunks=(1, 2), dtype=int, fill_value=-1, overwrite=True
1343+
)
1344+
with mock.patch("zarr.storage.MemoryStore.get", side_effect=RuntimeError):
1345+
array[0, :] = np.arange(21)
1346+
1347+
with mock.patch("zarr.storage.MemoryStore.get", side_effect=RuntimeError):
1348+
array[:] = 1

tests/test_indexing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1959,3 +1959,6 @@ def test_vectorized_indexing_incompatible_shape(store) -> None:
19591959
def test_is_total_slice():
19601960
assert is_total_slice((0, slice(4, 6)), (1, 2))
19611961
assert is_total_slice((slice(0, 1, None), slice(4, 6)), (1, 2))
1962+
assert is_total_slice((slice(0, 1, None), slice(4, None)), (1, 2))
1963+
# slice(5, None) starts in the middle of a chunk
1964+
assert not is_total_slice((slice(0, 1, None), slice(5, None)), (1, 2))

0 commit comments

Comments
 (0)