|
13 | 13 | import tempfile
|
14 | 14 | import uuid
|
15 | 15 | import warnings
|
16 |
| -from collections.abc import Iterator |
| 16 | +from collections.abc import Generator, Iterator |
17 | 17 | from contextlib import ExitStack
|
18 | 18 | from io import BytesIO
|
19 | 19 | from os import listdir
|
@@ -1536,6 +1536,83 @@ def test_keep_chunksizes_if_no_original_shape(self) -> None:
|
1536 | 1536 | ds["x"].encoding["chunksizes"], actual["x"].encoding["chunksizes"]
|
1537 | 1537 | )
|
1538 | 1538 |
|
def test_preferred_chunks_is_present(self) -> None:
    """Check that ``preferred_chunks`` shows up in the encoding after a round trip.

    A one-dimensional variable ``x`` of length 3 is given a ``chunksizes``
    encoding of ``(2,)`` and passed through ``self.roundtrip``; the
    resulting variable must report ``{"x": 2}`` as its preferred chunks.
    """
    dataset = Dataset({"x": [1, 2, 3]})
    dataset.variables["x"].encoding = {"chunksizes": (2,)}

    with self.roundtrip(dataset) as reloaded:
        preferred = reloaded["x"].encoding["preferred_chunks"]
        assert preferred == {"x": 2}
| 1546 | + |
@requires_dask
def test_auto_chunking_is_based_on_disk_chunk_sizes(self) -> None:
    """Check that ``chunks="auto"`` yields chunks aligned with the requested ones.

    A ``(1, 1000, 1000)`` image is round-tripped with ``chunksizes`` of
    ``(1, 1000, 10)`` and re-opened with ``chunks="auto"`` while dask's
    ``array.chunk-size`` setting is ``100KiB``.
    """
    x_size = y_size = 1000
    x_chunksize = 10
    y_chunksize = y_size
    shape = (1, y_size, x_size)
    disk_chunks = (1, y_chunksize, x_chunksize)

    # NOTE(review): the 100KiB limit is presumably chosen so that automatic
    # chunking has to split the array - confirm against dask's config docs.
    with dask.config.set({"array.chunk-size": "100KiB"}), self.chunked_roundtrip(
        shape, disk_chunks, open_kwargs={"chunks": "auto"}
    ) as ds:
        # Unpacking into three names also requires the array to be 3-D.
        _, y_chunks, x_chunks = ds["image"].data.chunks

        # Along y every chunk must span the full requested chunk size.
        y_matches = np.asanyarray(y_chunks) == y_chunksize
        assert all(y_matches)

        # Check that the chunk size is a multiple of the file chunk size
        x_remainders = np.asanyarray(x_chunks) % x_chunksize
        assert all(x_remainders == 0)
| 1563 | + |
@requires_dask
def test_base_chunking_uses_disk_chunk_sizes(self) -> None:
    """Check that ``chunks={}`` reproduces the requested chunk sizes exactly.

    A ``(1, 1000, 1000)`` image is round-tripped with ``chunksizes`` of
    ``(1, 1000, 10)`` and re-opened with ``chunks={}``; along each
    dimension every chunk must equal the corresponding requested size.
    """
    x_size = y_size = 1000
    x_chunksize = 10
    y_chunksize = y_size
    shape = (1, y_size, x_size)
    expected_chunks = (1, y_chunksize, x_chunksize)

    with self.chunked_roundtrip(
        shape, expected_chunks, open_kwargs={"chunks": {}}
    ) as ds:
        actual_chunks = ds["image"].data.chunks
        # Walk the dimensions in lockstep with the expected per-dimension size.
        for dim_chunks, expected in zip(actual_chunks, expected_chunks):
            assert all(np.asanyarray(dim_chunks) == expected)
| 1579 | + |
@contextlib.contextmanager
def chunked_roundtrip(
    self,
    array_shape: tuple[int, int, int],
    chunk_sizes: tuple[int, int, int],
    open_kwargs: dict[str, Any] | None = None,
) -> Generator[Dataset, None, None]:
    """Round-trip a 3-D ``image`` variable carrying a ``chunksizes`` encoding.

    Parameters
    ----------
    array_shape
        ``(t, y, x)`` sizes of the generated array.
    chunk_sizes
        ``(t, y, x)`` chunk sizes stored under the ``chunksizes`` encoding key.
    open_kwargs
        Forwarded unchanged to ``self.roundtrip``.

    Yields
    ------
    Dataset
        The dataset yielded by ``self.roundtrip``; it contains the single
        variable ``image`` with dimensions ``("t", "y", "x")``.
    """
    # Unpack both tuples up front so a wrong length fails before any work.
    t_size, y_size, x_size = array_shape
    t_chunksize, y_chunksize, x_chunksize = chunk_sizes

    # NOTE(review): the element count can exceed the int16 range (e.g. for a
    # 1000 x 1000 image); the tests visible in this section only inspect
    # chunking/encoding, not the values - confirm that is intended.
    flat_values = np.arange(t_size * x_size * y_size, dtype=np.int16)
    image = xr.DataArray(
        flat_values.reshape((t_size, y_size, x_size)),
        dims=["t", "y", "x"],
    )
    image.encoding = {"chunksizes": (t_chunksize, y_chunksize, x_chunksize)}

    dataset = xr.Dataset({"image": image})
    with self.roundtrip(dataset, open_kwargs=open_kwargs) as ds:
        yield ds
| 1601 | + |
def test_preferred_chunks_are_disk_chunk_sizes(self) -> None:
    """Check that ``preferred_chunks`` mirrors the requested ``chunksizes``.

    A ``(1, 1000, 1000)`` image is round-tripped with ``chunksizes`` of
    ``(1, 1000, 10)``; the ``preferred_chunks`` encoding of the result must
    map each dimension name to the matching chunk size.
    """
    x_size = y_size = 1000
    x_chunksize = 10
    y_chunksize = y_size
    expected = {"t": 1, "y": y_chunksize, "x": x_chunksize}

    with self.chunked_roundtrip(
        (1, y_size, x_size), (1, y_chunksize, x_chunksize)
    ) as ds:
        preferred = ds["image"].encoding["preferred_chunks"]
        assert preferred == expected
| 1615 | + |
1539 | 1616 | def test_encoding_chunksizes_unlimited(self) -> None:
|
1540 | 1617 | # regression test for GH1225
|
1541 | 1618 | ds = Dataset({"x": [1, 2, 3], "y": ("x", [2, 3, 4])})
|
|
0 commit comments