Skip to content

Commit 66be9c5

Browse files
author
Joe Hamman
authored
fix zarr chunking bug (#2228)
1 parent 6c3abed commit 66be9c5

File tree

3 files changed

+29
-19
lines changed

3 files changed

+29
-19
lines changed

doc/whats-new.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ Enhancements
3939
Bug fixes
4040
~~~~~~~~~
4141

42+
- Fixed a bug in ``zarr`` backend which prevented use with datasets with
43+
incomplete chunks in multiple dimensions (:issue:`2225`).
44+
By `Joe Hamman <https://github.com/jhamman>`_.
45+
4246
.. _whats-new.0.10.7:
4347

4448
v0.10.7 (7 June 2018)
@@ -60,12 +64,13 @@ Enhancements
6064
See :ref:`interpolating values with interp` for the detail.
6165
(:issue:`2079`)
6266
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
63-
67+
6468
Bug fixes
6569
~~~~~~~~~
6670

6771
- Fixed a bug in ``rasterio`` backend which prevented use with ``distributed``.
6872
The ``rasterio`` backend now returns pickleable objects (:issue:`2021`).
73+
By `Joe Hamman <https://github.com/jhamman>`_.
6974

7075
.. _whats-new.0.10.6:
7176

xarray/backends/zarr.py

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -78,24 +78,18 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim):
7878
# while dask chunks can be variable sized
7979
# http://dask.pydata.org/en/latest/array-design.html#chunks
8080
if var_chunks and enc_chunks is None:
81-
all_var_chunks = list(product(*var_chunks))
82-
first_var_chunk = all_var_chunks[0]
83-
# all but the last chunk have to match exactly
84-
for this_chunk in all_var_chunks[:-1]:
85-
if this_chunk != first_var_chunk:
86-
raise ValueError(
87-
"Zarr requires uniform chunk sizes excpet for final chunk."
88-
" Variable %r has incompatible chunks. Consider "
89-
"rechunking using `chunk()`." % (var_chunks,))
90-
# last chunk is allowed to be smaller
91-
last_var_chunk = all_var_chunks[-1]
92-
for len_first, len_last in zip(first_var_chunk, last_var_chunk):
93-
if len_last > len_first:
94-
raise ValueError(
95-
"Final chunk of Zarr array must be smaller than first. "
96-
"Variable %r has incompatible chunks. Consider rechunking "
97-
"using `chunk()`." % var_chunks)
98-
return first_var_chunk
81+
if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
82+
raise ValueError(
83+
"Zarr requires uniform chunk sizes excpet for final chunk."
84+
" Variable %r has incompatible chunks. Consider "
85+
"rechunking using `chunk()`." % (var_chunks,))
86+
if any((chunks[0] < chunks[-1]) for chunks in var_chunks):
87+
raise ValueError(
88+
"Final chunk of Zarr array must be smaller than first. "
89+
"Variable %r has incompatible chunks. Consider rechunking "
90+
"using `chunk()`." % var_chunks)
91+
# return the first chunk for each dimension
92+
return tuple(chunk[0] for chunk in var_chunks)
9993

10094
# from here on, we are dealing with user-specified chunks in encoding
10195
# zarr allows chunks to be an integer, in which case it uses the same chunk

xarray/tests/test_backends.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,6 +1330,17 @@ def test_auto_chunk(self):
13301330
# chunk size should be the same as original
13311331
self.assertEqual(v.chunks, original[k].chunks)
13321332

1333+
def test_write_uneven_dask_chunks(self):
    """Round-trip a dataset chunked unevenly along several dimensions.

    Regression test for GH#2225. The dataset is written through
    ``self.roundtrip`` and re-opened with ``auto_chunk=True``; every data
    variable read back must carry the same dask chunks as the variable
    that was written.
    """
    # NOTE(review): the chunk sizes presumably do not divide the dimension
    # lengths produced by create_test_data(), so more than one dimension
    # ends in a partial chunk -- confirm against create_test_data().
    original = create_test_data().chunk({'dim1': 3, 'dim2': 4, 'dim3': 3})

    with self.roundtrip(
            original, open_kwargs={'auto_chunk': True}) as actual:
        for k, v in actual.data_vars.items():
            # Compare against the dataset that was written. Comparing
            # ``v`` with ``actual[k]`` would compare the reloaded
            # variable with itself and could never fail.
            assert v.chunks == original[k].chunks
1342+
1343+
13331344
def test_chunk_encoding(self):
13341345
# These datasets have no dask chunks. All chunking specified in
13351346
# encoding

0 commit comments

Comments
 (0)