Skip to content

Commit f788084

Browse files
lilyminium authored and shoyer committed
Zarr chunking (GH2300) (#2487)
* fixed typo * added test for saving opened zarr dataset * modified test for saving opened zarr dataset * allow different last chunk * removed whitespace * modified error messages * fixed pep8 issues * updated whats-new
1 parent cf798c5 commit f788084

File tree

3 files changed

+21
-6
lines changed

3 files changed

+21
-6
lines changed

doc/whats-new.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@ Bug fixes
152152
the dates must be encoded using cftime rather than NumPy (:issue:`2272`).
153153
By `Spencer Clark <https://github.com/spencerkclark>`_.
154154

155+
- Chunked datasets can now roundtrip to Zarr storage repeatedly
156+
with ``to_zarr`` and ``open_zarr`` (:issue:`2300`).
157+
By `Lily Wang <https://github.com/lilyminium>`_.
158+
155159
.. _whats-new.0.10.9:
156160

157161
v0.10.9 (21 September 2018)

xarray/backends/zarr.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,14 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim):
7979
if var_chunks and enc_chunks is None:
8080
if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
8181
raise ValueError(
82-
"Zarr requires uniform chunk sizes excpet for final chunk."
83-
" Variable %r has incompatible chunks. Consider "
82+
"Zarr requires uniform chunk sizes except for final chunk."
83+
" Variable dask chunks %r are incompatible. Consider "
8484
"rechunking using `chunk()`." % (var_chunks,))
8585
if any((chunks[0] < chunks[-1]) for chunks in var_chunks):
8686
raise ValueError(
87-
"Final chunk of Zarr array must be smaller than first. "
88-
"Variable %r has incompatible chunks. Consider rechunking "
89-
"using `chunk()`." % var_chunks)
87+
"Final chunk of Zarr array must be the same size or smaller "
88+
"than the first. Variable Dask chunks %r are incompatible. "
89+
"Consider rechunking using `chunk()`." % var_chunks)
9090
# return the first chunk for each dimension
9191
return tuple(chunk[0] for chunk in var_chunks)
9292

@@ -126,14 +126,21 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim):
126126
# threads
127127
if var_chunks and enc_chunks_tuple:
128128
for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks):
129-
for dchunk in dchunks:
129+
for dchunk in dchunks[:-1]:
130130
if dchunk % zchunk:
131131
raise NotImplementedError(
132132
"Specified zarr chunks %r would overlap multiple dask "
133133
"chunks %r. This is not implemented in xarray yet. "
134134
" Consider rechunking the data using "
135135
"`chunk()` or specifying different chunks in encoding."
136136
% (enc_chunks_tuple, var_chunks))
137+
if dchunks[-1] > zchunk:
138+
raise ValueError(
139+
"Final chunk of Zarr array must be the same size or "
140+
"smaller than the first. The specified Zarr chunk "
141+
"encoding is %r, but %r in variable Dask chunks %r is "
142+
"incompatible. Consider rechunking using `chunk()`."
143+
% (enc_chunks_tuple, dchunks, var_chunks))
137144
return enc_chunks_tuple
138145

139146
raise AssertionError(

xarray/tests/test_backends.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1388,6 +1388,10 @@ def test_chunk_encoding_with_dask(self):
13881388
ds_chunk_irreg = ds.chunk({'x': (5, 5, 2)})
13891389
with self.roundtrip(ds_chunk_irreg) as actual:
13901390
assert (5,) == actual['var1'].encoding['chunks']
1391+
# re-save Zarr arrays
1392+
with self.roundtrip(ds_chunk_irreg) as original:
1393+
with self.roundtrip(original) as actual:
1394+
assert_identical(original, actual)
13911395

13921396
# - encoding specified -
13931397
# specify compatible encodings

0 commit comments

Comments
 (0)