From 3552231405b67baa11b13be10556350fc726125f Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 15 Oct 2018 04:02:28 +1100 Subject: [PATCH 1/8] fixed typo --- xarray/backends/zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 5f19c826289..63f054f5f4f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -79,7 +79,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): if var_chunks and enc_chunks is None: if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( - "Zarr requires uniform chunk sizes excpet for final chunk." + "Zarr requires uniform chunk sizes except for final chunk." " Variable %r has incompatible chunks. Consider " "rechunking using `chunk()`." % (var_chunks,)) if any((chunks[0] < chunks[-1]) for chunks in var_chunks): From 84c69506038b6e39d056b5f44630bee7cad7915a Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 15 Oct 2018 04:04:30 +1100 Subject: [PATCH 2/8] added test for saving opened zarr dataset --- xarray/tests/test_backends.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c6a2df733fa..d79e4f9ae6a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1389,6 +1389,9 @@ def test_chunk_encoding_with_dask(self): ds_chunk_irreg = ds.chunk({'x': (5, 5, 2)}) with self.roundtrip(ds_chunk_irreg) as actual: assert (5,) == actual['var1'].encoding['chunks'] + with self.roundtrip(actual) as actual_2: + assert actual['var1'].encoding == actual_2['var1'].encoding + # - encoding specified - # specify compatible encodings @@ -1396,6 +1399,8 @@ def test_chunk_encoding_with_dask(self): ds_chunk4['var1'].encoding.update({'chunks': chunk_enc}) with self.roundtrip(ds_chunk4) as actual: assert (4,) == actual['var1'].encoding['chunks'] + + # TODO: remove this failure once syncronized overlapping writes are # supported by xarray @@ 
-1403,6 +1408,8 @@ def test_chunk_encoding_with_dask(self): with pytest.raises(NotImplementedError): with self.roundtrip(ds_chunk4) as actual: pass + + def test_hidden_zarr_keys(self): expected = create_test_data() From aeb53d3633e2aa38fe62758356073596c880ba42 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 15 Oct 2018 06:07:24 +1100 Subject: [PATCH 3/8] modified test for saving opened zarr dataset --- xarray/tests/test_backends.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index d79e4f9ae6a..9f660c51fb8 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1389,8 +1389,10 @@ def test_chunk_encoding_with_dask(self): ds_chunk_irreg = ds.chunk({'x': (5, 5, 2)}) with self.roundtrip(ds_chunk_irreg) as actual: assert (5,) == actual['var1'].encoding['chunks'] - with self.roundtrip(actual) as actual_2: - assert actual['var1'].encoding == actual_2['var1'].encoding + # re-save Zarr arrays + with self.roundtrip(ds_chunk_irreg) as original: + with self.roundtrip(original) as actual: + assert_identical(original, actual) # - encoding specified - From 3c47c62144a28776647fa0bfdab24c7f175bc42d Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 15 Oct 2018 06:10:16 +1100 Subject: [PATCH 4/8] allow different last chunk --- xarray/backends/zarr.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 63f054f5f4f..bf7fe24f81f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -126,7 +126,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): # threads if var_chunks and enc_chunks_tuple: for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks): - for dchunk in dchunks: + for dchunk in dchunks[:-1]: if dchunk % zchunk: raise NotImplementedError( "Specified zarr chunks %r would overlap multiple dask " @@ -134,6 +134,11 @@ def _determine_zarr_chunks(enc_chunks, 
var_chunks, ndim): " Consider rechunking the data using " "`chunk()` or specifying different chunks in encoding." % (enc_chunks_tuple, var_chunks)) + if dchunks[-1] > zchunk: + raise ValueError( + "Final chunk of Zarr array must be smaller than first. " + "Variable %r has incompatible chunks. Consider rechunking " + "using `chunk()`." % var_chunks) return enc_chunks_tuple raise AssertionError( From 0cf810b08e20a6cb1d190b88f6cd7b446c7f2453 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Mon, 15 Oct 2018 06:28:44 +1100 Subject: [PATCH 5/8] removed whitespace --- xarray/tests/test_backends.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 9f660c51fb8..8963ed4edd3 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1393,7 +1393,6 @@ def test_chunk_encoding_with_dask(self): with self.roundtrip(ds_chunk_irreg) as original: with self.roundtrip(original) as actual: assert_identical(original, actual) - # - encoding specified - # specify compatible encodings @@ -1401,8 +1400,6 @@ def test_chunk_encoding_with_dask(self): ds_chunk4['var1'].encoding.update({'chunks': chunk_enc}) with self.roundtrip(ds_chunk4) as actual: assert (4,) == actual['var1'].encoding['chunks'] - - # TODO: remove this failure once syncronized overlapping writes are # supported by xarray @@ -1410,8 +1407,6 @@ def test_chunk_encoding_with_dask(self): with pytest.raises(NotImplementedError): with self.roundtrip(ds_chunk4) as actual: pass - - def test_hidden_zarr_keys(self): expected = create_test_data() From 24c1b454c614aedbabd2074d27ece56972ad4021 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 31 Oct 2018 04:41:47 +1100 Subject: [PATCH 6/8] modified error messages --- xarray/backends/zarr.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index bf7fe24f81f..a2b45f06915 100644 --- a/xarray/backends/zarr.py +++ 
b/xarray/backends/zarr.py @@ -80,13 +80,13 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( "Zarr requires uniform chunk sizes except for final chunk." - " Variable %r has incompatible chunks. Consider " + " Variable dask chunks %r are incompatible. Consider " "rechunking using `chunk()`." % (var_chunks,)) if any((chunks[0] < chunks[-1]) for chunks in var_chunks): raise ValueError( - "Final chunk of Zarr array must be smaller than first. " - "Variable %r has incompatible chunks. Consider rechunking " - "using `chunk()`." % var_chunks) + "Final chunk of Zarr array must be the same size or smaller " + "than the first. Variable Dask chunks %r are incompatible. " + "Consider rechunking using `chunk()`." % var_chunks) # return the first chunk for each dimension return tuple(chunk[0] for chunk in var_chunks) @@ -136,9 +136,11 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): % (enc_chunks_tuple, var_chunks)) if dchunks[-1] > zchunk: raise ValueError( - "Final chunk of Zarr array must be smaller than first. " - "Variable %r has incompatible chunks. Consider rechunking " - "using `chunk()`." % var_chunks) + "Final chunk of Zarr array must be the same size or smaller " + "than the first. The specified Zarr chunk encoding is %r, " + "but %r in variable Dask chunks %r is incompatible. " + "Consider rechunking using `chunk()`." 
+ % (enc_chunks_tuple, dchunks, var_chunks)) return enc_chunks_tuple raise AssertionError( From 6dc1f5928ba86bc67db657619e6dddeff82cb0ef Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 31 Oct 2018 04:48:09 +1100 Subject: [PATCH 7/8] fixed pep8 issues --- xarray/backends/zarr.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index a2b45f06915..06fe7f04e4f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -136,10 +136,10 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): % (enc_chunks_tuple, var_chunks)) if dchunks[-1] > zchunk: raise ValueError( - "Final chunk of Zarr array must be the same size or smaller " - "than the first. The specified Zarr chunk encoding is %r, " - "but %r in variable Dask chunks %r is incompatible. " - "Consider rechunking using `chunk()`." + "Final chunk of Zarr array must be the same size or " + "smaller than the first. The specified Zarr chunk " + "encoding is %r, but %r in variable Dask chunks %r is " + "incompatible. Consider rechunking using `chunk()`." % (enc_chunks_tuple, dchunks, var_chunks)) return enc_chunks_tuple From 93990270674eb8bcbd45d3a4fa8b43200684c606 Mon Sep 17 00:00:00 2001 From: Lily Wang Date: Wed, 31 Oct 2018 19:54:32 +1100 Subject: [PATCH 8/8] updated whats-new --- doc/whats-new.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2ffbc60622d..7a7117734e7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -136,6 +136,10 @@ Bug fixes the dates must be encoded using cftime rather than NumPy (:issue:`2272`). By `Spencer Clark `_. +- Chunked datasets can now roundtrip to Zarr storage continually + with ``to_zarr`` and ``open_zarr`` (:issue:`2300`). + By `Lily Wang `_. + .. _whats-new.0.10.9: v0.10.9 (21 September 2018)