diff --git a/doc/io.rst b/doc/io.rst index c85f566392d..1039e8bc352 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -609,8 +609,9 @@ store is already present at that path, an error will be raised, preventing it from being overwritten. To override this behavior and overwrite an existing store, add ``mode='w'`` when invoking ``to_zarr``. -It is also possible to append to an existing store. For that, add ``mode='a'`` -and set ``append_dim`` to the name of the dimension along which to append. +It is also possible to append to an existing store. For that, set +``append_dim`` to the name of the dimension along which to append. ``mode`` +can be omitted as it will internally be set to ``'a'``. .. ipython:: python :suppress: @@ -628,7 +629,7 @@ and set ``append_dim`` to the name of the dimension along which to append. coords={'x': [10, 20, 30, 40], 'y': [1,2,3,4,5], 't': pd.date_range('2001-01-03', periods=2)}) - ds2.to_zarr('path/to/directory.zarr', mode='a', append_dim='t') + ds2.to_zarr('path/to/directory.zarr', append_dim='t') To store variable length strings use ``dtype=object``. diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f41a9d78048..76d6f94f54e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -27,6 +27,10 @@ New functions/methods Enhancements ~~~~~~~~~~~~ +- In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if + ``append_dim`` is set, as it will automatically be set to ``'a'`` internally. + By `David Brochart `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index e0f269eb51f..21d91a886af 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1102,7 +1102,7 @@ def _validate_append_dim_and_encoding(ds_to_append, store, append_dim, ) -def to_zarr(dataset, store=None, mode='w-', synchronizer=None, group=None, +def to_zarr(dataset, store=None, mode=None, synchronizer=None, group=None, encoding=None, compute=True, consolidated=False, append_dim=None): """This function creates an appropriate datastore for writing a dataset to a zarr ztore @@ -1118,7 +1118,7 @@ def to_zarr(dataset, store=None, mode='w-', synchronizer=None, group=None, _validate_dataset_names(dataset) _validate_attrs(dataset) - if mode == "a": + if mode == 'a': _validate_datatypes_for_zarr_append(dataset) _validate_append_dim_and_encoding(dataset, store, append_dim, group=group, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ebc23773738..7cdbdce1ec7 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1389,7 +1389,7 @@ def to_netcdf( def to_zarr( self, store: Union[MutableMapping, str, Path] = None, - mode: str = 'w-', + mode: str = None, synchronizer=None, group: str = None, encoding: Mapping = None, @@ -1407,10 +1407,13 @@ def to_zarr( ---------- store : MutableMapping, str or Path, optional Store or path to directory in file system. - mode : {'w', 'w-', 'a'} + mode : {'w', 'w-', 'a', None} Persistence mode: 'w' means create (overwrite if exists); 'w-' means create (fail if exists); 'a' means append (create if does not exist). + If ``append_dim`` is set, ``mode`` can be omitted as it is + internally set to ``'a'``. Otherwise, ``mode`` will default to + `w-` if not set. synchronizer : object, optional Array synchronizer group : str, optional @@ -1426,7 +1429,7 @@ def to_zarr( If True, apply zarr's `consolidate_metadata` function to the store after writing. append_dim: hashable, optional - If mode='a', the dimension on which the data will be appended. + If set, the dimension on which the data will be appended. References ---------- @@ -1434,6 +1437,16 @@ def to_zarr( """ if encoding is None: encoding = {} + if (mode == 'a') or (append_dim is not None): + if mode is None: + mode = 'a' + elif mode != 'a': + raise ValueError( + "append_dim was set along with mode='{}', either set " + "mode='a' or don't set it.".format(mode) + ) + elif mode is None: + mode = 'w-' if mode not in ['w', 'w-', 'a']: # TODO: figure out how to handle 'r+' raise ValueError("The only supported options for mode are 'w'," diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f5c27f2fb92..572c18272df 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1654,7 +1654,7 @@ def test_write_persistence_modes(self): # check append mode for append write with self.create_zarr_target() as store_target: ds.to_zarr(store_target, mode='w') - ds_to_append.to_zarr(store_target, mode='a', append_dim='time') + ds_to_append.to_zarr(store_target, append_dim='time') original = xr.concat([ds, ds_to_append], dim='time') assert_identical(original, xr.open_zarr(store_target)) @@ -1690,7 +1690,7 @@ def test_append_write(self): ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: ds.to_zarr(store_target, mode='w') - ds_to_append.to_zarr(store_target, mode='a', append_dim='time') + ds_to_append.to_zarr(store_target, append_dim='time') original = xr.concat([ds, ds_to_append], dim='time') assert_identical(original, xr.open_zarr(store_target)) @@ -1706,8 +1706,7 @@ def test_append_with_invalid_dim_raises(self): with pytest.raises(ValueError): with self.create_zarr_target() as store_target: ds.to_zarr(store_target, mode='w') - ds_to_append.to_zarr(store_target, mode='a', - append_dim='notvalid') + ds_to_append.to_zarr(store_target, append_dim='notvalid') def test_append_with_append_dim_not_set_raises(self): @@ -1719,6 +1718,17 @@ def test_append_with_append_dim_not_set_raises(self): ds.to_zarr(store_target, mode='w') ds_to_append.to_zarr(store_target, mode='a') + def test_append_with_mode_not_a_raises(self): + + ds, ds_to_append, _ = create_append_test_data() + + # check failure when append_dim is set and mode != 'a' + with pytest.raises(ValueError): + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode='w') + ds_to_append.to_zarr(store_target, mode='w', + append_dim='time') + def test_append_with_existing_encoding_raises(self): ds, ds_to_append, _ = create_append_test_data() @@ -1727,8 +1737,7 @@ def test_append_with_existing_encoding_raises(self): with pytest.raises(ValueError): with self.create_zarr_target() as store_target: ds.to_zarr(store_target, mode='w') - ds_to_append.to_zarr(store_target, mode='a', - append_dim='time', + ds_to_append.to_zarr(store_target, append_dim='time', encoding={'da': {'compressor': None}}) def test_check_encoding_is_consistent_after_append(self): @@ -1741,7 +1750,7 @@ def test_check_encoding_is_consistent_after_append(self): compressor = zarr.Blosc() encoding = {'da': {'compressor': compressor}} ds.to_zarr(store_target, mode='w', encoding=encoding) - ds_to_append.to_zarr(store_target, mode='a', append_dim='time') + ds_to_append.to_zarr(store_target, append_dim='time') actual_ds = xr.open_zarr(store_target) actual_encoding = actual_ds['da'].encoding['compressor'] assert actual_encoding.get_config() == compressor.get_config() @@ -1802,7 +1811,7 @@ def test_to_zarr_append_compute_false_roundtrip(self): assert_identical(ds, actual) delayed_obj = self.save(ds_to_append, store, compute=False, - mode='a', append_dim='time') + append_dim='time') assert isinstance(delayed_obj, Delayed) with pytest.raises(AssertionError):