From 4f414f2d5eb2e5a12fb8ae1012c5ac7aa43b6f0b Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 22 Sep 2020 14:06:24 -0700 Subject: [PATCH 1/3] Add docs re stable branch (#4444) * Add docs re stable branch * Update HOW_TO_RELEASE.md Co-authored-by: keewis Co-authored-by: keewis --- HOW_TO_RELEASE.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 6521cdcd82a..5352d427909 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -15,8 +15,14 @@ upstream https://github.com/pydata/xarray (push) 1. Ensure your master branch is synced to upstream: ```sh + git switch master git pull upstream master ``` + 2. Confirm there are no commits on stable that are not yet merged + ([ref](https://github.com/pydata/xarray/pull/4440)): + ```sh + git merge upstream/stable + ``` 2. Add a list of contributors with: ```sh git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format=%aN | sort -u | perl -pe 's/\n/$1, /' @@ -81,12 +87,12 @@ upstream https://github.com/pydata/xarray (push) ``` 15. Update the stable branch (used by ReadTheDocs) and switch back to master: ```sh - git checkout stable + git switch stable git rebase master git push --force upstream stable - git checkout master + git switch master ``` - It's OK to force push to 'stable' if necessary. (We also update the stable + It's OK to force push to `stable` if necessary. (We also update the stable branch with `git cherry-pick` for documentation only fixes that apply the current released version.) 16. 
Add a section for the next release {0.X.Y+1} to doc/whats-new.rst: From 4a1319243692e2a4425f9a7faf0c5df8a80a630d Mon Sep 17 00:00:00 2001 From: Monica Rossetti Date: Wed, 23 Sep 2020 15:49:40 +0200 Subject: [PATCH 2/3] Port engine selection refactor from #3166 and add zarr --- xarray/backends/api.py | 127 ++++++++++++++-------------------- xarray/backends/zarr.py | 2 +- xarray/tests/test_backends.py | 2 +- 3 files changed, 54 insertions(+), 77 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 9049db5d602..a77400b7864 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -41,6 +41,17 @@ DATAARRAY_NAME = "__xarray_dataarray_name__" DATAARRAY_VARIABLE = "__xarray_dataarray_variable__" +ENGINES = { + "netcdf4": backends.NetCDF4DataStore.open, + "scipy": backends.ScipyDataStore, + "pydap": backends.PydapDataStore.open, + "h5netcdf": backends.H5NetCDFStore.open, + "pynio": backends.NioDataStore, + "pseudonetcdf": backends.PseudoNetCDFDataStore.open, + "cfgrib": backends.CfGribDataStore, + "zarr": backends.ZarrStore.open_group, +} + def _get_default_engine_remote_uri(): try: @@ -153,6 +164,17 @@ def _get_default_engine(path, allow_remote=False): return engine +def _get_backend_cls(engine): + """Select open_dataset method based on current engine""" + try: + return ENGINES[engine] + except KeyError: + raise ValueError( + "unrecognized engine for open_dataset: {}\n" + "must be one of: {}".format(engine, list(ENGINES)) + ) + + def _normalize_path(path): if is_remote_uri(path): return path @@ -407,23 +429,6 @@ def open_dataset( -------- open_mfdataset """ - engines = [ - None, - "netcdf4", - "scipy", - "pydap", - "h5netcdf", - "pynio", - "cfgrib", - "pseudonetcdf", - "zarr", - ] - if engine not in engines: - raise ValueError( - "unrecognized engine for open_dataset: {}\n" - "must be one of: {}".format(engine, engines) - ) - if autoclose is not None: warnings.warn( "The autoclose argument is no longer used by " @@ -450,6 +455,7 @@ def 
open_dataset( if backend_kwargs is None: backend_kwargs = {} + extra_kwargs = {} def maybe_decode_store(store, chunks, lock=False): ds = conventions.decode_cf( @@ -532,68 +538,39 @@ def maybe_decode_store(store, chunks, lock=False): if isinstance(filename_or_obj, AbstractDataStore): store = filename_or_obj + else: + if isinstance(filename_or_obj, str): + filename_or_obj = _normalize_path(filename_or_obj) + + if engine is None: + engine = _get_default_engine(filename_or_obj, allow_remote=True) + elif isinstance(filename_or_obj, MutableMapping) and engine == "zarr": + # Zarr supports a wide range of access modes, but for now xarray either + # reads or writes from a store, never both. + # For open_dataset(engine="zarr"), we only read (i.e. mode="r") + extra_kwargs["mode"] = "r" + else: + if engine not in [None, "scipy", "h5netcdf"]: + raise ValueError( + "can only read bytes or file-like objects " + "with engine='scipy' or 'h5netcdf'" + ) + engine = _get_engine_from_magic_number(filename_or_obj) - elif isinstance(filename_or_obj, MutableMapping) and engine == "zarr": - # Zarr supports a wide range of access modes, but for now xarray either - # reads or writes from a store, never both. - # For open_dataset(engine="zarr"), we only read (i.e. 
mode="r") - mode = "r" - _backend_kwargs = backend_kwargs.copy() - overwrite_encoded_chunks = _backend_kwargs.pop("overwrite_encoded_chunks", None) - store = backends.ZarrStore.open_group( - filename_or_obj, mode=mode, group=group, **_backend_kwargs - ) - - elif isinstance(filename_or_obj, str): - filename_or_obj = _normalize_path(filename_or_obj) - - if engine is None: - engine = _get_default_engine(filename_or_obj, allow_remote=True) - if engine == "netcdf4": - store = backends.NetCDF4DataStore.open( - filename_or_obj, group=group, lock=lock, **backend_kwargs - ) - elif engine == "scipy": - store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs) - elif engine == "pydap": - store = backends.PydapDataStore.open(filename_or_obj, **backend_kwargs) - elif engine == "h5netcdf": - store = backends.H5NetCDFStore.open( - filename_or_obj, group=group, lock=lock, **backend_kwargs - ) - elif engine == "pynio": - store = backends.NioDataStore(filename_or_obj, lock=lock, **backend_kwargs) - elif engine == "pseudonetcdf": - store = backends.PseudoNetCDFDataStore.open( - filename_or_obj, lock=lock, **backend_kwargs - ) - elif engine == "cfgrib": - store = backends.CfGribDataStore( - filename_or_obj, lock=lock, **backend_kwargs - ) + if engine in ["netcdf4", "h5netcdf"]: + extra_kwargs["group"] = group + extra_kwargs["lock"] = lock + elif engine in ["pynio", "pseudonetcdf", "cfgrib"]: + extra_kwargs["lock"] = lock elif engine == "zarr": - # on ZarrStore, mode='r', synchronizer=None, group=None, - # consolidated=False. 
- _backend_kwargs = backend_kwargs.copy() - overwrite_encoded_chunks = _backend_kwargs.pop( + backend_kwargs = backend_kwargs.copy() + overwrite_encoded_chunks = backend_kwargs.pop( "overwrite_encoded_chunks", None ) - store = backends.ZarrStore.open_group( - filename_or_obj, group=group, **_backend_kwargs - ) - else: - if engine not in [None, "scipy", "h5netcdf"]: - raise ValueError( - "can only read bytes or file-like objects " - "with engine='scipy' or 'h5netcdf'" - ) - engine = _get_engine_from_magic_number(filename_or_obj) - if engine == "scipy": - store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs) - elif engine == "h5netcdf": - store = backends.H5NetCDFStore.open( - filename_or_obj, group=group, lock=lock, **backend_kwargs - ) + extra_kwargs["group"] = group + + opener = _get_backend_cls(engine) + store = opener(filename_or_obj, **extra_kwargs, **backend_kwargs) with close_on_error(store): ds = maybe_decode_store(store, chunks) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 2651f3148fd..260d27fbabe 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -7,7 +7,6 @@ from ..core.pycompat import integer_types from ..core.utils import FrozenDict, HiddenKeyDict from ..core.variable import Variable -from .api import open_dataset from .common import AbstractWritableDataStore, BackendArray, _encode_variable_name # need some special secret attributes to tell us the dimensions @@ -647,6 +646,7 @@ def open_zarr( ---------- http://zarr.readthedocs.io/ """ + from .api import open_dataset if kwargs: raise TypeError( diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f9cc802f2c8..c9030e31a9e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2224,7 +2224,7 @@ def test_engine(self): open_dataset(tmp_file, engine="foobar") netcdf_bytes = data.to_netcdf() - with raises_regex(ValueError, "unrecognized engine"): + with raises_regex(ValueError, "can only read bytes 
or file-like"): open_dataset(BytesIO(netcdf_bytes), engine="foobar") def test_cross_engine_read_write_netcdf3(self): From 6844fb6fd1e4a0b1611cf50405c557a7d1833dbe Mon Sep 17 00:00:00 2001 From: Alessandro Amici Date: Thu, 24 Sep 2020 07:43:05 +0200 Subject: [PATCH 3/3] Always add `mode="r"` to zarr and simplify logic --- xarray/backends/api.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index a77400b7864..9ea222954f4 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1,6 +1,5 @@ import os.path import warnings -from collections.abc import MutableMapping from glob import glob from io import BytesIO from numbers import Number @@ -544,12 +543,7 @@ def maybe_decode_store(store, chunks, lock=False): if engine is None: engine = _get_default_engine(filename_or_obj, allow_remote=True) - elif isinstance(filename_or_obj, MutableMapping) and engine == "zarr": - # Zarr supports a wide range of access modes, but for now xarray either - # reads or writes from a store, never both. - # For open_dataset(engine="zarr"), we only read (i.e. mode="r") - extra_kwargs["mode"] = "r" - else: + elif engine != "zarr": if engine not in [None, "scipy", "h5netcdf"]: raise ValueError( "can only read bytes or file-like objects " @@ -567,6 +561,7 @@ def maybe_decode_store(store, chunks, lock=False): overwrite_encoded_chunks = backend_kwargs.pop( "overwrite_encoded_chunks", None ) + extra_kwargs["mode"] = "r" extra_kwargs["group"] = group opener = _get_backend_cls(engine)