diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md
index 6521cdcd82a..5352d427909 100644
--- a/HOW_TO_RELEASE.md
+++ b/HOW_TO_RELEASE.md
@@ -15,8 +15,14 @@ upstream        https://github.com/pydata/xarray (push)
  1. Ensure your master branch is synced to upstream:
     ```sh
+    git switch master
     git pull upstream master
     ```
+ 2. Confirm there are no commits on stable that are not yet merged
+    ([ref](https://github.com/pydata/xarray/pull/4440)):
+    ```sh
+    git merge upstream/stable
+    ```
  2. Add a list of contributors with:
     ```sh
     git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format=%aN | sort -u | perl -pe 's/\n/$1, /'
     ```
@@ -81,12 +87,12 @@ upstream        https://github.com/pydata/xarray (push)
     ```
 15. Update the stable branch (used by ReadTheDocs) and switch back to master:
     ```sh
-    git checkout stable
+    git switch stable
     git rebase master
     git push --force upstream stable
-    git checkout master
+    git switch master
     ```
-   It's OK to force push to 'stable' if necessary. (We also update the stable
+   It's OK to force push to `stable` if necessary. (We also update the stable
    branch with `git cherry-pick` for documentation-only fixes that apply to the
    current released version.)
 16. Add a section for the next release {0.X.Y+1} to doc/whats-new.rst:
diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 9049db5d602..9ea222954f4 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -1,6 +1,5 @@
 import os.path
 import warnings
-from collections.abc import MutableMapping
 from glob import glob
 from io import BytesIO
 from numbers import Number
@@ -41,6 +40,17 @@
 DATAARRAY_NAME = "__xarray_dataarray_name__"
 DATAARRAY_VARIABLE = "__xarray_dataarray_variable__"

+ENGINES = {
+    "netcdf4": backends.NetCDF4DataStore.open,
+    "scipy": backends.ScipyDataStore,
+    "pydap": backends.PydapDataStore.open,
+    "h5netcdf": backends.H5NetCDFStore.open,
+    "pynio": backends.NioDataStore,
+    "pseudonetcdf": backends.PseudoNetCDFDataStore.open,
+    "cfgrib": backends.CfGribDataStore,
+    "zarr": backends.ZarrStore.open_group,
+}
+

 def _get_default_engine_remote_uri():
     try:
@@ -153,6 +163,17 @@ def _get_default_engine(path, allow_remote=False):
     return engine


+def _get_backend_cls(engine):
+    """Select open_dataset method based on current engine"""
+    try:
+        return ENGINES[engine]
+    except KeyError:
+        raise ValueError(
+            "unrecognized engine for open_dataset: {}\n"
+            "must be one of: {}".format(engine, list(ENGINES))
+        )
+
+
 def _normalize_path(path):
     if is_remote_uri(path):
         return path
@@ -407,23 +428,6 @@ def open_dataset(
     --------
     open_mfdataset
     """
-    engines = [
-        None,
-        "netcdf4",
-        "scipy",
-        "pydap",
-        "h5netcdf",
-        "pynio",
-        "cfgrib",
-        "pseudonetcdf",
-        "zarr",
-    ]
-    if engine not in engines:
-        raise ValueError(
-            "unrecognized engine for open_dataset: {}\n"
-            "must be one of: {}".format(engine, engines)
-        )
-
     if autoclose is not None:
         warnings.warn(
             "The autoclose argument is no longer used by "
@@ -450,6 +454,7 @@ def open_dataset(

     if backend_kwargs is None:
         backend_kwargs = {}
+    extra_kwargs = {}

     def maybe_decode_store(store, chunks, lock=False):
         ds = conventions.decode_cf(
@@ -532,68 +537,35 @@ def maybe_decode_store(store, chunks, lock=False):

     if isinstance(filename_or_obj, AbstractDataStore):
         store = filename_or_obj
+    else:
+        if isinstance(filename_or_obj, str):
+            filename_or_obj = _normalize_path(filename_or_obj)

-    elif isinstance(filename_or_obj, MutableMapping) and engine == "zarr":
-        # Zarr supports a wide range of access modes, but for now xarray either
-        # reads or writes from a store, never both.
-        # For open_dataset(engine="zarr"), we only read (i.e. mode="r")
-        mode = "r"
-        _backend_kwargs = backend_kwargs.copy()
-        overwrite_encoded_chunks = _backend_kwargs.pop("overwrite_encoded_chunks", None)
-        store = backends.ZarrStore.open_group(
-            filename_or_obj, mode=mode, group=group, **_backend_kwargs
-        )
-
-    elif isinstance(filename_or_obj, str):
-        filename_or_obj = _normalize_path(filename_or_obj)
+            if engine is None:
+                engine = _get_default_engine(filename_or_obj, allow_remote=True)
+        elif engine != "zarr":
+            if engine not in [None, "scipy", "h5netcdf"]:
+                raise ValueError(
+                    "can only read bytes or file-like objects "
+                    "with engine='scipy' or 'h5netcdf'"
+                )
+            engine = _get_engine_from_magic_number(filename_or_obj)

-        if engine is None:
-            engine = _get_default_engine(filename_or_obj, allow_remote=True)
-        if engine == "netcdf4":
-            store = backends.NetCDF4DataStore.open(
-                filename_or_obj, group=group, lock=lock, **backend_kwargs
-            )
-        elif engine == "scipy":
-            store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs)
-        elif engine == "pydap":
-            store = backends.PydapDataStore.open(filename_or_obj, **backend_kwargs)
-        elif engine == "h5netcdf":
-            store = backends.H5NetCDFStore.open(
-                filename_or_obj, group=group, lock=lock, **backend_kwargs
-            )
-        elif engine == "pynio":
-            store = backends.NioDataStore(filename_or_obj, lock=lock, **backend_kwargs)
-        elif engine == "pseudonetcdf":
-            store = backends.PseudoNetCDFDataStore.open(
-                filename_or_obj, lock=lock, **backend_kwargs
-            )
-        elif engine == "cfgrib":
-            store = backends.CfGribDataStore(
-                filename_or_obj, lock=lock, **backend_kwargs
-            )
+        if engine in ["netcdf4", "h5netcdf"]:
+            extra_kwargs["group"] = group
+            extra_kwargs["lock"] = lock
+        elif engine in ["pynio", "pseudonetcdf", "cfgrib"]:
+            extra_kwargs["lock"] = lock
         elif engine == "zarr":
-            # on ZarrStore, mode='r', synchronizer=None, group=None,
-            # consolidated=False.
-            _backend_kwargs = backend_kwargs.copy()
-            overwrite_encoded_chunks = _backend_kwargs.pop(
+            backend_kwargs = backend_kwargs.copy()
+            overwrite_encoded_chunks = backend_kwargs.pop(
                 "overwrite_encoded_chunks", None
             )
-            store = backends.ZarrStore.open_group(
-                filename_or_obj, group=group, **_backend_kwargs
-            )
-        else:
-            if engine not in [None, "scipy", "h5netcdf"]:
-                raise ValueError(
-                    "can only read bytes or file-like objects "
-                    "with engine='scipy' or 'h5netcdf'"
-                )
-            engine = _get_engine_from_magic_number(filename_or_obj)
-            if engine == "scipy":
-                store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs)
-            elif engine == "h5netcdf":
-                store = backends.H5NetCDFStore.open(
-                    filename_or_obj, group=group, lock=lock, **backend_kwargs
-                )
+            extra_kwargs["mode"] = "r"
+            extra_kwargs["group"] = group
+
+        opener = _get_backend_cls(engine)
+        store = opener(filename_or_obj, **extra_kwargs, **backend_kwargs)

     with close_on_error(store):
         ds = maybe_decode_store(store, chunks)
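For reference, the api.py change swaps `open_dataset`'s long `if`/`elif` chain for table-driven dispatch: `ENGINES` maps engine names to opener callables, `_get_backend_cls` converts an unknown name into a `ValueError` that enumerates the valid options, and the keywords each opener accepts are collected in `extra_kwargs` ahead of a single call site. A minimal, self-contained sketch of the same pattern; the toy engine entries and the simplified `open_dataset` signature below are illustrative placeholders, not xarray's real backends:

```python
# Sketch of the dispatch-table pattern from api.py above. The engine entries
# and opener behavior are toy stand-ins, not xarray's real data stores.
ENGINES = {
    "netcdf4": lambda path, **kwargs: ("netcdf4", path, kwargs),
    "zarr": lambda path, **kwargs: ("zarr", path, kwargs),
}


def _get_backend_cls(engine):
    """Look up an opener, enumerating the valid names on failure."""
    try:
        return ENGINES[engine]
    except KeyError:
        raise ValueError(
            "unrecognized engine for open_dataset: {}\n"
            "must be one of: {}".format(engine, list(ENGINES))
        )


def open_dataset(path, engine, group=None, lock=None, backend_kwargs=None):
    backend_kwargs = dict(backend_kwargs or {})
    extra_kwargs = {}
    # Collect only the keywords this engine's opener actually accepts.
    if engine == "netcdf4":
        extra_kwargs["group"] = group
        extra_kwargs["lock"] = lock
    elif engine == "zarr":
        extra_kwargs["mode"] = "r"
        extra_kwargs["group"] = group
    opener = _get_backend_cls(engine)
    return opener(path, **extra_kwargs, **backend_kwargs)


print(open_dataset("example.nc", engine="netcdf4"))
try:
    open_dataset("example.nc", engine="foobar")
except ValueError as err:
    print(err)  # names "foobar" and lists ['netcdf4', 'zarr']
```

Keeping the table at module level gives the error message a single source of truth for the supported engines, which is why the hard-coded `engines` list inside `open_dataset` could be deleted.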
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 2651f3148fd..260d27fbabe 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -7,7 +7,6 @@
 from ..core.pycompat import integer_types
 from ..core.utils import FrozenDict, HiddenKeyDict
 from ..core.variable import Variable
-from .api import open_dataset
 from .common import AbstractWritableDataStore, BackendArray, _encode_variable_name

 # need some special secret attributes to tell us the dimensions
@@ -647,6 +646,7 @@ def open_zarr(
     ----------
     http://zarr.readthedocs.io/
     """
+    from .api import open_dataset

     if kwargs:
         raise TypeError(
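The zarr.py change breaks an import cycle: api.py now references `backends.ZarrStore` in its module-level `ENGINES` table, while zarr.py previously imported `open_dataset` from api.py at module level. Moving the import into the body of `open_zarr` defers it to call time, when both modules have finished initializing. A generic sketch of the pattern, with hypothetical module names:

```python
# Hypothetical two-module sketch of the deferred-import fix above; the
# package and module names are made up for illustration.

# pkg/api.py
from . import zarr_backend  # safe: zarr_backend no longer imports api at the top


def open_dataset(path, engine=None):
    ...


# pkg/zarr_backend.py
def open_zarr(path):
    # Deferred import: runs at call time, after pkg.api has finished
    # initializing, so the api -> zarr_backend -> api cycle is harmless.
    from .api import open_dataset

    return open_dataset(path, engine="zarr")
```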
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index f9cc802f2c8..c9030e31a9e 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -2224,7 +2224,7 @@ def test_engine(self):
             open_dataset(tmp_file, engine="foobar")

         netcdf_bytes = data.to_netcdf()
-        with raises_regex(ValueError, "unrecognized engine"):
+        with raises_regex(ValueError, "can only read bytes or file-like"):
             open_dataset(BytesIO(netcdf_bytes), engine="foobar")

     def test_cross_engine_read_write_netcdf3(self):
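The updated assertion tracks the new control flow: a filesystem path with a bogus engine still reaches `_get_backend_cls` and its "unrecognized engine" message, while bytes and file-like objects are now rejected earlier by the `engine not in [None, "scipy", "h5netcdf"]` guard. A sketch of the two error paths using plain `pytest.raises` in place of xarray's `raises_regex` helper, assuming a build with this patch applied:

```python
import pytest
import xarray as xr
from io import BytesIO


def test_engine_error_paths():
    # Path input: _get_backend_cls raises before any file access,
    # so the file does not even need to exist.
    with pytest.raises(ValueError, match="unrecognized engine"):
        xr.open_dataset("does-not-exist.nc", engine="foobar")

    # Bytes/file-like input: rejected before dispatch, by the guard
    # that precedes magic-number sniffing.
    with pytest.raises(ValueError, match="can only read bytes or file-like"):
        xr.open_dataset(BytesIO(b""), engine="foobar")
```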