
Refactor of the big if-chain to a dictionary in the form {backend_name: backend_open}. #4431


Merged · 3 commits · Sep 24, 2020
12 changes: 9 additions & 3 deletions HOW_TO_RELEASE.md
@@ -15,8 +15,14 @@ upstream https://github.com/pydata/xarray (push)

 1. Ensure your master branch is synced to upstream:
    ```sh
+   git switch master
    git pull upstream master
    ```
+2. Confirm there are no commits on stable that are not yet merged
+   ([ref](https://github.com/pydata/xarray/pull/4440)):
+   ```sh
+   git merge upstream/stable
+   ```
 2. Add a list of contributors with:
    ```sh
    git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format=%aN | sort -u | perl -pe 's/\n/$1, /'
@@ -81,12 +87,12 @@ upstream https://github.com/pydata/xarray (push)
    ```
 15. Update the stable branch (used by ReadTheDocs) and switch back to master:
    ```sh
-   git checkout stable
+   git switch stable
    git rebase master
    git push --force upstream stable
-   git checkout master
+   git switch master
    ```
-   It's OK to force push to 'stable' if necessary. (We also update the stable
+   It's OK to force push to `stable` if necessary. (We also update the stable
    branch with `git cherry-pick` for documentation-only fixes that apply to the
    current released version.)
 16. Add a section for the next release {0.X.Y+1} to doc/whats-new.rst:
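The cherry-pick route mentioned in step 15 for documentation-only fixes could look roughly like the following sketch (the SHA is a placeholder, not a real commit):

```sh
# hypothetical flow for back-porting a docs-only fix to stable without a rebase
git switch stable
git cherry-pick <sha-of-docs-fix>   # placeholder: the commit containing the fix
git push upstream stable
git switch master
```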
122 changes: 47 additions & 75 deletions xarray/backends/api.py
@@ -1,6 +1,5 @@
 import os.path
 import warnings
-from collections.abc import MutableMapping
 from glob import glob
 from io import BytesIO
 from numbers import Number
@@ -41,6 +40,17 @@
 DATAARRAY_NAME = "__xarray_dataarray_name__"
 DATAARRAY_VARIABLE = "__xarray_dataarray_variable__"
 
+ENGINES = {
+    "netcdf4": backends.NetCDF4DataStore.open,
+    "scipy": backends.ScipyDataStore,
+    "pydap": backends.PydapDataStore.open,
+    "h5netcdf": backends.H5NetCDFStore.open,
+    "pynio": backends.NioDataStore,
+    "pseudonetcdf": backends.PseudoNetCDFDataStore.open,
+    "cfgrib": backends.CfGribDataStore,
+    "zarr": backends.ZarrStore.open_group,
+}
+
 
 def _get_default_engine_remote_uri():
     try:
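The heart of the PR is this table: each engine name maps directly to the callable that opens its store, so selecting a backend becomes a dictionary lookup instead of an if/elif chain. A toy sketch of the pattern (names here are illustrative, not xarray's):

```python
# toy dispatch table, analogous to ENGINES above
def open_netcdf_like(path):
    return f"opened {path} with a netCDF-style backend"

def open_zarr_like(path):
    return f"opened {path} with a zarr-style backend"

OPENERS = {"netcdf": open_netcdf_like, "zarr": open_zarr_like}

# before: if name == "netcdf": ... elif name == "zarr": ...
# after: one lookup, and adding a backend is a one-line table entry
print(OPENERS["zarr"]("store.zarr"))
```

A useful side effect: the error message for an unknown engine can be generated from `list(ENGINES)`, so it never drifts out of sync with the supported backends.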
@@ -153,6 +163,17 @@ def _get_default_engine(path, allow_remote=False):
     return engine
 
 
+def _get_backend_cls(engine):
+    """Select open_dataset method based on current engine"""
+    try:
+        return ENGINES[engine]
+    except KeyError:
+        raise ValueError(
+            "unrecognized engine for open_dataset: {}\n"
+            "must be one of: {}".format(engine, list(ENGINES))
+        )
+
+
 def _normalize_path(path):
     if is_remote_uri(path):
         return path
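`_get_backend_cls` centralizes the "unrecognized engine" error that used to be raised from a hand-maintained list inside `open_dataset`. A minimal standalone rehearsal of that behavior (with stand-in values instead of real backend classes):

```python
# stand-in table; the real entries are backend open functions/classes
ENGINES = {"netcdf4": object, "scipy": object}

def _get_backend_cls(engine):
    """Select open_dataset method based on current engine"""
    try:
        return ENGINES[engine]
    except KeyError:
        raise ValueError(
            "unrecognized engine for open_dataset: {}\n"
            "must be one of: {}".format(engine, list(ENGINES))
        )

try:
    _get_backend_cls("foobar")
except ValueError as err:
    print(err)  # message lists exactly the keys of ENGINES
```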
@@ -407,23 +428,6 @@ def open_dataset(
     --------
     open_mfdataset
     """
-    engines = [
-        None,
-        "netcdf4",
-        "scipy",
-        "pydap",
-        "h5netcdf",
-        "pynio",
-        "cfgrib",
-        "pseudonetcdf",
-        "zarr",
-    ]
-    if engine not in engines:
-        raise ValueError(
-            "unrecognized engine for open_dataset: {}\n"
-            "must be one of: {}".format(engine, engines)
-        )
-
     if autoclose is not None:
         warnings.warn(
             "The autoclose argument is no longer used by "
@@ -450,6 +454,7 @@
 
     if backend_kwargs is None:
         backend_kwargs = {}
+    extra_kwargs = {}
 
     def maybe_decode_store(store, chunks, lock=False):
         ds = conventions.decode_cf(
@@ -532,68 +537,35 @@ def maybe_decode_store(store, chunks, lock=False):
 
     if isinstance(filename_or_obj, AbstractDataStore):
         store = filename_or_obj
-
-    elif isinstance(filename_or_obj, MutableMapping) and engine == "zarr":
-        # Zarr supports a wide range of access modes, but for now xarray either
-        # reads or writes from a store, never both.
-        # For open_dataset(engine="zarr"), we only read (i.e. mode="r")
-        mode = "r"
-        _backend_kwargs = backend_kwargs.copy()
-        overwrite_encoded_chunks = _backend_kwargs.pop("overwrite_encoded_chunks", None)
-        store = backends.ZarrStore.open_group(
-            filename_or_obj, mode=mode, group=group, **_backend_kwargs
-        )
-
-    elif isinstance(filename_or_obj, str):
-        filename_or_obj = _normalize_path(filename_or_obj)
+    else:
+        if isinstance(filename_or_obj, str):
+            filename_or_obj = _normalize_path(filename_or_obj)
 
-        if engine is None:
-            engine = _get_default_engine(filename_or_obj, allow_remote=True)
-        if engine == "netcdf4":
-            store = backends.NetCDF4DataStore.open(
-                filename_or_obj, group=group, lock=lock, **backend_kwargs
-            )
-        elif engine == "scipy":
-            store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs)
-        elif engine == "pydap":
-            store = backends.PydapDataStore.open(filename_or_obj, **backend_kwargs)
-        elif engine == "h5netcdf":
-            store = backends.H5NetCDFStore.open(
-                filename_or_obj, group=group, lock=lock, **backend_kwargs
-            )
-        elif engine == "pynio":
-            store = backends.NioDataStore(filename_or_obj, lock=lock, **backend_kwargs)
-        elif engine == "pseudonetcdf":
-            store = backends.PseudoNetCDFDataStore.open(
-                filename_or_obj, lock=lock, **backend_kwargs
-            )
-        elif engine == "cfgrib":
-            store = backends.CfGribDataStore(
-                filename_or_obj, lock=lock, **backend_kwargs
-            )
+            if engine is None:
+                engine = _get_default_engine(filename_or_obj, allow_remote=True)
+
+        elif engine != "zarr":
+            if engine not in [None, "scipy", "h5netcdf"]:
+                raise ValueError(
+                    "can only read bytes or file-like objects "
+                    "with engine='scipy' or 'h5netcdf'"
+                )
+            engine = _get_engine_from_magic_number(filename_or_obj)
+
+        if engine in ["netcdf4", "h5netcdf"]:
+            extra_kwargs["group"] = group
+            extra_kwargs["lock"] = lock
+        elif engine in ["pynio", "pseudonetcdf", "cfgrib"]:
+            extra_kwargs["lock"] = lock
         elif engine == "zarr":
             # on ZarrStore, mode='r', synchronizer=None, group=None,
             # consolidated=False.
-            _backend_kwargs = backend_kwargs.copy()
-            overwrite_encoded_chunks = _backend_kwargs.pop(
+            backend_kwargs = backend_kwargs.copy()
+            overwrite_encoded_chunks = backend_kwargs.pop(
                 "overwrite_encoded_chunks", None
             )
-            store = backends.ZarrStore.open_group(
-                filename_or_obj, group=group, **_backend_kwargs
-            )
-    else:
-        if engine not in [None, "scipy", "h5netcdf"]:
-            raise ValueError(
-                "can only read bytes or file-like objects "
-                "with engine='scipy' or 'h5netcdf'"
-            )
-        engine = _get_engine_from_magic_number(filename_or_obj)
-        if engine == "scipy":
-            store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs)
-        elif engine == "h5netcdf":
-            store = backends.H5NetCDFStore.open(
-                filename_or_obj, group=group, lock=lock, **backend_kwargs
-            )
+            extra_kwargs["mode"] = "r"
+            extra_kwargs["group"] = group
+
+        opener = _get_backend_cls(engine)
+        store = opener(filename_or_obj, **extra_kwargs, **backend_kwargs)
 
     with close_on_error(store):
         ds = maybe_decode_store(store, chunks)
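Putting the hunks together, the refactored open path reduces to: normalize the input or sniff the engine, collect the per-engine extra keyword arguments, then make one dispatch call. A condensed paraphrase of that shape (not xarray's verbatim code; the stub `ENGINES` entry stands in for the real backends):

```python
# condensed paraphrase of the refactored dispatch in open_dataset
ENGINES = {"scipy": lambda obj, **kw: ("ScipyDataStore", obj, kw)}  # stub entry

def _get_backend_cls(engine):
    try:
        return ENGINES[engine]
    except KeyError:
        raise ValueError(f"unrecognized engine for open_dataset: {engine}")

def open_store(filename_or_obj, engine, group=None, lock=None, backend_kwargs=None):
    backend_kwargs = dict(backend_kwargs or {})
    extra_kwargs = {}
    if engine in ["netcdf4", "h5netcdf"]:
        extra_kwargs["group"] = group
        extra_kwargs["lock"] = lock
    elif engine in ["pynio", "pseudonetcdf", "cfgrib"]:
        extra_kwargs["lock"] = lock
    elif engine == "zarr":
        extra_kwargs["mode"] = "r"
        extra_kwargs["group"] = group
    opener = _get_backend_cls(engine)           # dict lookup replaces the if-chain
    return opener(filename_or_obj, **extra_kwargs, **backend_kwargs)

print(open_store("file.nc", "scipy"))
```

The only engine-specific logic left behind is argument plumbing; the store construction itself is uniform.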
2 changes: 1 addition & 1 deletion xarray/backends/zarr.py
@@ -7,7 +7,6 @@
 from ..core.pycompat import integer_types
 from ..core.utils import FrozenDict, HiddenKeyDict
 from ..core.variable import Variable
-from .api import open_dataset
 from .common import AbstractWritableDataStore, BackendArray, _encode_variable_name
 
 # need some special secret attributes to tell us the dimensions
@@ -647,6 +646,7 @@ def open_zarr(
     ----------
     http://zarr.readthedocs.io/
     """
+    from .api import open_dataset
 
     if kwargs:
         raise TypeError(
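Moving `from .api import open_dataset` from module level into the body of `open_zarr` is the standard way to break an import cycle: since `api.py` now refers to `ZarrStore` in its module-level `ENGINES` table, `zarr.py` must not import `api` until call time. A generic sketch of the pattern (module names here are hypothetical, not xarray's):

```python
# zarr_backend.py (analogue of xarray/backends/zarr.py)
def open_zarr(store, **kwargs):
    # deferred import: resolved when open_zarr is called, not when this
    # module is imported, so the api module may freely import this one
    from api import open_dataset  # hypothetical module name

    return open_dataset(store, engine="zarr", **kwargs)
```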
2 changes: 1 addition & 1 deletion xarray/tests/test_backends.py
@@ -2224,7 +2224,7 @@ def test_engine(self):
             open_dataset(tmp_file, engine="foobar")
 
         netcdf_bytes = data.to_netcdf()
-        with raises_regex(ValueError, "unrecognized engine"):
+        with raises_regex(ValueError, "can only read bytes or file-like"):
             open_dataset(BytesIO(netcdf_bytes), engine="foobar")
 
     def test_cross_engine_read_write_netcdf3(self):
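The test change pins down the user-visible consequence of the refactor: for byte streams, the unknown-engine rejection now comes from the bytes/file-like branch rather than from the engine table. An equivalent check in plain pytest against this PR's behavior (`raises_regex` above is xarray's internal helper; this assumes a netCDF-capable backend such as scipy or netcdf4 is installed):

```python
from io import BytesIO

import pytest
import xarray as xr

def test_unknown_engine_messages(tmp_path):
    ds = xr.Dataset({"x": ("dim", [1, 2, 3])})
    path = tmp_path / "data.nc"
    ds.to_netcdf(path)

    # path input: rejected by the ENGINES lookup in _get_backend_cls
    with pytest.raises(ValueError, match="unrecognized engine"):
        xr.open_dataset(str(path), engine="foobar")

    # bytes input: rejected earlier, by the bytes/file-like branch
    with pytest.raises(ValueError, match="can only read bytes or file-like"):
        xr.open_dataset(BytesIO(ds.to_netcdf()), engine="foobar")
```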