Add missing_dims argument allowing isel() to ignore missing dimensions #3923

Merged

7 commits merged on Apr 3, 2020
5 changes: 5 additions & 0 deletions doc/whats-new.rst
@@ -35,6 +35,11 @@ New Features
:py:func:`combine_by_coords` and :py:func:`combine_nested` using
combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`)
By `John Omotani <https://github.com/johnomotani>`_
- ``missing_dims`` argument to :py:meth:`Dataset.isel`,
:py:meth:`DataArray.isel` and :py:meth:`Variable.isel`, allowing the
exception raised when a dimension passed to ``isel`` is not present to be
replaced with a warning, or the missing dimension to be ignored entirely.
(:issue:`3866`, :pull:`3923`)
By `John Omotani <https://github.com/johnomotani>`_
- Limited the length of array items with long string reprs to a
reasonable width (:pull:`3900`)
By `Maximilian Roos <https://github.com/max-sixty>`_
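For context, a minimal sketch of how the new argument is used from the public API (hypothetical data; behaviour as described in the entry above):

```python
import numpy as np
import xarray as xr

# Hypothetical DataArray with dimensions "x" and "y".
da = xr.DataArray(np.arange(12).reshape(3, 4), dims=("x", "y"))

# Default ("raise"): an unknown dimension is an error, as before.
# da.isel(not_a_dim=0)  # ValueError: dimensions {'not_a_dim'} do not exist. ...

# "warn": emit a UserWarning and ignore the missing dimension.
da.isel(not_a_dim=0, missing_dims="warn")

# "ignore": silently ignore the missing dimension.
da.isel(not_a_dim=0, missing_dims="ignore")
```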
30 changes: 28 additions & 2 deletions xarray/core/dataarray.py
@@ -1007,25 +1007,51 @@ def isel(
self,
indexers: Mapping[Hashable, Any] = None,
drop: bool = False,
missing_dims: str = "raise",
**indexers_kwargs: Any,
) -> "DataArray":
"""Return a new DataArray whose data is given by integer indexing
along the specified dimension(s).

Parameters
----------
indexers : dict, optional
A dict with keys matching dimensions and values given
by integers, slice objects or arrays.
indexer can be an integer, slice, array-like or DataArray.
If DataArrays are passed as indexers, xarray-style indexing will be
carried out. See :ref:`indexing` for the details.
One of indexers or indexers_kwargs must be provided.
drop : bool, optional
If ``drop=True``, drop coordinates variables indexed by integers
instead of making them scalar.
missing_dims : {"raise", "warn", "ignore"}, default "raise"
What to do if dimensions that should be selected from are not present in the
DataArray:
- "exception": raise an exception
- "warning": raise a warning, and ignore the missing dimensions
- "ignore": ignore the missing dimensions
**indexers_kwargs : {dim: indexer, ...}, optional
The keyword arguments form of ``indexers``.

See Also
--------
Dataset.isel
DataArray.sel
"""

indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")

if any(is_fancy_indexer(idx) for idx in indexers.values()):
ds = self._to_temp_dataset()._isel_fancy(indexers, drop=drop)
ds = self._to_temp_dataset()._isel_fancy(
indexers, drop=drop, missing_dims=missing_dims
)
return self._from_temp_dataset(ds)

# Much faster algorithm for when all indexers are ints, slices, one-dimensional
# lists, or zero or one-dimensional np.ndarray's

variable = self._variable.isel(indexers)
variable = self._variable.isel(indexers, missing_dims=missing_dims)

coords = {}
for coord_name, coord_value in self._coords.items():
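A brief illustration of the two code paths touched in `dataarray.py`: plain integer or slice indexers take the fast `Variable.isel` path, while a DataArray indexer goes through `_isel_fancy`; both now receive `missing_dims` (a sketch with hypothetical data):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(12).reshape(3, 4), dims=("x", "y"))

# Fast path: integer/slice indexers are forwarded to Variable.isel,
# which now also accepts missing_dims.
da.isel(x=0, not_a_dim=0, missing_dims="ignore")

# Fancy path: a DataArray indexer triggers the _isel_fancy branch,
# which forwards missing_dims to Dataset._validate_indexers.
points = xr.DataArray([0, 2], dims="points")
da.isel(x=points, not_a_dim=0, missing_dims="ignore")
```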
30 changes: 20 additions & 10 deletions xarray/core/dataset.py
@@ -87,6 +87,7 @@
_check_inplace,
_default,
decode_numpy_dict_values,
drop_dims_from_indexers,
either_dict_or_kwargs,
hashable,
infix_dims,
@@ -1767,7 +1768,7 @@ def maybe_chunk(name, var, chunks):
return self._replace(variables)

def _validate_indexers(
self, indexers: Mapping[Hashable, Any]
self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise",
) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]:
""" Here we make sure
+ indexers have valid keys
@@ -1777,9 +1778,7 @@
"""
from .dataarray import DataArray

invalid = indexers.keys() - self.dims.keys()
if invalid:
raise ValueError("dimensions %r do not exist" % invalid)
indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)

# all indexers should be int, slice, np.ndarrays, or Variable
for k, v in indexers.items():
@@ -1875,6 +1874,7 @@ def isel(
self,
indexers: Mapping[Hashable, Any] = None,
drop: bool = False,
missing_dims: str = "raise",
**indexers_kwargs: Any,
) -> "Dataset":
"""Returns a new dataset with each array indexed along the specified
@@ -1896,6 +1896,12 @@
drop : bool, optional
If ``drop=True``, drop coordinates variables indexed by integers
instead of making them scalar.
missing_dims : {"raise", "warn", "ignore"}, default "raise"
What to do if dimensions that should be selected from are not present in the
Dataset:
- "exception": raise an exception
- "warning": raise a warning, and ignore the missing dimensions
- "ignore": ignore the missing dimensions
**indexers_kwargs : {dim: indexer, ...}, optional
The keyword arguments form of ``indexers``.
One of indexers or indexers_kwargs must be provided.
@@ -1918,13 +1924,11 @@
"""
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
if any(is_fancy_indexer(idx) for idx in indexers.values()):
return self._isel_fancy(indexers, drop=drop)
return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims)

# Much faster algorithm for when all indexers are ints, slices, one-dimensional
# lists, or zero or one-dimensional np.ndarray's
invalid = indexers.keys() - self.dims.keys()
if invalid:
raise ValueError("dimensions %r do not exist" % invalid)
indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)

variables = {}
dims: Dict[Hashable, Tuple[int, ...]] = {}
@@ -1958,10 +1962,16 @@
file_obj=self._file_obj,
)

def _isel_fancy(self, indexers: Mapping[Hashable, Any], *, drop: bool) -> "Dataset":
def _isel_fancy(
self,
indexers: Mapping[Hashable, Any],
*,
drop: bool,
missing_dims: str = "raise",
) -> "Dataset":
# Note: we need to preserve the original indexers variable in order to merge the
# coords below
indexers_list = list(self._validate_indexers(indexers))
indexers_list = list(self._validate_indexers(indexers, missing_dims))

variables: Dict[Hashable, Variable] = {}
indexes: Dict[Hashable, pd.Index] = {}
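To make the Dataset-level behaviour concrete, a small sketch (hypothetical dataset; "warn" emits a UserWarning before dropping the unknown dimension, "ignore" drops it silently):

```python
import warnings

import numpy as np
import xarray as xr

ds = xr.Dataset({"a": (("time", "x"), np.zeros((4, 3)))})

# "warn": the unknown dimension is dropped, but a UserWarning is emitted first.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    out = ds.isel(time=0, not_a_dim=0, missing_dims="warn")
assert dict(out.dims) == {"x": 3}
assert len(caught) == 1 and issubclass(caught[-1].category, UserWarning)

# "ignore": only the valid indexer is applied, with no warning.
assert ds.isel(time=0, not_a_dim=0, missing_dims="ignore").identical(out)
```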
49 changes: 49 additions & 0 deletions xarray/core/utils.py
@@ -24,6 +24,7 @@
Sequence,
Tuple,
TypeVar,
Union,
cast,
)

@@ -738,6 +739,54 @@ def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable:
return new_dim


def drop_dims_from_indexers(
indexers: Mapping[Hashable, Any],
dims: Union[list, Mapping[Hashable, int]],
missing_dims: str,
) -> Mapping[Hashable, Any]:
""" Depending on the setting of missing_dims, drop any dimensions from indexers that
are not present in dims.

Parameters
----------
indexers : dict
dims : sequence or mapping
missing_dims : {"raise", "warn", "ignore"}
"""

if missing_dims == "raise":
invalid = indexers.keys() - set(dims)
if invalid:
raise ValueError(
f"dimensions {invalid} do not exist. Expected one or more of {dims}"
)

return indexers

elif missing_dims == "warn":

# don't modify input
indexers = dict(indexers)

invalid = indexers.keys() - set(dims)
if invalid:
warnings.warn(
f"dimensions {invalid} do not exist. Expected one or more of {dims}"
)
for key in invalid:
indexers.pop(key)

return indexers

elif missing_dims == "ignore":
return {key: val for key, val in indexers.items() if key in dims}

else:
raise ValueError(
f"Unrecognised option {missing_dims} for missing_dims argument"
)


# Singleton type, as per https://github.com/python/typing/pull/240
class Default(Enum):
token = 0
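To pin down the new helper's contract, a short sketch that calls `drop_dims_from_indexers` directly (a private utility, used here purely for illustration):

```python
from xarray.core.utils import drop_dims_from_indexers

indexers = {"x": 0, "not_a_dim": slice(2)}
dims = {"x": 3, "y": 4}

# "ignore": unknown keys are dropped silently; the input dict is not mutated.
assert drop_dims_from_indexers(indexers, dims, "ignore") == {"x": 0}
assert "not_a_dim" in indexers

# "warn": same result, except a UserWarning names the dropped dimensions
# (the helper copies indexers before popping, so the caller's dict is safe).

# "raise": the pre-existing behaviour, a ValueError naming the bad dimensions.
try:
    drop_dims_from_indexers(indexers, dims, "raise")
except ValueError as err:
    assert "not_a_dim" in str(err)
```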
14 changes: 9 additions & 5 deletions xarray/core/variable.py
@@ -28,6 +28,7 @@
OrderedSet,
_default,
decode_numpy_dict_values,
drop_dims_from_indexers,
either_dict_or_kwargs,
ensure_us_time_resolution,
infix_dims,
@@ -1030,6 +1031,7 @@ def _to_dense(self):
def isel(
self: VariableType,
indexers: Mapping[Hashable, Any] = None,
missing_dims: str = "raise",
**indexers_kwargs: Any,
) -> VariableType:
"""Return a new array indexed along the specified dimension(s).
@@ -1039,6 +1041,12 @@
**indexers : {dim: indexer, ...}
Keyword arguments with names matching dimensions and values given
by integers, slice objects or arrays.
missing_dims : {"raise", "warn", "ignore"}, default "raise"
What to do if dimensions that should be selected from are not present in the
Variable:
- "raise": raise an exception
- "warn": raise a warning, and ignore the missing dimensions
- "ignore": ignore the missing dimensions

Returns
-------
@@ -1050,11 +1058,7 @@
"""
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")

invalid = indexers.keys() - set(self.dims)
if invalid:
raise ValueError(
f"dimensions {invalid} do not exist. Expected one or more of {self.dims}"
)
indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims)

key = tuple(indexers.get(dim, slice(None)) for dim in self.dims)
return self[key]
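At the `Variable` level the same three modes apply; a minimal sketch with a hypothetical variable:

```python
import numpy as np
import xarray as xr

v = xr.Variable(("time", "x"), np.zeros((2, 3)))

v.isel(x=0)                                      # unchanged behaviour
v.isel(x=0, not_a_dim=0, missing_dims="ignore")  # unknown dimension dropped silently
v.isel(not_a_dim=0, missing_dims="warn")         # UserWarning, data returned unchanged
```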
13 changes: 13 additions & 0 deletions xarray/tests/test_dataarray.py
@@ -781,6 +781,19 @@ def test_isel(self):
assert_identical(self.dv, self.dv.isel(x=slice(None)))
assert_identical(self.dv[:3], self.dv.isel(x=slice(3)))
assert_identical(self.dv[:3, :5], self.dv.isel(x=slice(3), y=slice(5)))
with raises_regex(
ValueError,
r"dimensions {'not_a_dim'} do not exist. Expected "
r"one or more of \('x', 'y'\)",
):
self.dv.isel(not_a_dim=0)
with pytest.warns(
UserWarning,
match=r"dimensions {'not_a_dim'} do not exist. "
r"Expected one or more of \('x', 'y'\)",
):
self.dv.isel(not_a_dim=0, missing_dims="warn")
assert_identical(self.dv, self.dv.isel(not_a_dim=0, missing_dims="ignore"))

def test_isel_types(self):
# regression test for #1405
15 changes: 15 additions & 0 deletions xarray/tests/test_dataset.py
@@ -1023,6 +1023,21 @@ def test_isel(self):

with pytest.raises(ValueError):
data.isel(not_a_dim=slice(0, 2))
with raises_regex(
ValueError,
r"dimensions {'not_a_dim'} do not exist. Expected "
r"one or more of "
r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*",
):
data.isel(not_a_dim=slice(0, 2))
with pytest.warns(
UserWarning,
match=r"dimensions {'not_a_dim'} do not exist. "
r"Expected one or more of "
r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*",
):
data.isel(not_a_dim=slice(0, 2), missing_dims="warn")
assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore"))

ret = data.isel(dim1=0)
assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims
13 changes: 12 additions & 1 deletion xarray/tests/test_variable.py
@@ -1254,8 +1254,19 @@ def test_isel(self):
assert_identical(v.isel(x=0), v[:, 0])
assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]])
assert_identical(v.isel(time=[]), v[[]])
with raises_regex(ValueError, "do not exist"):
with raises_regex(
ValueError,
r"dimensions {'not_a_dim'} do not exist. Expected one or more of "
r"\('time', 'x'\)",
):
v.isel(not_a_dim=0)
with pytest.warns(
UserWarning,
match=r"dimensions {'not_a_dim'} do not exist. Expected one or more of "
r"\('time', 'x'\)",
):
v.isel(not_a_dim=0, missing_dims="warn")
assert_identical(v, v.isel(not_a_dim=0, missing_dims="ignore"))

def test_index_0d_numpy_string(self):
# regression test to verify our work around for indexing 0d strings