diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b9c81ad3474..46a67f9ae4f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,6 +35,11 @@ New Features :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) By `John Omotani `_ +- ``missing_dims`` argument to :py:meth:`Dataset.isel`, + :py:meth:`DataArray.isel` and :py:meth:`Variable.isel` to allow replacing + the exception when a dimension passed to ``isel`` is not present with a + warning, or just ignore the dimension. (:issue:`3866`, :pull:`3923`) + By `John Omotani `_ - Limited the length of array items with long string reprs to a reasonable width (:pull:`3900`) By `Maximilian Roos `_ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b7e0333dcd9..63cba53b689 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1007,25 +1007,51 @@ def isel( self, indexers: Mapping[Hashable, Any] = None, drop: bool = False, + missing_dims: str = "raise", **indexers_kwargs: Any, ) -> "DataArray": """Return a new DataArray whose data is given by integer indexing along the specified dimension(s). + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and values given + by integers, slice objects or arrays. + indexer can be an integer, slice, array-like or DataArray. + If DataArrays are passed as indexers, xarray-style indexing will be + carried out. See :ref:`indexing` for the details. + One of indexers or indexers_kwargs must be provided. + drop : bool, optional + If ``drop=True``, drop coordinates variables indexed by integers + instead of making them scalar. 
+ missing_dims : {"raise", "warn", "ignore"}, default "raise" + What to do if dimensions that should be selected from are not present in the + DataArray: + - "raise": raise an exception + - "warn": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions + **indexers_kwargs : {dim: indexer, ...}, optional + The keyword arguments form of ``indexers``. + See Also -------- Dataset.isel DataArray.sel """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") + if any(is_fancy_indexer(idx) for idx in indexers.values()): - ds = self._to_temp_dataset()._isel_fancy(indexers, drop=drop) + ds = self._to_temp_dataset()._isel_fancy( + indexers, drop=drop, missing_dims=missing_dims + ) return self._from_temp_dataset(ds) # Much faster algorithm for when all indexers are ints, slices, one-dimensional # lists, or zero or one-dimensional np.ndarray's - variable = self._variable.isel(indexers) + variable = self._variable.isel(indexers, missing_dims=missing_dims) coords = {} for coord_name, coord_value in self._coords.items(): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c515d781db1..97b3caf2b6e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -87,6 +87,7 @@ _check_inplace, _default, decode_numpy_dict_values, + drop_dims_from_indexers, either_dict_or_kwargs, hashable, infix_dims, @@ -1767,7 +1768,7 @@ def maybe_chunk(name, var, chunks): return self._replace(variables) def _validate_indexers( - self, indexers: Mapping[Hashable, Any] + self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise", ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: """ Here we make sure + indexer has a valid keys @@ -1777,9 +1778,7 @@ def _validate_indexers( """ from .dataarray import DataArray - invalid = indexers.keys() - self.dims.keys() - if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + indexers = drop_dims_from_indexers(indexers, self.dims, 
missing_dims) # all indexers should be int, slice, np.ndarrays, or Variable for k, v in indexers.items(): @@ -1875,6 +1874,7 @@ def isel( self, indexers: Mapping[Hashable, Any] = None, drop: bool = False, + missing_dims: str = "raise", **indexers_kwargs: Any, ) -> "Dataset": """Returns a new dataset with each array indexed along the specified @@ -1896,6 +1896,12 @@ def isel( drop : bool, optional If ``drop=True``, drop coordinates variables indexed by integers instead of making them scalar. + missing_dims : {"raise", "warn", "ignore"}, default "raise" + What to do if dimensions that should be selected from are not present in the + Dataset: + - "raise": raise an exception + - "warn": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions **indexers_kwargs : {dim: indexer, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -1918,13 +1924,11 @@ def isel( """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") if any(is_fancy_indexer(idx) for idx in indexers.values()): - return self._isel_fancy(indexers, drop=drop) + return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims) # Much faster algorithm for when all indexers are ints, slices, one-dimensional # lists, or zero or one-dimensional np.ndarray's - invalid = indexers.keys() - self.dims.keys() - if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) variables = {} dims: Dict[Hashable, Tuple[int, ...]] = {} @@ -1958,10 +1962,16 @@ def isel( file_obj=self._file_obj, ) - def _isel_fancy(self, indexers: Mapping[Hashable, Any], *, drop: bool) -> "Dataset": + def _isel_fancy( + self, + indexers: Mapping[Hashable, Any], + *, + drop: bool, + missing_dims: str = "raise", + ) -> "Dataset": # Note: we need to preserve the original indexers variable in order to merge the # coords below - indexers_list = 
list(self._validate_indexers(indexers)) + indexers_list = list(self._validate_indexers(indexers, missing_dims)) variables: Dict[Hashable, Variable] = {} indexes: Dict[Hashable, pd.Index] = {} diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 896ee31ab5c..1126cf3037f 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -24,6 +24,7 @@ Sequence, Tuple, TypeVar, + Union, cast, ) @@ -738,6 +739,54 @@ def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: return new_dim +def drop_dims_from_indexers( + indexers: Mapping[Hashable, Any], + dims: Union[list, Mapping[Hashable, int]], + missing_dims: str, +) -> Mapping[Hashable, Any]: + """ Depending on the setting of missing_dims, drop any dimensions from indexers that + are not present in dims. + + Parameters + ---------- + indexers : dict + dims : sequence + missing_dims : {"raise", "warn", "ignore"} + """ + + if missing_dims == "raise": + invalid = indexers.keys() - set(dims) + if invalid: + raise ValueError( + f"dimensions {invalid} do not exist. Expected one or more of {dims}" + ) + + return indexers + + elif missing_dims == "warn": + + # don't modify input + indexers = dict(indexers) + + invalid = indexers.keys() - set(dims) + if invalid: + warnings.warn( + f"dimensions {invalid} do not exist. 
Expected one or more of {dims}" + ) + for key in invalid: + indexers.pop(key) + + return indexers + + elif missing_dims == "ignore": + return {key: val for key, val in indexers.items() if key in dims} + + else: + raise ValueError( + f"Unrecognised option {missing_dims} for missing_dims argument" + ) + + # Singleton type, as per https://github.com/python/typing/pull/240 class Default(Enum): token = 0 diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c9addeefb04..68e823ca426 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -28,6 +28,7 @@ OrderedSet, _default, decode_numpy_dict_values, + drop_dims_from_indexers, either_dict_or_kwargs, ensure_us_time_resolution, infix_dims, @@ -1030,6 +1031,7 @@ def _to_dense(self): def isel( self: VariableType, indexers: Mapping[Hashable, Any] = None, + missing_dims: str = "raise", **indexers_kwargs: Any, ) -> VariableType: """Return a new array indexed along the specified dimension(s). @@ -1039,6 +1041,12 @@ def isel( **indexers : {dim: indexer, ...} Keyword arguments with names matching dimensions and values given by integers, slice objects or arrays. + missing_dims : {"raise", "warn", "ignore"}, default "raise" + What to do if dimensions that should be selected from are not present in the + Variable: + - "raise": raise an exception + - "warn": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions Returns ------- @@ -1050,11 +1058,7 @@ def isel( """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") - invalid = indexers.keys() - set(self.dims) - if invalid: - raise ValueError( - f"dimensions {invalid} do not exist. 
Expected one or more of {self.dims}" - ) + indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) key = tuple(indexers.get(dim, slice(None)) for dim in self.dims) return self[key] diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ced72d1bc06..cf31182ed30 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -781,6 +781,19 @@ def test_isel(self): assert_identical(self.dv, self.dv.isel(x=slice(None))) assert_identical(self.dv[:3], self.dv.isel(x=slice(3))) assert_identical(self.dv[:3, :5], self.dv.isel(x=slice(3), y=slice(5))) + with raises_regex( + ValueError, + r"dimensions {'not_a_dim'} do not exist. Expected " + r"one or more of \('x', 'y'\)", + ): + self.dv.isel(not_a_dim=0) + with pytest.warns( + UserWarning, + match=r"dimensions {'not_a_dim'} do not exist. " + r"Expected one or more of \('x', 'y'\)", + ): + self.dv.isel(not_a_dim=0, missing_dims="warn") + assert_identical(self.dv, self.dv.isel(not_a_dim=0, missing_dims="ignore")) def test_isel_types(self): # regression test for #1405 diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 237c315583c..a1cb7361e77 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1023,6 +1023,21 @@ def test_isel(self): with pytest.raises(ValueError): data.isel(not_a_dim=slice(0, 2)) + with raises_regex( + ValueError, + r"dimensions {'not_a_dim'} do not exist. Expected " + r"one or more of " + r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*", + ): + data.isel(not_a_dim=slice(0, 2)) + with pytest.warns( + UserWarning, + match=r"dimensions {'not_a_dim'} do not exist. 
" + r"Expected one or more of " + r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*", + ): + data.isel(not_a_dim=slice(0, 2), missing_dims="warn") + assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore")) ret = data.isel(dim1=0) assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 116466e112d..78e3848b8fb 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1254,8 +1254,19 @@ def test_isel(self): assert_identical(v.isel(x=0), v[:, 0]) assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]]) assert_identical(v.isel(time=[]), v[[]]) - with raises_regex(ValueError, "do not exist"): + with raises_regex( + ValueError, + r"dimensions {'not_a_dim'} do not exist. Expected one or more of " + r"\('time', 'x'\)", + ): v.isel(not_a_dim=0) + with pytest.warns( + UserWarning, + match=r"dimensions {'not_a_dim'} do not exist. Expected one or more of " + r"\('time', 'x'\)", + ): + v.isel(not_a_dim=0, missing_dims="warn") + assert_identical(v, v.isel(not_a_dim=0, missing_dims="ignore")) def test_index_0d_numpy_string(self): # regression test to verify our work around for indexing 0d strings